xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/r600/sfn/sfn_instr_alugroup.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /* -*- mesa-c++  -*-
2  * Copyright 2022 Collabora LTD
3  * Author: Gert Wollny <[email protected]>
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "sfn_instr_alugroup.h"
8 
9 #include "sfn_debug.h"
10 #include "sfn_instr_export.h"
11 #include "sfn_instr_mem.h"
12 #include "sfn_instr_tex.h"
13 
14 #include <algorithm>
15 
16 namespace r600 {
17 
AluGroup()18 AluGroup::AluGroup() { std::fill(m_slots.begin(), m_slots.end(), nullptr); }
19 
20 bool
add_instruction(AluInstr * instr)21 AluGroup::add_instruction(AluInstr *instr)
22 {
23    /* we can only schedule one op that accesses LDS or
24      the LDS read queue */
25    if (m_has_lds_op && instr->has_lds_access())
26       return false;
27 
28    if (instr->has_alu_flag(alu_is_trans)) {
29       ASSERTED auto opinfo = alu_ops.find(instr->opcode());
30       assert(opinfo->second.can_channel(AluOp::t, s_chip_class));
31       if (add_trans_instructions(instr)) {
32          m_has_kill_op |= instr->is_kill();
33          return true;
34       }
35    }
36 
37    if (add_vec_instructions(instr) && !instr->has_alu_flag(alu_is_trans)) {
38       instr->set_parent_group(this);
39       m_has_kill_op |= instr->is_kill();
40       return true;
41    }
42 
43    auto opinfo = alu_ops.find(instr->opcode());
44    assert(opinfo != alu_ops.end());
45 
46    if (s_max_slots > 4 && opinfo->second.can_channel(AluOp::t, s_chip_class) &&
47        add_trans_instructions(instr)) {
48       instr->set_parent_group(this);
49       m_has_kill_op |= instr->is_kill();
50       return true;
51    }
52 
53    return false;
54 }
55 
56 bool
add_trans_instructions(AluInstr * instr)57 AluGroup::add_trans_instructions(AluInstr *instr)
58 {
59    if (m_slots[4] || s_max_slots < 5)
60       return false;
61 
62    /* LDS instructions have to be scheduled in X */
63    if (instr->has_alu_flag(alu_is_lds))
64       return false;
65 
66    auto opinfo = alu_ops.find(instr->opcode());
67    assert(opinfo != alu_ops.end());
68 
69    if (!opinfo->second.can_channel(AluOp::t, s_chip_class))
70       return false;
71 
72    /* if we schedule a non-trans instr into the trans slot, we have to make
73     * sure that the corresponding vector slot is already occupied, otherwise
74     * the hardware will schedule it as vector op and the bank-swizzle as
75     * checked here (and in r600_asm.c) will not catch conflicts.
76     */
77    if (!instr->has_alu_flag(alu_is_trans) && !m_slots[instr->dest_chan()]) {
78       if (instr->dest() && instr->dest()->pin() == pin_free) {
79          int used_slot = 3;
80          auto dest = instr->dest();
81          int free_mask = 0xf;
82 
83          for (auto p : dest->parents()) {
84             auto alu = p->as_alu();
85             if (alu)
86                free_mask &= alu->allowed_dest_chan_mask();
87          }
88 
89          for (auto u : dest->uses()) {
90             free_mask &= u->allowed_src_chan_mask();
91             if (!free_mask)
92                return false;
93          }
94 
95          while (used_slot >= 0 &&
96                 (!m_slots[used_slot] || !(free_mask & (1 << used_slot))))
97             --used_slot;
98 
99          // if we schedule a non-trans instr into the trans slot,
100          // there should always be some slot that is already used
101          if (used_slot < 0)
102             return false;
103 
104          instr->dest()->set_chan(used_slot);
105       }
106    }
107 
108    if (!instr->has_alu_flag(alu_is_trans) && !m_slots[instr->dest_chan()])
109       return false;
110 
111    for (AluBankSwizzle i = sq_alu_scl_201; i != sq_alu_scl_unknown; ++i) {
112       AluReadportReservation readports_evaluator = m_readports_evaluator;
113       if (readports_evaluator.schedule_trans_instruction(*instr, i) &&
114           update_indirect_access(instr)) {
115          m_readports_evaluator = readports_evaluator;
116          m_slots[4] = instr;
117          instr->pin_sources_to_chan();
118          sfn_log << SfnLog::schedule << "T: " << *instr << "\n";
119 
120          /* We added a vector op in the trans channel, so we have to
121           * make sure the corresponding vector channel is used */
122          assert(instr->has_alu_flag(alu_is_trans) || m_slots[instr->dest_chan()]);
123          m_has_kill_op |= instr->is_kill();
124          return true;
125       }
126    }
127    return false;
128 }
129 
130 int
free_slots() const131 AluGroup::free_slots() const
132 {
133    int free_mask = 0;
134    for (int i = 0; i < s_max_slots; ++i) {
135       if (!m_slots[i])
136          free_mask |= 1 << i;
137    }
138    return free_mask;
139 }
140 
141 bool
add_vec_instructions(AluInstr * instr)142 AluGroup::add_vec_instructions(AluInstr *instr)
143 {
144    int param_src = -1;
145    for (auto& s : instr->sources()) {
146       auto is = s->as_inline_const();
147       if (is)
148          param_src = is->sel() - ALU_SRC_PARAM_BASE;
149    }
150 
151    if (param_src >= 0) {
152       if (m_param_used < 0)
153          m_param_used = param_src;
154       else if (m_param_used != param_src)
155          return false;
156    }
157 
158    if (m_has_lds_op && instr->has_lds_access())
159       return false;
160 
161    int preferred_chan = instr->dest_chan();
162    if (!m_slots[preferred_chan]) {
163       if (instr->bank_swizzle() != alu_vec_unknown) {
164          if (try_readport(instr, instr->bank_swizzle())) {
165             m_has_kill_op |= instr->is_kill();
166             return true;
167          }
168       } else {
169          for (AluBankSwizzle i = alu_vec_012; i != alu_vec_unknown; ++i) {
170             if (try_readport(instr, i)) {
171                m_has_kill_op |= instr->is_kill();
172                return true;
173             }
174          }
175       }
176    } else {
177 
178       auto dest = instr->dest();
179       if (dest && (dest->pin() == pin_free || dest->pin() == pin_group)) {
180 
181          int free_mask = 0xf;
182          for (auto p : dest->parents()) {
183             auto alu = p->as_alu();
184             if (alu)
185                free_mask &= alu->allowed_dest_chan_mask();
186          }
187 
188          for (auto u : dest->uses()) {
189             free_mask &= u->allowed_src_chan_mask();
190             if (!free_mask)
191                return false;
192          }
193 
194          int free_chan = 0;
195          while (free_chan < 4 && (m_slots[free_chan] || !(free_mask & (1 << free_chan))))
196             free_chan++;
197 
198          if (free_chan < 4) {
199             sfn_log << SfnLog::schedule << "V: Try force channel " << free_chan << "\n";
200             dest->set_chan(free_chan);
201             if (instr->bank_swizzle() != alu_vec_unknown) {
202                if (try_readport(instr, instr->bank_swizzle())) {
203                   m_has_kill_op |= instr->is_kill();
204                   return true;
205                }
206             } else {
207                for (AluBankSwizzle i = alu_vec_012; i != alu_vec_unknown; ++i) {
208                   if (try_readport(instr, i)) {
209                      m_has_kill_op |= instr->is_kill();
210                      return true;
211                   }
212                }
213             }
214          }
215       }
216    }
217    return false;
218 }
219 
update_readport_reserver()220 void AluGroup::update_readport_reserver()
221 {
222    AluReadportReservation readports_evaluator;
223    for (int i = 0; i < 4;  ++i) {
224       if (!m_slots[i])
225          continue;
226 
227       AluReadportReservation re = readports_evaluator;
228       AluBankSwizzle bs = alu_vec_012;
229       while (bs != alu_vec_unknown) {
230          if (re.schedule_vec_instruction(*m_slots[i], bs)) {
231             readports_evaluator = re;
232             break;
233          }
234          ++bs;
235       }
236       if (bs == alu_vec_unknown)
237          unreachable("Bank swizzle should have been checked before");
238    }
239 
240    if (s_max_slots == 5 && m_slots[4]) {
241       AluReadportReservation re = readports_evaluator;
242       AluBankSwizzle bs = sq_alu_scl_201;
243       while (bs != sq_alu_scl_unknown) {
244          if (re.schedule_vec_instruction(*m_slots[4], bs)) {
245             readports_evaluator = re;
246             break;
247          }
248          ++bs;
249       }
250       if (bs == sq_alu_scl_unknown)
251          unreachable("Bank swizzle should have been checked before");
252    }
253 }
254 
255 bool
try_readport(AluInstr * instr,AluBankSwizzle cycle)256 AluGroup::try_readport(AluInstr *instr, AluBankSwizzle cycle)
257 {
258    int preferred_chan = instr->dest_chan();
259    AluReadportReservation readports_evaluator = m_readports_evaluator;
260    if (readports_evaluator.schedule_vec_instruction(*instr, cycle) &&
261        update_indirect_access(instr)) {
262       m_readports_evaluator = readports_evaluator;
263       m_slots[preferred_chan] = instr;
264       m_has_lds_op |= instr->has_lds_access();
265       sfn_log << SfnLog::schedule << "V: " << *instr << "\n";
266       auto dest = instr->dest();
267       if (dest) {
268          if (dest->pin() == pin_free)
269             dest->set_pin(pin_chan);
270          else if (dest->pin() == pin_group)
271             dest->set_pin(pin_chgr);
272       }
273       instr->pin_sources_to_chan();
274       return true;
275    }
276    return false;
277 }
278 
replace_source(PRegister old_src,PVirtualValue new_src)279 bool AluGroup::replace_source(PRegister old_src, PVirtualValue new_src)
280 {
281    AluReadportReservation rpr_sum;
282 
283    // At this point we should not have anything in slot 4
284    assert(s_max_slots == 4 || !m_slots[4]);
285 
286    for (int slot = 0; slot < 4; ++slot) {
287       if (!m_slots[slot])
288          continue;
289 
290       assert(m_slots[slot]->alu_slots() == 1);
291 
292       if (!m_slots[slot]->can_replace_source(old_src, new_src))
293          return false;
294 
295       auto& srcs = m_slots[slot]->sources();
296 
297       PVirtualValue test_src[3];
298       std::transform(srcs.begin(), srcs.end(), test_src,
299                      [old_src, new_src](PVirtualValue s) {
300          return old_src->equal_to(*s) ? new_src : s;
301       });
302 
303       AluBankSwizzle bs = alu_vec_012;
304       while (bs != alu_vec_unknown) {
305          AluReadportReservation rpr = rpr_sum;
306          if (rpr.schedule_vec_src(test_src,srcs.size(), bs)) {
307             rpr_sum = rpr;
308             break;
309          }
310          ++bs;
311       }
312 
313       if (bs == alu_vec_unknown)
314          return false;
315    }
316 
317    bool success = false;
318 
319    for (int slot = 0; slot < 4; ++slot) {
320       if (!m_slots[slot])
321          continue;
322       success |= m_slots[slot]->do_replace_source(old_src, new_src);
323       for (auto& s : m_slots[slot]->sources()) {
324          if (s->pin() == pin_free)
325             s->set_pin(pin_chan);
326          else if (s->pin() == pin_group)
327                s->set_pin(pin_chgr);
328       }
329    }
330 
331    m_readports_evaluator = rpr_sum;
332    return success;
333 }
334 
335 bool
update_indirect_access(AluInstr * instr)336 AluGroup::update_indirect_access(AluInstr *instr)
337 {
338    auto [indirect_addr, for_dest, index_reg] = instr->indirect_addr();
339 
340    if (indirect_addr) {
341       assert(!index_reg);
342       if (!m_addr_used) {
343          m_addr_used = indirect_addr;
344          m_addr_for_src = !for_dest;
345          m_addr_is_index = false;
346       } else if (!indirect_addr->equal_to(*m_addr_used) || m_addr_is_index) {
347          return false;
348       }
349    } else if (index_reg) {
350        if (!m_addr_used) {
351            m_addr_used = index_reg;
352            m_addr_is_index = true;
353        } else if (!index_reg->equal_to(*m_addr_used) || !m_addr_is_index) {
354            return false;
355        }
356    }
357    return true;
358 }
359 
index_mode_load()360 bool AluGroup::index_mode_load()
361 {
362    if (!m_slots[0] || !m_slots[0]->dest())
363       return false;
364 
365    Register *dst = m_slots[0]->dest();
366    return dst->has_flag(Register::addr_or_idx) && dst->sel() > 0;
367 }
368 
369 void
accept(ConstInstrVisitor & visitor) const370 AluGroup::accept(ConstInstrVisitor& visitor) const
371 {
372    visitor.visit(*this);
373 }
374 
375 void
accept(InstrVisitor & visitor)376 AluGroup::accept(InstrVisitor& visitor)
377 {
378    visitor.visit(this);
379 }
380 
381 void
set_scheduled()382 AluGroup::set_scheduled()
383 {
384    for (int i = 0; i < s_max_slots; ++i) {
385       if (m_slots[i])
386          m_slots[i]->set_scheduled();
387    }
388    if (m_origin)
389       m_origin->set_scheduled();
390 }
391 
392 void
fix_last_flag()393 AluGroup::fix_last_flag()
394 {
395    bool last_seen = false;
396    for (int i = s_max_slots - 1; i >= 0; --i) {
397       if (m_slots[i]) {
398          if (!last_seen) {
399             m_slots[i]->set_alu_flag(alu_last_instr);
400             last_seen = true;
401          } else {
402             m_slots[i]->reset_alu_flag(alu_last_instr);
403          }
404       }
405    }
406 }
407 
408 bool
is_equal_to(const AluGroup & other) const409 AluGroup::is_equal_to(const AluGroup& other) const
410 {
411    for (int i = 0; i < s_max_slots; ++i) {
412       if (!other.m_slots[i]) {
413          if (!m_slots[i])
414             continue;
415          else
416             return false;
417       }
418 
419       if (m_slots[i]) {
420          if (!other.m_slots[i])
421             return false;
422          else if (!m_slots[i]->is_equal_to(*other.m_slots[i]))
423             return false;
424       }
425    }
426    return true;
427 }
428 
429 bool
has_lds_group_end() const430 AluGroup::has_lds_group_end() const
431 {
432    for (int i = 0; i < s_max_slots; ++i) {
433       if (m_slots[i] && m_slots[i]->has_alu_flag(alu_lds_group_end))
434          return true;
435    }
436    return false;
437 }
438 
439 bool
do_ready() const440 AluGroup::do_ready() const
441 {
442    for (int i = 0; i < s_max_slots; ++i) {
443       if (m_slots[i] && !m_slots[i]->ready())
444          return false;
445    }
446    return true;
447 }
448 
449 void
forward_set_blockid(int id,int index)450 AluGroup::forward_set_blockid(int id, int index)
451 {
452    for (int i = 0; i < s_max_slots; ++i) {
453       if (m_slots[i]) {
454          m_slots[i]->set_blockid(id, index);
455       }
456    }
457 }
458 
459 uint32_t
slots() const460 AluGroup::slots() const
461 {
462    uint32_t result = (m_readports_evaluator.m_nliterals + 1) >> 1;
463    for (int i = 0; i < s_max_slots; ++i) {
464       if (m_slots[i])
465          ++result;
466    }
467    if (m_addr_used) {
468       ++result;
469       if (m_addr_is_index && s_max_slots == 5)
470          ++result;
471    }
472 
473    return result;
474 }
475 
476 void
do_print(std::ostream & os) const477 AluGroup::do_print(std::ostream& os) const
478 {
479    const char slotname[] = "xyzwt";
480 
481    os << "ALU_GROUP_BEGIN\n";
482    for (int i = 0; i < s_max_slots; ++i) {
483       if (m_slots[i]) {
484          for (int j = 0; j < 2 * m_nesting_depth + 4; ++j)
485             os << ' ';
486          os << slotname[i] << ": ";
487          m_slots[i]->print(os);
488          os << "\n";
489       }
490    }
491    for (int i = 0; i < 2 * m_nesting_depth + 2; ++i)
492       os << ' ';
493    os << "ALU_GROUP_END";
494 }
495 
496 AluInstr::SrcValues
get_kconsts() const497 AluGroup::get_kconsts() const
498 {
499    AluInstr::SrcValues result;
500 
501    for (int i = 0; i < s_max_slots; ++i) {
502       if (m_slots[i]) {
503          for (auto s : m_slots[i]->sources())
504             if (s->as_uniform())
505                result.push_back(s);
506       }
507    }
508    return result;
509 }
510 
511 void
set_chipclass(r600_chip_class chip_class)512 AluGroup::set_chipclass(r600_chip_class chip_class)
513 {
514    s_chip_class = chip_class;
515    s_max_slots = chip_class == ISA_CC_CAYMAN ? 4 : 5;
516 }
517 
518 int AluGroup::s_max_slots = 5;
519 r600_chip_class AluGroup::s_chip_class = ISA_CC_EVERGREEN;
520 } // namespace r600
521