1 /* -*- mesa-c++ -*-
2 * Copyright 2022 Collabora LTD
3 * Author: Gert Wollny <[email protected]>
4 * SPDX-License-Identifier: MIT
5 */
6
7 #include "sfn_instr_alugroup.h"
8
9 #include "sfn_debug.h"
10 #include "sfn_instr_export.h"
11 #include "sfn_instr_mem.h"
12 #include "sfn_instr_tex.h"
13
14 #include <algorithm>
15
16 namespace r600 {
17
AluGroup()18 AluGroup::AluGroup() { std::fill(m_slots.begin(), m_slots.end(), nullptr); }
19
20 bool
add_instruction(AluInstr * instr)21 AluGroup::add_instruction(AluInstr *instr)
22 {
23 /* we can only schedule one op that accesses LDS or
24 the LDS read queue */
25 if (m_has_lds_op && instr->has_lds_access())
26 return false;
27
28 if (instr->has_alu_flag(alu_is_trans)) {
29 ASSERTED auto opinfo = alu_ops.find(instr->opcode());
30 assert(opinfo->second.can_channel(AluOp::t, s_chip_class));
31 if (add_trans_instructions(instr)) {
32 m_has_kill_op |= instr->is_kill();
33 return true;
34 }
35 }
36
37 if (add_vec_instructions(instr) && !instr->has_alu_flag(alu_is_trans)) {
38 instr->set_parent_group(this);
39 m_has_kill_op |= instr->is_kill();
40 return true;
41 }
42
43 auto opinfo = alu_ops.find(instr->opcode());
44 assert(opinfo != alu_ops.end());
45
46 if (s_max_slots > 4 && opinfo->second.can_channel(AluOp::t, s_chip_class) &&
47 add_trans_instructions(instr)) {
48 instr->set_parent_group(this);
49 m_has_kill_op |= instr->is_kill();
50 return true;
51 }
52
53 return false;
54 }
55
56 bool
add_trans_instructions(AluInstr * instr)57 AluGroup::add_trans_instructions(AluInstr *instr)
58 {
59 if (m_slots[4] || s_max_slots < 5)
60 return false;
61
62 /* LDS instructions have to be scheduled in X */
63 if (instr->has_alu_flag(alu_is_lds))
64 return false;
65
66 auto opinfo = alu_ops.find(instr->opcode());
67 assert(opinfo != alu_ops.end());
68
69 if (!opinfo->second.can_channel(AluOp::t, s_chip_class))
70 return false;
71
72 /* if we schedule a non-trans instr into the trans slot, we have to make
73 * sure that the corresponding vector slot is already occupied, otherwise
74 * the hardware will schedule it as vector op and the bank-swizzle as
75 * checked here (and in r600_asm.c) will not catch conflicts.
76 */
77 if (!instr->has_alu_flag(alu_is_trans) && !m_slots[instr->dest_chan()]) {
78 if (instr->dest() && instr->dest()->pin() == pin_free) {
79 int used_slot = 3;
80 auto dest = instr->dest();
81 int free_mask = 0xf;
82
83 for (auto p : dest->parents()) {
84 auto alu = p->as_alu();
85 if (alu)
86 free_mask &= alu->allowed_dest_chan_mask();
87 }
88
89 for (auto u : dest->uses()) {
90 free_mask &= u->allowed_src_chan_mask();
91 if (!free_mask)
92 return false;
93 }
94
95 while (used_slot >= 0 &&
96 (!m_slots[used_slot] || !(free_mask & (1 << used_slot))))
97 --used_slot;
98
99 // if we schedule a non-trans instr into the trans slot,
100 // there should always be some slot that is already used
101 if (used_slot < 0)
102 return false;
103
104 instr->dest()->set_chan(used_slot);
105 }
106 }
107
108 if (!instr->has_alu_flag(alu_is_trans) && !m_slots[instr->dest_chan()])
109 return false;
110
111 for (AluBankSwizzle i = sq_alu_scl_201; i != sq_alu_scl_unknown; ++i) {
112 AluReadportReservation readports_evaluator = m_readports_evaluator;
113 if (readports_evaluator.schedule_trans_instruction(*instr, i) &&
114 update_indirect_access(instr)) {
115 m_readports_evaluator = readports_evaluator;
116 m_slots[4] = instr;
117 instr->pin_sources_to_chan();
118 sfn_log << SfnLog::schedule << "T: " << *instr << "\n";
119
120 /* We added a vector op in the trans channel, so we have to
121 * make sure the corresponding vector channel is used */
122 assert(instr->has_alu_flag(alu_is_trans) || m_slots[instr->dest_chan()]);
123 m_has_kill_op |= instr->is_kill();
124 return true;
125 }
126 }
127 return false;
128 }
129
130 int
free_slots() const131 AluGroup::free_slots() const
132 {
133 int free_mask = 0;
134 for (int i = 0; i < s_max_slots; ++i) {
135 if (!m_slots[i])
136 free_mask |= 1 << i;
137 }
138 return free_mask;
139 }
140
141 bool
add_vec_instructions(AluInstr * instr)142 AluGroup::add_vec_instructions(AluInstr *instr)
143 {
144 int param_src = -1;
145 for (auto& s : instr->sources()) {
146 auto is = s->as_inline_const();
147 if (is)
148 param_src = is->sel() - ALU_SRC_PARAM_BASE;
149 }
150
151 if (param_src >= 0) {
152 if (m_param_used < 0)
153 m_param_used = param_src;
154 else if (m_param_used != param_src)
155 return false;
156 }
157
158 if (m_has_lds_op && instr->has_lds_access())
159 return false;
160
161 int preferred_chan = instr->dest_chan();
162 if (!m_slots[preferred_chan]) {
163 if (instr->bank_swizzle() != alu_vec_unknown) {
164 if (try_readport(instr, instr->bank_swizzle())) {
165 m_has_kill_op |= instr->is_kill();
166 return true;
167 }
168 } else {
169 for (AluBankSwizzle i = alu_vec_012; i != alu_vec_unknown; ++i) {
170 if (try_readport(instr, i)) {
171 m_has_kill_op |= instr->is_kill();
172 return true;
173 }
174 }
175 }
176 } else {
177
178 auto dest = instr->dest();
179 if (dest && (dest->pin() == pin_free || dest->pin() == pin_group)) {
180
181 int free_mask = 0xf;
182 for (auto p : dest->parents()) {
183 auto alu = p->as_alu();
184 if (alu)
185 free_mask &= alu->allowed_dest_chan_mask();
186 }
187
188 for (auto u : dest->uses()) {
189 free_mask &= u->allowed_src_chan_mask();
190 if (!free_mask)
191 return false;
192 }
193
194 int free_chan = 0;
195 while (free_chan < 4 && (m_slots[free_chan] || !(free_mask & (1 << free_chan))))
196 free_chan++;
197
198 if (free_chan < 4) {
199 sfn_log << SfnLog::schedule << "V: Try force channel " << free_chan << "\n";
200 dest->set_chan(free_chan);
201 if (instr->bank_swizzle() != alu_vec_unknown) {
202 if (try_readport(instr, instr->bank_swizzle())) {
203 m_has_kill_op |= instr->is_kill();
204 return true;
205 }
206 } else {
207 for (AluBankSwizzle i = alu_vec_012; i != alu_vec_unknown; ++i) {
208 if (try_readport(instr, i)) {
209 m_has_kill_op |= instr->is_kill();
210 return true;
211 }
212 }
213 }
214 }
215 }
216 }
217 return false;
218 }
219
update_readport_reserver()220 void AluGroup::update_readport_reserver()
221 {
222 AluReadportReservation readports_evaluator;
223 for (int i = 0; i < 4; ++i) {
224 if (!m_slots[i])
225 continue;
226
227 AluReadportReservation re = readports_evaluator;
228 AluBankSwizzle bs = alu_vec_012;
229 while (bs != alu_vec_unknown) {
230 if (re.schedule_vec_instruction(*m_slots[i], bs)) {
231 readports_evaluator = re;
232 break;
233 }
234 ++bs;
235 }
236 if (bs == alu_vec_unknown)
237 unreachable("Bank swizzle should have been checked before");
238 }
239
240 if (s_max_slots == 5 && m_slots[4]) {
241 AluReadportReservation re = readports_evaluator;
242 AluBankSwizzle bs = sq_alu_scl_201;
243 while (bs != sq_alu_scl_unknown) {
244 if (re.schedule_vec_instruction(*m_slots[4], bs)) {
245 readports_evaluator = re;
246 break;
247 }
248 ++bs;
249 }
250 if (bs == sq_alu_scl_unknown)
251 unreachable("Bank swizzle should have been checked before");
252 }
253 }
254
255 bool
try_readport(AluInstr * instr,AluBankSwizzle cycle)256 AluGroup::try_readport(AluInstr *instr, AluBankSwizzle cycle)
257 {
258 int preferred_chan = instr->dest_chan();
259 AluReadportReservation readports_evaluator = m_readports_evaluator;
260 if (readports_evaluator.schedule_vec_instruction(*instr, cycle) &&
261 update_indirect_access(instr)) {
262 m_readports_evaluator = readports_evaluator;
263 m_slots[preferred_chan] = instr;
264 m_has_lds_op |= instr->has_lds_access();
265 sfn_log << SfnLog::schedule << "V: " << *instr << "\n";
266 auto dest = instr->dest();
267 if (dest) {
268 if (dest->pin() == pin_free)
269 dest->set_pin(pin_chan);
270 else if (dest->pin() == pin_group)
271 dest->set_pin(pin_chgr);
272 }
273 instr->pin_sources_to_chan();
274 return true;
275 }
276 return false;
277 }
278
replace_source(PRegister old_src,PVirtualValue new_src)279 bool AluGroup::replace_source(PRegister old_src, PVirtualValue new_src)
280 {
281 AluReadportReservation rpr_sum;
282
283 // At this point we should not have anything in slot 4
284 assert(s_max_slots == 4 || !m_slots[4]);
285
286 for (int slot = 0; slot < 4; ++slot) {
287 if (!m_slots[slot])
288 continue;
289
290 assert(m_slots[slot]->alu_slots() == 1);
291
292 if (!m_slots[slot]->can_replace_source(old_src, new_src))
293 return false;
294
295 auto& srcs = m_slots[slot]->sources();
296
297 PVirtualValue test_src[3];
298 std::transform(srcs.begin(), srcs.end(), test_src,
299 [old_src, new_src](PVirtualValue s) {
300 return old_src->equal_to(*s) ? new_src : s;
301 });
302
303 AluBankSwizzle bs = alu_vec_012;
304 while (bs != alu_vec_unknown) {
305 AluReadportReservation rpr = rpr_sum;
306 if (rpr.schedule_vec_src(test_src,srcs.size(), bs)) {
307 rpr_sum = rpr;
308 break;
309 }
310 ++bs;
311 }
312
313 if (bs == alu_vec_unknown)
314 return false;
315 }
316
317 bool success = false;
318
319 for (int slot = 0; slot < 4; ++slot) {
320 if (!m_slots[slot])
321 continue;
322 success |= m_slots[slot]->do_replace_source(old_src, new_src);
323 for (auto& s : m_slots[slot]->sources()) {
324 if (s->pin() == pin_free)
325 s->set_pin(pin_chan);
326 else if (s->pin() == pin_group)
327 s->set_pin(pin_chgr);
328 }
329 }
330
331 m_readports_evaluator = rpr_sum;
332 return success;
333 }
334
335 bool
update_indirect_access(AluInstr * instr)336 AluGroup::update_indirect_access(AluInstr *instr)
337 {
338 auto [indirect_addr, for_dest, index_reg] = instr->indirect_addr();
339
340 if (indirect_addr) {
341 assert(!index_reg);
342 if (!m_addr_used) {
343 m_addr_used = indirect_addr;
344 m_addr_for_src = !for_dest;
345 m_addr_is_index = false;
346 } else if (!indirect_addr->equal_to(*m_addr_used) || m_addr_is_index) {
347 return false;
348 }
349 } else if (index_reg) {
350 if (!m_addr_used) {
351 m_addr_used = index_reg;
352 m_addr_is_index = true;
353 } else if (!index_reg->equal_to(*m_addr_used) || !m_addr_is_index) {
354 return false;
355 }
356 }
357 return true;
358 }
359
index_mode_load()360 bool AluGroup::index_mode_load()
361 {
362 if (!m_slots[0] || !m_slots[0]->dest())
363 return false;
364
365 Register *dst = m_slots[0]->dest();
366 return dst->has_flag(Register::addr_or_idx) && dst->sel() > 0;
367 }
368
369 void
accept(ConstInstrVisitor & visitor) const370 AluGroup::accept(ConstInstrVisitor& visitor) const
371 {
372 visitor.visit(*this);
373 }
374
375 void
accept(InstrVisitor & visitor)376 AluGroup::accept(InstrVisitor& visitor)
377 {
378 visitor.visit(this);
379 }
380
381 void
set_scheduled()382 AluGroup::set_scheduled()
383 {
384 for (int i = 0; i < s_max_slots; ++i) {
385 if (m_slots[i])
386 m_slots[i]->set_scheduled();
387 }
388 if (m_origin)
389 m_origin->set_scheduled();
390 }
391
392 void
fix_last_flag()393 AluGroup::fix_last_flag()
394 {
395 bool last_seen = false;
396 for (int i = s_max_slots - 1; i >= 0; --i) {
397 if (m_slots[i]) {
398 if (!last_seen) {
399 m_slots[i]->set_alu_flag(alu_last_instr);
400 last_seen = true;
401 } else {
402 m_slots[i]->reset_alu_flag(alu_last_instr);
403 }
404 }
405 }
406 }
407
408 bool
is_equal_to(const AluGroup & other) const409 AluGroup::is_equal_to(const AluGroup& other) const
410 {
411 for (int i = 0; i < s_max_slots; ++i) {
412 if (!other.m_slots[i]) {
413 if (!m_slots[i])
414 continue;
415 else
416 return false;
417 }
418
419 if (m_slots[i]) {
420 if (!other.m_slots[i])
421 return false;
422 else if (!m_slots[i]->is_equal_to(*other.m_slots[i]))
423 return false;
424 }
425 }
426 return true;
427 }
428
429 bool
has_lds_group_end() const430 AluGroup::has_lds_group_end() const
431 {
432 for (int i = 0; i < s_max_slots; ++i) {
433 if (m_slots[i] && m_slots[i]->has_alu_flag(alu_lds_group_end))
434 return true;
435 }
436 return false;
437 }
438
439 bool
do_ready() const440 AluGroup::do_ready() const
441 {
442 for (int i = 0; i < s_max_slots; ++i) {
443 if (m_slots[i] && !m_slots[i]->ready())
444 return false;
445 }
446 return true;
447 }
448
449 void
forward_set_blockid(int id,int index)450 AluGroup::forward_set_blockid(int id, int index)
451 {
452 for (int i = 0; i < s_max_slots; ++i) {
453 if (m_slots[i]) {
454 m_slots[i]->set_blockid(id, index);
455 }
456 }
457 }
458
459 uint32_t
slots() const460 AluGroup::slots() const
461 {
462 uint32_t result = (m_readports_evaluator.m_nliterals + 1) >> 1;
463 for (int i = 0; i < s_max_slots; ++i) {
464 if (m_slots[i])
465 ++result;
466 }
467 if (m_addr_used) {
468 ++result;
469 if (m_addr_is_index && s_max_slots == 5)
470 ++result;
471 }
472
473 return result;
474 }
475
476 void
do_print(std::ostream & os) const477 AluGroup::do_print(std::ostream& os) const
478 {
479 const char slotname[] = "xyzwt";
480
481 os << "ALU_GROUP_BEGIN\n";
482 for (int i = 0; i < s_max_slots; ++i) {
483 if (m_slots[i]) {
484 for (int j = 0; j < 2 * m_nesting_depth + 4; ++j)
485 os << ' ';
486 os << slotname[i] << ": ";
487 m_slots[i]->print(os);
488 os << "\n";
489 }
490 }
491 for (int i = 0; i < 2 * m_nesting_depth + 2; ++i)
492 os << ' ';
493 os << "ALU_GROUP_END";
494 }
495
496 AluInstr::SrcValues
get_kconsts() const497 AluGroup::get_kconsts() const
498 {
499 AluInstr::SrcValues result;
500
501 for (int i = 0; i < s_max_slots; ++i) {
502 if (m_slots[i]) {
503 for (auto s : m_slots[i]->sources())
504 if (s->as_uniform())
505 result.push_back(s);
506 }
507 }
508 return result;
509 }
510
511 void
set_chipclass(r600_chip_class chip_class)512 AluGroup::set_chipclass(r600_chip_class chip_class)
513 {
514 s_chip_class = chip_class;
515 s_max_slots = chip_class == ISA_CC_CAYMAN ? 4 : 5;
516 }
517
518 int AluGroup::s_max_slots = 5;
519 r600_chip_class AluGroup::s_chip_class = ISA_CC_EVERGREEN;
520 } // namespace r600
521