1 /* -*- mesa-c++ -*-
2 * Copyright 2021 Collabora LTD
3 * Author: Gert Wollny <[email protected]>
4 * SPDX-License-Identifier: MIT
5 */
6
7 #include "sfn_instr_alugroup.h"
8 #include "sfn_instr_controlflow.h"
9 #include "sfn_instr_export.h"
10 #include "sfn_instr_fetch.h"
11 #include "sfn_instr_lds.h"
12 #include "sfn_instr_mem.h"
13 #include "sfn_instr_tex.h"
14
15 #include <iostream>
16 #include <limits>
17 #include <numeric>
18 #include <sstream>
19
20 namespace r600 {
21
22 using std::string;
23 using std::vector;
24
Instr()25 Instr::Instr():
26 m_use_count(0),
27 m_block_id(std::numeric_limits<int>::max()),
28 m_index(std::numeric_limits<int>::max())
29 {
30 }
31
~Instr()32 Instr::~Instr() {}
33
34 void
print(std::ostream & os) const35 Instr::print(std::ostream& os) const
36 {
37 do_print(os);
38 }
39
40 bool
ready() const41 Instr::ready() const
42 {
43 if (is_scheduled())
44 return true;
45 for (auto& i : m_required_instr)
46 if (!i->ready())
47 return false;
48 return do_ready();
49 }
50
/* Try to read an integer that follows `prefix` at the start of `str`.
 *
 * Returns false and leaves `value` untouched when `str` does not start with
 * `prefix`. Otherwise the remainder of the string is fed through a stream
 * extraction into `value` and true is returned; note that the result of the
 * extraction itself is not checked. */
bool
int_from_string_with_prefix_optional(const std::string& str,
                                     const std::string& prefix,
                                     int& value)
{
   const bool has_prefix = str.compare(0, prefix.length(), prefix) == 0;
   if (!has_prefix)
      return false;

   std::istringstream number(str.substr(prefix.length()));
   number >> value;
   return true;
}
64
65 int
int_from_string_with_prefix(const std::string & str,const std::string & prefix)66 int_from_string_with_prefix(const std::string& str, const std::string& prefix)
67 {
68 int retval = 0;
69 if (!int_from_string_with_prefix_optional(str, prefix, retval)) {
70 std::cerr << "Expect '" << prefix << "' as start of '" << str << "'\n";
71 assert(0);
72 }
73 return retval;
74 }
75
76 int
sel_and_szw_from_string(const std::string & str,RegisterVec4::Swizzle & swz,bool & is_ssa)77 sel_and_szw_from_string(const std::string& str, RegisterVec4::Swizzle& swz, bool& is_ssa)
78 {
79 assert(str[0] == 'R' || str[0] == '_' || str[0] == 'S');
80 int sel = 0;
81
82 auto istr = str.begin() + 1;
83
84 if (str[0] == '_') {
85 while (istr != str.end() && *istr == '_')
86 ++istr;
87 sel = std::numeric_limits<int>::max();
88 } else {
89 while (istr != str.end() && isdigit(*istr)) {
90 sel *= 10;
91 sel += *istr - '0';
92 ++istr;
93 }
94 }
95
96 assert(*istr == '.');
97 istr++;
98
99 int i = 0;
100 while (istr != str.end()) {
101 switch (*istr) {
102 case 'x':
103 swz[i] = 0;
104 break;
105 case 'y':
106 swz[i] = 1;
107 break;
108 case 'z':
109 swz[i] = 2;
110 break;
111 case 'w':
112 swz[i] = 3;
113 break;
114 case '0':
115 swz[i] = 4;
116 break;
117 case '1':
118 swz[i] = 5;
119 break;
120 case '_':
121 swz[i] = 7;
122 break;
123 default:
124 unreachable("Unknown swizzle character");
125 }
126 ++istr;
127 ++i;
128 }
129
130 is_ssa = str[0] == 'S';
131
132 return sel;
133 }
134
135 bool
is_last() const136 Instr::is_last() const
137 {
138 return true;
139 }
140
141 bool
set_dead()142 Instr::set_dead()
143 {
144 if (m_instr_flags.test(always_keep))
145 return false;
146 bool is_dead = propagate_death();
147 m_instr_flags.set(dead);
148 return is_dead;
149 }
150
151 bool
propagate_death()152 Instr::propagate_death()
153 {
154 return true;
155 }
156
157 bool
replace_source(PRegister old_src,PVirtualValue new_src)158 Instr::replace_source(PRegister old_src, PVirtualValue new_src)
159 {
160 (void)old_src;
161 (void)new_src;
162 return false;
163 }
164
165 void
add_required_instr(Instr * instr)166 Instr::add_required_instr(Instr *instr)
167 {
168 assert(instr);
169 m_required_instr.push_back(instr);
170 instr->m_dependend_instr.push_back(this);
171 }
172
173 void
replace_required_instr(Instr * old_instr,Instr * new_instr)174 Instr::replace_required_instr(Instr *old_instr, Instr *new_instr)
175 {
176
177 for (auto i = m_required_instr.begin(); i != m_required_instr.end(); ++i) {
178 if (*i == old_instr)
179 *i = new_instr;
180 }
181 }
182
183 bool
replace_dest(PRegister new_dest,r600::AluInstr * move_instr)184 Instr::replace_dest(PRegister new_dest, r600::AluInstr *move_instr)
185 {
186 (void)new_dest;
187 (void)move_instr;
188 return false;
189 }
190
191 void
set_blockid(int id,int index)192 Instr::set_blockid(int id, int index)
193 {
194 m_block_id = id;
195 m_index = index;
196 forward_set_blockid(id, index);
197 }
198
199 void
forward_set_blockid(int id,int index)200 Instr::forward_set_blockid(int id, int index)
201 {
202 (void)id;
203 (void)index;
204 }
205
InstrWithVectorResult(const RegisterVec4 & dest,const RegisterVec4::Swizzle & dest_swizzle,int resource_base,PRegister resource_offset)206 InstrWithVectorResult::InstrWithVectorResult(const RegisterVec4& dest,
207 const RegisterVec4::Swizzle& dest_swizzle,
208 int resource_base,
209 PRegister resource_offset):
210 Resource(this, resource_base, resource_offset),
211 m_dest(dest),
212 m_dest_swizzle(dest_swizzle)
213 {
214 for (int i = 0; i < 4; ++i) {
215 if (m_dest_swizzle[i] < 6)
216 m_dest[i]->add_parent(this);
217 }
218 }
219
220 void
print_dest(std::ostream & os) const221 InstrWithVectorResult::print_dest(std::ostream& os) const
222 {
223 os << (m_dest[0]->has_flag(Register::ssa) ? 'S' : 'R') << m_dest.sel();
224 os << ".";
225 for (int i = 0; i < 4; ++i)
226 os << VirtualValue::chanchar[m_dest_swizzle[i]];
227 }
228
229 bool
comp_dest(const RegisterVec4 & dest,const RegisterVec4::Swizzle & dest_swizzle) const230 InstrWithVectorResult::comp_dest(const RegisterVec4& dest,
231 const RegisterVec4::Swizzle& dest_swizzle) const
232 {
233 for (int i = 0; i < 4; ++i) {
234 if (!m_dest[i]->equal_to(*dest[i])) {
235 return false;
236 }
237 if (m_dest_swizzle[i] != dest_swizzle[i])
238 return false;
239 }
240 return true;
241 }
242
243 void
do_print(std::ostream & os) const244 Block::do_print(std::ostream& os) const
245 {
246 for (int j = 0; j < 2 * m_nesting_depth; ++j)
247 os << ' ';
248 os << "BLOCK START\n";
249 for (auto& i : m_instructions) {
250 for (int j = 0; j < 2 * (m_nesting_depth + i->nesting_corr()) + 2; ++j)
251 os << ' ';
252 os << *i << "\n";
253 }
254 for (int j = 0; j < 2 * m_nesting_depth; ++j)
255 os << ' ';
256 os << "BLOCK END\n";
257 }
258
259 bool
is_equal_to(const Block & lhs) const260 Block::is_equal_to(const Block& lhs) const
261 {
262 if (m_id != lhs.m_id || m_nesting_depth != lhs.m_nesting_depth)
263 return false;
264
265 if (m_instructions.size() != lhs.m_instructions.size())
266 return false;
267
268 return std::inner_product(
269 m_instructions.begin(),
270 m_instructions.end(),
271 lhs.m_instructions.begin(),
272 true,
273 [](bool l, bool r) { return l && r; },
274 [](PInst l, PInst r) { return l->equal_to(*r); });
275 }
276
277 inline bool
operator !=(const Block & lhs,const Block & rhs)278 operator!=(const Block& lhs, const Block& rhs)
279 {
280 return !lhs.is_equal_to(rhs);
281 }
282
283 void
erase(iterator node)284 Block::erase(iterator node)
285 {
286 m_instructions.erase(node);
287 }
288
289 void
set_type(Type t,r600_chip_class chip_class)290 Block::set_type(Type t, r600_chip_class chip_class)
291 {
292 m_block_type = t;
293 switch (t) {
294 case vtx:
295 /* In theory on >= EG VTX support 16 slots, but with vertex fetch
296 * instructions the register pressure increases fast - i.e. in the worst
297 * case four register more get used, so stick to 8 slots for now.
298 * TODO: think about some trickery in the schedler to make use of up
299 * to 16 slots if the register pressure doesn't get too high.
300 */
301 m_remaining_slots = 8;
302 break;
303 case gds:
304 case tex:
305 m_remaining_slots = chip_class >= ISA_CC_EVERGREEN ? 16 : 8;
306 break;
307 case alu:
308 /* 128 but a follow up block might need to emit and ADDR + INDEX load */
309 m_remaining_slots = 118;
310 break;
311 default:
312 m_remaining_slots = 0xffff;
313 }
314 }
315
Block(int nesting_depth,int id)316 Block::Block(int nesting_depth, int id):
317 m_nesting_depth(nesting_depth),
318 m_id(id),
319 m_next_index(0)
320 {
321 assert(!has_instr_flag(force_cf));
322 }
323
324 void
accept(ConstInstrVisitor & visitor) const325 Block::accept(ConstInstrVisitor& visitor) const
326 {
327 visitor.visit(*this);
328 }
329
330 void
accept(InstrVisitor & visitor)331 Block::accept(InstrVisitor& visitor)
332 {
333 visitor.visit(this);
334 }
335
336 void
push_back(PInst instr)337 Block::push_back(PInst instr)
338 {
339 instr->set_blockid(m_id, m_next_index++);
340 if (m_remaining_slots != 0xffff) {
341 uint32_t new_slots = instr->slots();
342 m_remaining_slots -= new_slots;
343 }
344 if (m_lds_group_start)
345 m_lds_group_requirement += instr->slots();
346
347 m_instructions.push_back(instr);
348 }
349
350 Block::iterator
insert(const iterator pos,Instr * instr)351 Block::insert(const iterator pos, Instr *instr)
352 {
353 return m_instructions.insert(pos, instr);
354 }
355
356 bool
try_reserve_kcache(const AluGroup & group)357 Block::try_reserve_kcache(const AluGroup& group)
358 {
359 auto kcache = m_kcache;
360
361 auto kcache_constants = group.get_kconsts();
362 for (auto& kc : kcache_constants) {
363 auto u = kc->as_uniform();
364 assert(u);
365 if (!try_reserve_kcache(*u, kcache)) {
366 m_kcache_alloc_failed = true;
367 return false;
368 }
369 }
370
371 m_kcache = kcache;
372 m_kcache_alloc_failed = false;
373 return true;
374 }
375
376 bool
try_reserve_kcache(const AluInstr & instr)377 Block::try_reserve_kcache(const AluInstr& instr)
378 {
379 auto kcache = m_kcache;
380
381 for (auto& src : instr.sources()) {
382 auto u = src->as_uniform();
383 if (u) {
384 if (!try_reserve_kcache(*u, kcache)) {
385 m_kcache_alloc_failed = true;
386 return false;
387 }
388 }
389 }
390 m_kcache = kcache;
391 m_kcache_alloc_failed = false;
392 return true;
393 }
394
395 void
set_chipclass(r600_chip_class chip_class)396 Block::set_chipclass(r600_chip_class chip_class)
397 {
398 if (chip_class < ISA_CC_EVERGREEN)
399 s_max_kcache_banks = 2;
400 else
401 s_max_kcache_banks = 4;
402 }
403
404 unsigned Block::s_max_kcache_banks = 4;
405
406 bool
try_reserve_kcache(const UniformValue & u,std::array<KCacheLine,4> & kcache) const407 Block::try_reserve_kcache(const UniformValue& u, std::array<KCacheLine, 4>& kcache) const
408 {
409 const int kcache_banks = s_max_kcache_banks; // TODO: handle pre-evergreen
410
411 int bank = u.kcache_bank();
412 int sel = (u.sel() - 512);
413 int line = sel >> 4;
414 EBufferIndexMode index_mode = bim_none;
415
416 if (auto addr = u.buf_addr())
417 index_mode = addr->sel() == AddressRegister::idx0 ? bim_zero : bim_one;
418
419 bool found = false;
420
421 for (int i = 0; i < kcache_banks && !found; ++i) {
422 if (kcache[i].mode) {
423 if (kcache[i].bank < bank)
424 continue;
425
426
427 if (kcache[i].bank == bank &&
428 kcache[i].index_mode != bim_none &&
429 kcache[i].index_mode != index_mode) {
430 return false;
431 }
432 if ((kcache[i].bank == bank && kcache[i].addr > line + 1) ||
433 kcache[i].bank > bank) {
434 if (kcache[kcache_banks - 1].mode)
435 return false;
436
437 memmove(&kcache[i + 1],
438 &kcache[i],
439 (kcache_banks - i - 1) * sizeof(KCacheLine));
440 kcache[i].mode = KCacheLine::lock_1;
441 kcache[i].bank = bank;
442 kcache[i].addr = line;
443 kcache[i].index_mode = index_mode;
444 return true;
445 }
446
447 int d = line - kcache[i].addr;
448
449 if (d == -1) {
450 kcache[i].addr--;
451 if (kcache[i].mode == KCacheLine::lock_2) {
452 /* we are prepending the line to the current set,
453 * discarding the existing second line,
454 * so we'll have to insert line+2 after it */
455 line += 2;
456 continue;
457 } else if (kcache[i].mode == KCacheLine::lock_1) {
458 kcache[i].mode = KCacheLine::lock_2;
459 return true;
460 } else {
461 /* V_SQ_CF_KCACHE_LOCK_LOOP_INDEX is not supported */
462 return false;
463 }
464 } else if (d == 1) {
465 kcache[i].mode = KCacheLine::lock_2;
466 return true;
467 } else if (d == 0) {
468 return true;
469 }
470 } else { /* free kcache set - use it */
471 kcache[i].mode = KCacheLine::lock_1;
472 kcache[i].bank = bank;
473 kcache[i].addr = line;
474 kcache[i].index_mode = index_mode;
475 return true;
476 }
477 }
478 return false;
479 }
480
481 void
lds_group_start(AluInstr * alu)482 Block::lds_group_start(AluInstr *alu)
483 {
484 assert(!m_lds_group_start);
485 m_lds_group_start = alu;
486 m_lds_group_requirement = 0;
487 }
488
489 void
lds_group_end()490 Block::lds_group_end()
491 {
492 assert(m_lds_group_start);
493 m_lds_group_start->set_required_slots(m_lds_group_requirement);
494 m_lds_group_start = 0;
495 }
496
/* Copy constructor: copies the Resource part, the destination vector and
 * the destination swizzle of `orig`. Nothing else is done here. */
InstrWithVectorResult::InstrWithVectorResult(const InstrWithVectorResult& orig):
    Resource(orig),
    m_dest(orig.m_dest),
    m_dest_swizzle(orig.m_dest_swizzle)
{
}
503
update_indirect_addr(UNUSED PRegister old_reg,PRegister addr)504 void InstrWithVectorResult::update_indirect_addr(UNUSED PRegister old_reg, PRegister addr)
505 {
506 set_resource_offset(addr);
507 }
508
509 class InstrComparer : public ConstInstrVisitor {
510 public:
511 InstrComparer() = default;
512 bool result{false};
513
514 #define DECLARE_MEMBER(TYPE) \
515 InstrComparer(const TYPE *instr) { this_##TYPE = instr; } \
516 \
517 void visit(const TYPE& instr) \
518 { \
519 result = false; \
520 if (!this_##TYPE) \
521 return; \
522 result = this_##TYPE->is_equal_to(instr); \
523 } \
524 \
525 const TYPE *this_##TYPE{nullptr};
526
527 DECLARE_MEMBER(AluInstr);
528 DECLARE_MEMBER(AluGroup);
529 DECLARE_MEMBER(TexInstr);
530 DECLARE_MEMBER(ExportInstr);
531 DECLARE_MEMBER(FetchInstr);
532 DECLARE_MEMBER(Block);
533 DECLARE_MEMBER(ControlFlowInstr);
534 DECLARE_MEMBER(IfInstr);
535 DECLARE_MEMBER(ScratchIOInstr);
536 DECLARE_MEMBER(StreamOutInstr);
537 DECLARE_MEMBER(MemRingOutInstr);
538 DECLARE_MEMBER(EmitVertexInstr);
539 DECLARE_MEMBER(GDSInstr);
540 DECLARE_MEMBER(WriteTFInstr);
541 DECLARE_MEMBER(LDSAtomicInstr);
542 DECLARE_MEMBER(LDSReadInstr);
543 DECLARE_MEMBER(RatInstr);
544 };
545
546 class InstrCompareForward : public ConstInstrVisitor {
547 public:
visit(const AluInstr & instr)548 void visit(const AluInstr& instr) override { m_comparer = InstrComparer(&instr); }
549
visit(const AluGroup & instr)550 void visit(const AluGroup& instr) override { m_comparer = InstrComparer(&instr); }
551
visit(const TexInstr & instr)552 void visit(const TexInstr& instr) override { m_comparer = InstrComparer(&instr); }
553
visit(const ExportInstr & instr)554 void visit(const ExportInstr& instr) override { m_comparer = InstrComparer(&instr); }
555
visit(const FetchInstr & instr)556 void visit(const FetchInstr& instr) override { m_comparer = InstrComparer(&instr); }
557
visit(const Block & instr)558 void visit(const Block& instr) override { m_comparer = InstrComparer(&instr); }
559
visit(const ControlFlowInstr & instr)560 void visit(const ControlFlowInstr& instr) override
561 {
562 m_comparer = InstrComparer(&instr);
563 }
564
visit(const IfInstr & instr)565 void visit(const IfInstr& instr) override { m_comparer = InstrComparer(&instr); }
566
visit(const ScratchIOInstr & instr)567 void visit(const ScratchIOInstr& instr) override
568 {
569 m_comparer = InstrComparer(&instr);
570 }
571
visit(const StreamOutInstr & instr)572 void visit(const StreamOutInstr& instr) override
573 {
574 m_comparer = InstrComparer(&instr);
575 }
576
visit(const MemRingOutInstr & instr)577 void visit(const MemRingOutInstr& instr) override
578 {
579 m_comparer = InstrComparer(&instr);
580 }
581
visit(const EmitVertexInstr & instr)582 void visit(const EmitVertexInstr& instr) override
583 {
584 m_comparer = InstrComparer(&instr);
585 }
586
visit(const GDSInstr & instr)587 void visit(const GDSInstr& instr) override { m_comparer = InstrComparer(&instr); }
588
visit(const WriteTFInstr & instr)589 void visit(const WriteTFInstr& instr) override { m_comparer = InstrComparer(&instr); }
590
visit(const LDSAtomicInstr & instr)591 void visit(const LDSAtomicInstr& instr) override
592 {
593 m_comparer = InstrComparer(&instr);
594 }
595
visit(const LDSReadInstr & instr)596 void visit(const LDSReadInstr& instr) override { m_comparer = InstrComparer(&instr); }
597
visit(const RatInstr & instr)598 void visit(const RatInstr& instr) override { m_comparer = InstrComparer(&instr); }
599
600 InstrComparer m_comparer;
601 };
602
603 bool
equal_to(const Instr & lhs) const604 Instr::equal_to(const Instr& lhs) const
605 {
606 InstrCompareForward cmp;
607 accept(cmp);
608 lhs.accept(cmp.m_comparer);
609
610 return cmp.m_comparer.result;
611 }
612
613 } // namespace r600
614