1 /* -*- mesa-c++ -*-
2 * Copyright 2022 Collabora LTD
3 * Author: Gert Wollny <[email protected]>
4 * SPDX-License-Identifier: MIT
5 */
6
7 #include "sfn_instr_lds.h"
8
9 #include "sfn_debug.h"
10 #include "sfn_instr_alu.h"
11
12 namespace r600 {
13
14 using std::istream;
15
LDSReadInstr(std::vector<PRegister,Allocator<PRegister>> & value,AluInstr::SrcValues & address)16 LDSReadInstr::LDSReadInstr(std::vector<PRegister, Allocator<PRegister>>& value,
17 AluInstr::SrcValues& address):
18 m_address(address),
19 m_dest_value(value)
20 {
21 assert(m_address.size() == m_dest_value.size());
22
23 for (auto& v : value)
24 v->add_parent(this);
25
26 for (auto& s : m_address)
27 if (s->as_register())
28 s->as_register()->add_use(this);
29 }
30
31 void
accept(ConstInstrVisitor & visitor) const32 LDSReadInstr::accept(ConstInstrVisitor& visitor) const
33 {
34 visitor.visit(*this);
35 }
36
37 void
accept(InstrVisitor & visitor)38 LDSReadInstr::accept(InstrVisitor& visitor)
39 {
40 visitor.visit(this);
41 }
42
43 bool
remove_unused_components()44 LDSReadInstr::remove_unused_components()
45 {
46 uint8_t inactive_mask = 0;
47 for (size_t i = 0; i < m_dest_value.size(); ++i) {
48 if (m_dest_value[i]->uses().empty())
49 inactive_mask |= 1 << i;
50 }
51
52 if (!inactive_mask)
53 return false;
54
55 auto new_addr = AluInstr::SrcValues();
56 auto new_dest = std::vector<PRegister, Allocator<PRegister>>();
57
58 for (size_t i = 0; i < m_dest_value.size(); ++i) {
59 if ((1 << i) & inactive_mask) {
60 if (m_address[i]->as_register())
61 m_address[i]->as_register()->del_use(this);
62 m_dest_value[i]->del_parent(this);
63 } else {
64 new_dest.push_back(m_dest_value[i]);
65 new_addr.push_back(m_address[i]);
66 }
67 }
68
69 m_dest_value.swap(new_dest);
70 m_address.swap(new_addr);
71
72 return m_address.size() != new_addr.size();
73 }
74
75 class SetLDSAddrProperty : public AluInstrVisitor {
76 using AluInstrVisitor::visit;
visit(AluInstr * instr)77 void visit(AluInstr *instr) override { instr->set_alu_flag(alu_lds_address); }
78 };
79
80 AluInstr *
split(std::vector<AluInstr * > & out_block,AluInstr * last_lds_instr)81 LDSReadInstr::split(std::vector<AluInstr *>& out_block, AluInstr *last_lds_instr)
82 {
83 AluInstr *first_instr = nullptr;
84 SetLDSAddrProperty prop;
85 for (auto& addr : m_address) {
86 auto reg = addr->as_register();
87 if (reg) {
88 reg->del_use(this);
89 if (reg->parents().size() == 1) {
90 for (auto& p : reg->parents()) {
91 p->accept(prop);
92 }
93 }
94 }
95
96 auto instr = new AluInstr(DS_OP_READ_RET, nullptr, nullptr, addr);
97 instr->set_blockid(block_id(), index());
98
99 if (last_lds_instr)
100 instr->add_required_instr(last_lds_instr);
101 out_block.push_back(instr);
102 last_lds_instr = instr;
103 if (!first_instr) {
104 first_instr = instr;
105 first_instr->set_alu_flag(alu_lds_group_start);
106 } else {
107 /* In order to make it possible that the scheduler
108 * keeps the loads of a group close together, we
109 * require that the addresses are all already available
110 * when the first read instruction is emitted.
111 * Otherwise it might happen that the loads and reads from the
112 * queue are split across ALU cf clauses, and this is not allowed */
113 first_instr->add_extra_dependency(addr);
114 }
115 }
116
117 for (auto& dest : m_dest_value) {
118 dest->del_parent(this);
119 auto instr = new AluInstr(op1_mov,
120 dest,
121 new InlineConstant(ALU_SRC_LDS_OQ_A_POP),
122 AluInstr::last_write);
123 instr->add_required_instr(last_lds_instr);
124 instr->set_blockid(block_id(), index());
125 instr->set_always_keep();
126 out_block.push_back(instr);
127 last_lds_instr = instr;
128 }
129 if (last_lds_instr)
130 last_lds_instr->set_alu_flag(alu_lds_group_end);
131
132 return last_lds_instr;
133 }
134
135 bool
do_ready() const136 LDSReadInstr::do_ready() const
137 {
138 unreachable("This instruction is not handled by the scheduler");
139 return false;
140 }
141
142 void
do_print(std::ostream & os) const143 LDSReadInstr::do_print(std::ostream& os) const
144 {
145 os << "LDS_READ ";
146
147 os << "[ ";
148 for (auto d : m_dest_value) {
149 os << *d << " ";
150 }
151 os << "] : [ ";
152 for (auto a : m_address) {
153 os << *a << " ";
154 }
155 os << "]";
156 }
157
158 bool
is_equal_to(const LDSReadInstr & rhs) const159 LDSReadInstr::is_equal_to(const LDSReadInstr& rhs) const
160 {
161 if (m_address.size() != rhs.m_address.size())
162 return false;
163
164 for (unsigned i = 0; i < num_values(); ++i) {
165 if (!m_address[i]->equal_to(*rhs.m_address[i]))
166 return false;
167 if (!m_dest_value[i]->equal_to(*rhs.m_dest_value[i]))
168 return false;
169 }
170 return true;
171 }
172
173 auto
from_string(istream & is,ValueFactory & value_factory)174 LDSReadInstr::from_string(istream& is, ValueFactory& value_factory) -> Pointer
175 {
176 /* LDS_READ [ d1, d2, d3 ... ] : a1 a2 a3 ... */
177
178 std::string temp_str;
179
180 is >> temp_str;
181 assert(temp_str == "[");
182
183 std::vector<PRegister, Allocator<PRegister>> dests;
184 AluInstr::SrcValues srcs;
185
186 is >> temp_str;
187 while (temp_str != "]") {
188 auto dst = value_factory.dest_from_string(temp_str);
189 assert(dst);
190 dests.push_back(dst);
191 is >> temp_str;
192 }
193
194 is >> temp_str;
195 assert(temp_str == ":");
196 is >> temp_str;
197 assert(temp_str == "[");
198
199 is >> temp_str;
200 while (temp_str != "]") {
201 auto src = value_factory.src_from_string(temp_str);
202 assert(src);
203 srcs.push_back(src);
204 is >> temp_str;
205 };
206 assert(srcs.size() == dests.size() && !dests.empty());
207
208 return new LDSReadInstr(dests, srcs);
209 }
210
replace_dest(PRegister new_dest,AluInstr * move_instr)211 bool LDSReadInstr::replace_dest(PRegister new_dest, AluInstr *move_instr)
212 {
213 if (new_dest->pin() == pin_array)
214 return false;
215
216 auto old_dest = move_instr->psrc(0);
217
218 bool success = false;
219
220 for (unsigned i = 0; i < m_dest_value.size(); ++i) {
221 auto& dest = m_dest_value[i];
222
223 if (!dest->equal_to(*old_dest))
224 continue;
225
226 if (dest->equal_to(*new_dest))
227 continue;
228
229 if (dest->uses().size() > 1)
230 continue;
231
232 if (dest->pin() == pin_fully)
233 continue;
234
235 if (dest->pin() == pin_group)
236 continue;
237
238 if (dest->pin() == pin_chan && new_dest->chan() != dest->chan())
239 continue;
240
241 if (dest->pin() == pin_chan) {
242 if (new_dest->pin() == pin_group)
243 new_dest->set_pin(pin_chgr);
244 else
245 new_dest->set_pin(pin_chan);
246 }
247 m_dest_value[i] = new_dest;
248 success = true;
249 }
250 return success;
251 }
252
LDSAtomicInstr(ESDOp op,PRegister dest,PVirtualValue address,const SrcValues & srcs)253 LDSAtomicInstr::LDSAtomicInstr(ESDOp op,
254 PRegister dest,
255 PVirtualValue address,
256 const SrcValues& srcs):
257 m_opcode(op),
258 m_address(address),
259 m_dest(dest),
260 m_srcs(srcs)
261 {
262 if (m_dest)
263 m_dest->add_parent(this);
264
265 if (m_address->as_register())
266 m_address->as_register()->add_use(this);
267
268 for (auto& s : m_srcs) {
269 if (s->as_register())
270 s->as_register()->add_use(this);
271 }
272 }
273
274 void
accept(ConstInstrVisitor & visitor) const275 LDSAtomicInstr::accept(ConstInstrVisitor& visitor) const
276 {
277 visitor.visit(*this);
278 }
279
280 void
accept(InstrVisitor & visitor)281 LDSAtomicInstr::accept(InstrVisitor& visitor)
282 {
283 visitor.visit(this);
284 }
285
286 AluInstr *
split(std::vector<AluInstr * > & out_block,AluInstr * last_lds_instr)287 LDSAtomicInstr::split(std::vector<AluInstr *>& out_block, AluInstr *last_lds_instr)
288 {
289 AluInstr::SrcValues srcs = {m_address};
290
291 for (auto& s : m_srcs)
292 srcs.push_back(s);
293
294 for (auto& s : srcs) {
295 if (s->as_register())
296 s->as_register()->del_use(this);
297 }
298
299 SetLDSAddrProperty prop;
300 auto reg = srcs[0]->as_register();
301 if (reg) {
302 reg->del_use(this);
303 if (reg->parents().size() == 1) {
304 for (auto& p : reg->parents()) {
305 p->accept(prop);
306 }
307 }
308 }
309
310 auto op_instr = new AluInstr(m_opcode, srcs, {});
311 op_instr->set_blockid(block_id(), index());
312
313 if (last_lds_instr) {
314 op_instr->add_required_instr(last_lds_instr);
315 }
316 last_lds_instr = op_instr;
317
318 out_block.push_back(op_instr);
319 if (m_dest) {
320 op_instr->set_alu_flag(alu_lds_group_start);
321 m_dest->del_parent(this);
322 auto read_instr = new AluInstr(op1_mov,
323 m_dest,
324 new InlineConstant(ALU_SRC_LDS_OQ_A_POP),
325 AluInstr::last_write);
326 read_instr->add_required_instr(op_instr);
327 read_instr->set_blockid(block_id(), index());
328 read_instr->set_alu_flag(alu_lds_group_end);
329 out_block.push_back(read_instr);
330 last_lds_instr = read_instr;
331 }
332 return last_lds_instr;
333 }
334
335 bool
replace_source(PRegister old_src,PVirtualValue new_src)336 LDSAtomicInstr::replace_source(PRegister old_src, PVirtualValue new_src)
337 {
338 bool process = false;
339
340 if (new_src->as_uniform()) {
341 if (m_srcs.size() > 2) {
342 int nconst = 0;
343 for (auto& s : m_srcs) {
344 if (s->as_uniform() && !s->equal_to(*old_src))
345 ++nconst;
346 }
347 /* Conservative check: with two kcache values can always live,
348 * tree might be a problem, don't care for now, just reject
349 */
350 if (nconst > 2)
351 return false;
352 }
353
354 /* indirect constant buffer access means new CF, and this is something
355 * we can't do in the middle of an LDS read group */
356 auto u = new_src->as_uniform();
357 if (u->buf_addr())
358 return false;
359 }
360
361 /* If the source is an array element, we assume that there
362 * might have been an (untracked) indirect access, so don't replace
363 * this source */
364 if (old_src->pin() == pin_array || new_src->pin() == pin_array)
365 return false;
366
367 for (unsigned i = 0; i < m_srcs.size(); ++i) {
368 if (old_src->equal_to(*m_srcs[i])) {
369 m_srcs[i] = new_src;
370 process = true;
371 }
372 }
373
374 if (process) {
375 auto r = new_src->as_register();
376 if (r)
377 r->add_use(this);
378 old_src->del_use(this);
379 }
380 return process;
381 }
382
383 bool
do_ready() const384 LDSAtomicInstr::do_ready() const
385 {
386 unreachable("This instruction is not handled by the scheduler");
387 return false;
388 }
389
390 void
do_print(std::ostream & os) const391 LDSAtomicInstr::do_print(std::ostream& os) const
392 {
393 auto ii = lds_ops.find(m_opcode);
394 assert(ii != lds_ops.end());
395
396 os << "LDS " << ii->second.name << " ";
397 if (m_dest)
398 os << *m_dest;
399 else
400 os << "__.x";
401
402 os << " [ " << *m_address << " ] : " << *m_srcs[0];
403 if (m_srcs.size() > 1)
404 os << " " << *m_srcs[1];
405 }
406
407 bool
is_equal_to(const LDSAtomicInstr & rhs) const408 LDSAtomicInstr::is_equal_to(const LDSAtomicInstr& rhs) const
409 {
410 if (m_srcs.size() != rhs.m_srcs.size())
411 return false;
412
413 for (unsigned i = 0; i < m_srcs.size(); ++i) {
414 if (!m_srcs[i]->equal_to(*rhs.m_srcs[i]))
415 return false;
416 }
417
418 return m_opcode == rhs.m_opcode && sfn_value_equal(m_address, rhs.m_address) &&
419 sfn_value_equal(m_dest, rhs.m_dest);
420 }
421
422 auto
from_string(istream & is,ValueFactory & value_factory)423 LDSAtomicInstr::from_string(istream& is, ValueFactory& value_factory) -> Pointer
424 {
425 /* LDS WRITE2 __.x [ R1.x ] : R2.y R3.z */
426 /* LDS WRITE __.x [ R1.x ] : R2.y */
427 /* LDS ATOMIC_ADD_RET [ R5.y ] : R2.y */
428
429 std::string temp_str;
430
431 is >> temp_str;
432
433 ESDOp opcode = DS_OP_INVALID;
434 int nsrc = 0;
435
436 for (auto& [op, opinfo] : lds_ops) {
437 if (temp_str == opinfo.name) {
438 opcode = op;
439 nsrc = opinfo.nsrc;
440 break;
441 }
442 }
443
444 assert(opcode != DS_OP_INVALID);
445
446 is >> temp_str;
447
448 PRegister dest = nullptr;
449 if (temp_str[0] != '_')
450 dest = value_factory.dest_from_string(temp_str);
451
452 is >> temp_str;
453 assert(temp_str == "[");
454 is >> temp_str;
455 auto addr = value_factory.src_from_string(temp_str);
456
457 is >> temp_str;
458 assert(temp_str == "]");
459
460 is >> temp_str;
461 assert(temp_str == ":");
462
463 AluInstr::SrcValues srcs;
464 for (int i = 0; i < nsrc - 1; ++i) {
465 is >> temp_str;
466 auto src = value_factory.src_from_string(temp_str);
467 assert(src);
468 srcs.push_back(src);
469 }
470
471 return new LDSAtomicInstr(opcode, dest, addr, srcs);
472 }
473
474 } // namespace r600
475