/* -*- mesa-c++ -*-
 * Copyright 2022 Collabora LTD
 * Author: Gert Wollny <[email protected]>
 * SPDX-License-Identifier: MIT
 */

#include "sfn_instr_mem.h"

#include "nir_intrinsics.h"
#include "nir_intrinsics_indices.h"
#include "sfn_alu_defines.h"
#include "sfn_instr_alu.h"
#include "sfn_instr_fetch.h"
#include "sfn_instr_tex.h"
#include "sfn_shader.h"
#include "sfn_virtualvalues.h"

namespace r600 {

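/* GDSInstr models an operation on the global data share (GDS); it is
 * used here to implement atomic counters. */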
GDSInstr::GDSInstr(
   ESDOp op, Register *dest, const RegisterVec4& src, int uav_base, PRegister uav_id):
   Resource(this, uav_base, uav_id),
   m_op(op),
   m_dest(dest),
   m_src(src)
{
   set_always_keep();

   m_src.add_use(this);
   if (m_dest)
      m_dest->add_parent(this);
}

bool
GDSInstr::is_equal_to(const GDSInstr& rhs) const
{
#define NE(X) (X != rhs.X)

   if (NE(m_op) || NE(m_src))
      return false;

   if (!sfn_value_equal(m_dest, rhs.m_dest))
      return false;

   return resource_is_equal(rhs);
}

void
GDSInstr::accept(ConstInstrVisitor& visitor) const
{
   visitor.visit(*this);
}

void
GDSInstr::accept(InstrVisitor& visitor)
{
   visitor.visit(this);
}

bool
GDSInstr::do_ready() const
{
   return m_src.ready(block_id(), index()) && resource_ready(block_id(), index());
}

void
GDSInstr::do_print(std::ostream& os) const
{
   os << "GDS " << lds_ops.at(m_op).name;
   if (m_dest)
      os << *m_dest;
   else
      os << "___";
   os << " " << m_src;
   os << " BASE:" << resource_id();

   print_resource_offset(os);
}

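/* Dispatch the NIR atomic counter intrinsics to the matching GDS
 * emit helpers. */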
bool
GDSInstr::emit_atomic_counter(nir_intrinsic_instr *intr, Shader& shader)
{
   switch (intr->intrinsic) {
   case nir_intrinsic_atomic_counter_add:
   case nir_intrinsic_atomic_counter_and:
   case nir_intrinsic_atomic_counter_exchange:
   case nir_intrinsic_atomic_counter_max:
   case nir_intrinsic_atomic_counter_min:
   case nir_intrinsic_atomic_counter_or:
   case nir_intrinsic_atomic_counter_xor:
   case nir_intrinsic_atomic_counter_comp_swap:
      return emit_atomic_op2(intr, shader);
   case nir_intrinsic_atomic_counter_read:
   case nir_intrinsic_atomic_counter_post_dec:
      return emit_atomic_read(intr, shader);
   case nir_intrinsic_atomic_counter_inc:
      return emit_atomic_inc(intr, shader);
   case nir_intrinsic_atomic_counter_pre_dec:
      return emit_atomic_pre_dec(intr, shader);
   default:
      return false;
   }
}

uint8_t GDSInstr::allowed_src_chan_mask() const
{
   return m_src.free_chan_mask();
}

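/* Map atomic counter intrinsics to the GDS ops that return the previous
 * value. Pre-decrement has no direct GDS equivalent and is handled
 * separately in emit_atomic_pre_dec. */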
static ESDOp
get_opcode(const nir_intrinsic_op opcode)
{
   switch (opcode) {
   case nir_intrinsic_atomic_counter_add:
      return DS_OP_ADD_RET;
   case nir_intrinsic_atomic_counter_and:
      return DS_OP_AND_RET;
   case nir_intrinsic_atomic_counter_exchange:
      return DS_OP_XCHG_RET;
   case nir_intrinsic_atomic_counter_inc:
      return DS_OP_INC_RET;
   case nir_intrinsic_atomic_counter_max:
      return DS_OP_MAX_UINT_RET;
   case nir_intrinsic_atomic_counter_min:
      return DS_OP_MIN_UINT_RET;
   case nir_intrinsic_atomic_counter_or:
      return DS_OP_OR_RET;
   case nir_intrinsic_atomic_counter_read:
      return DS_OP_READ_RET;
   case nir_intrinsic_atomic_counter_xor:
      return DS_OP_XOR_RET;
   case nir_intrinsic_atomic_counter_post_dec:
      return DS_OP_DEC_RET;
   case nir_intrinsic_atomic_counter_comp_swap:
      return DS_OP_CMP_XCHG_RET;
   case nir_intrinsic_atomic_counter_pre_dec:
   default:
      return DS_OP_INVALID;
   }
}

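/* Write-only variants for atomics whose result is never read. Exchange
 * and compare-exchange only exist in the returning form. */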
static ESDOp
get_opcode_wo(const nir_intrinsic_op opcode)
{
   switch (opcode) {
   case nir_intrinsic_atomic_counter_add:
      return DS_OP_ADD;
   case nir_intrinsic_atomic_counter_and:
      return DS_OP_AND;
   case nir_intrinsic_atomic_counter_inc:
      return DS_OP_INC;
   case nir_intrinsic_atomic_counter_max:
      return DS_OP_MAX_UINT;
   case nir_intrinsic_atomic_counter_min:
      return DS_OP_MIN_UINT;
   case nir_intrinsic_atomic_counter_or:
      return DS_OP_OR;
   case nir_intrinsic_atomic_counter_xor:
      return DS_OP_XOR;
   case nir_intrinsic_atomic_counter_post_dec:
      return DS_OP_DEC;
   case nir_intrinsic_atomic_counter_comp_swap:
      return DS_OP_CMP_XCHG_RET;
   case nir_intrinsic_atomic_counter_exchange:
      return DS_OP_XCHG_RET;
   case nir_intrinsic_atomic_counter_pre_dec:
   default:
      return DS_OP_INVALID;
   }
}

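/* Emit a two-source GDS atomic. Pre-Cayman passes the source value in a
 * single register; on Cayman the byte offset of the counter is computed
 * into the x channel and the value is moved into y. */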
bool
GDSInstr::emit_atomic_op2(nir_intrinsic_instr *instr, Shader& shader)
{
   auto& vf = shader.value_factory();
   bool read_result = !list_is_empty(&instr->def.uses);

   ESDOp op =
      read_result ? get_opcode(instr->intrinsic) : get_opcode_wo(instr->intrinsic);

   if (DS_OP_INVALID == op)
      return false;

   auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0);
   offset += nir_intrinsic_base(instr);

   auto dest = read_result ? vf.dest(instr->def, 0, pin_free) : nullptr;

   PRegister src_as_register = nullptr;
   auto src_val = vf.src(instr->src[1], 0);
   if (!src_val->as_register()) {
      auto temp_src_val = vf.temp_register();
      shader.emit_instruction(
         new AluInstr(op1_mov, temp_src_val, src_val, AluInstr::last_write));
      src_as_register = temp_src_val;
   } else
      src_as_register = src_val->as_register();

   if (uav_id != nullptr)
      shader.set_flag(Shader::sh_indirect_atomic);

   GDSInstr *ir = nullptr;
   if (shader.chip_class() < ISA_CC_CAYMAN) {
      RegisterVec4 src(nullptr, src_as_register, nullptr, nullptr, pin_free);
      ir = new GDSInstr(op, dest, src, offset, uav_id);
   } else {
      auto dest = vf.dest(instr->def, 0, pin_free);
      auto tmp = vf.temp_vec4(pin_group, {0, 1, 7, 7});
      if (uav_id)
         shader.emit_instruction(new AluInstr(op3_muladd_uint24,
                                              tmp[0],
                                              uav_id,
                                              vf.literal(4),
                                              vf.literal(4 * offset),
                                              AluInstr::write));
      else
         shader.emit_instruction(
            new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write));
      shader.emit_instruction(
         new AluInstr(op1_mov, tmp[1], src_val, AluInstr::last_write));
      ir = new GDSInstr(op, dest, tmp, 0, nullptr);
   }
   shader.emit_instruction(ir);
   return true;
}

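/* Read the current counter value by using the returning GDS read op. */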
bool
GDSInstr::emit_atomic_read(nir_intrinsic_instr *instr, Shader& shader)
{
   auto& vf = shader.value_factory();

   auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0);
   offset += shader.remap_atomic_base(nir_intrinsic_base(instr));

   auto dest = vf.dest(instr->def, 0, pin_free);

   GDSInstr *ir = nullptr;

   if (shader.chip_class() < ISA_CC_CAYMAN) {
      RegisterVec4 src = RegisterVec4(0, true, {7, 7, 7, 7});
      ir = new GDSInstr(DS_OP_READ_RET, dest, src, offset, uav_id);
   } else {
      auto tmp = vf.temp_vec4(pin_group, {0, 7, 7, 7});
      if (uav_id)
         shader.emit_instruction(new AluInstr(op3_muladd_uint24,
                                              tmp[0],
                                              uav_id,
                                              vf.literal(4),
                                              vf.literal(4 * offset),
                                              AluInstr::write));
      else
         shader.emit_instruction(
            new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write));

      ir = new GDSInstr(DS_OP_READ_RET, dest, tmp, 0, nullptr);
   }

   shader.emit_instruction(ir);
   return true;
}

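/* Counter increments are emitted as a GDS ADD of the value provided by
 * shader.atomic_update(). */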
bool
GDSInstr::emit_atomic_inc(nir_intrinsic_instr *instr, Shader& shader)
{
   auto& vf = shader.value_factory();
   bool read_result = !list_is_empty(&instr->def.uses);

   auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0);
   offset += shader.remap_atomic_base(nir_intrinsic_base(instr));

   GDSInstr *ir = nullptr;
   auto dest = read_result ? vf.dest(instr->def, 0, pin_free) : nullptr;

   if (shader.chip_class() < ISA_CC_CAYMAN) {
      RegisterVec4 src(nullptr, shader.atomic_update(), nullptr, nullptr, pin_chan);
      ir =
         new GDSInstr(read_result ? DS_OP_ADD_RET : DS_OP_ADD, dest, src, offset, uav_id);
   } else {
      auto tmp = vf.temp_vec4(pin_group, {0, 1, 7, 7});

      if (uav_id)
         shader.emit_instruction(new AluInstr(op3_muladd_uint24,
                                              tmp[0],
                                              uav_id,
                                              vf.literal(4),
                                              vf.literal(4 * offset),
                                              AluInstr::write));
      else
         shader.emit_instruction(
            new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write));

      shader.emit_instruction(
         new AluInstr(op1_mov, tmp[1], shader.atomic_update(), AluInstr::last_write));
      ir = new GDSInstr(read_result ? DS_OP_ADD_RET : DS_OP_ADD, dest, tmp, 0, nullptr);
   }
   shader.emit_instruction(ir);
   return true;
}

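/* GDS has no returning pre-decrement: emit a SUB that returns the old
 * value and, if the result is read, subtract one more in the ALU. */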
bool
GDSInstr::emit_atomic_pre_dec(nir_intrinsic_instr *instr, Shader& shader)
{
   auto& vf = shader.value_factory();

   bool read_result = !list_is_empty(&instr->def.uses);

   auto opcode = read_result ? DS_OP_SUB_RET : DS_OP_SUB;

   auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0);
   offset += shader.remap_atomic_base(nir_intrinsic_base(instr));

   auto *tmp_dest = read_result ? vf.temp_register() : nullptr;

   GDSInstr *ir = nullptr;

   if (shader.chip_class() < ISA_CC_CAYMAN) {
      RegisterVec4 src(nullptr, shader.atomic_update(), nullptr, nullptr, pin_chan);
      ir = new GDSInstr(opcode, tmp_dest, src, offset, uav_id);
   } else {
      auto tmp = vf.temp_vec4(pin_group, {0, 1, 7, 7});
      if (uav_id)
         shader.emit_instruction(new AluInstr(op3_muladd_uint24,
                                              tmp[0],
                                              uav_id,
                                              vf.literal(4),
                                              vf.literal(4 * offset),
                                              AluInstr::write));
      else
         shader.emit_instruction(
            new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write));

      shader.emit_instruction(
         new AluInstr(op1_mov, tmp[1], shader.atomic_update(), AluInstr::last_write));
      ir = new GDSInstr(opcode, tmp_dest, tmp, 0, nullptr);
   }

   shader.emit_instruction(ir);
   if (read_result)
      shader.emit_instruction(new AluInstr(op2_sub_int,
                                           vf.dest(instr->def, 0, pin_free),
                                           tmp_dest,
                                           vf.one_i(),
                                           AluInstr::last_write));
   return true;
}

void GDSInstr::update_indirect_addr(PRegister old_reg, PRegister addr)
{
   (void)old_reg;
   set_resource_offset(addr);
}

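/* RatInstr models accesses that go through a RAT (random access target),
 * which is how SSBOs and images are reached on this hardware. */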
RatInstr::RatInstr(ECFOpCode cf_opcode,
                   ERatOp rat_op,
                   const RegisterVec4& data,
                   const RegisterVec4& index,
                   int rat_id,
                   PRegister rat_id_offset,
                   int burst_count,
                   int comp_mask,
                   int element_size):
   Resource(this, rat_id, rat_id_offset),
   m_cf_opcode(cf_opcode),
   m_rat_op(rat_op),
   m_data(data),
   m_index(index),
   m_burst_count(burst_count),
   m_comp_mask(comp_mask),
   m_element_size(element_size)
{
   set_always_keep();
   m_data.add_use(this);
   m_index.add_use(this);
}

void
RatInstr::accept(ConstInstrVisitor& visitor) const
{
   visitor.visit(*this);
}

void
RatInstr::accept(InstrVisitor& visitor)
{
   visitor.visit(this);
}

bool
RatInstr::is_equal_to(const RatInstr& lhs) const
{
   (void)lhs;
   assert(0);
   return false;
}

bool
RatInstr::do_ready() const
{
   if (m_rat_op != STORE_TYPED) {
      for (auto i : required_instr()) {
         if (!i->is_scheduled()) {
            return false;
         }
      }
   }

   return m_data.ready(block_id(), index()) && m_index.ready(block_id(), index());
}

void
RatInstr::do_print(std::ostream& os) const
{
   os << "MEM_RAT RAT " << resource_id();
   print_resource_offset(os);
   os << " @" << m_index;
   os << " OP:" << m_rat_op << " " << m_data;
   os << " BC:" << m_burst_count << " MASK:" << m_comp_mask << " ES:" << m_element_size;
   if (m_need_ack)
      os << " ACK";
}

void RatInstr::update_indirect_addr(UNUSED PRegister old_reg, PRegister addr)
{
   set_resource_offset(addr);
}

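/* Map NIR atomic ops to the returning RAT opcodes. */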
static RatInstr::ERatOp
get_rat_opcode(const nir_atomic_op opcode)
{
   switch (opcode) {
   case nir_atomic_op_iadd:
      return RatInstr::ADD_RTN;
   case nir_atomic_op_iand:
      return RatInstr::AND_RTN;
   case nir_atomic_op_ior:
      return RatInstr::OR_RTN;
   case nir_atomic_op_imin:
      return RatInstr::MIN_INT_RTN;
   case nir_atomic_op_imax:
      return RatInstr::MAX_INT_RTN;
   case nir_atomic_op_umin:
      return RatInstr::MIN_UINT_RTN;
   case nir_atomic_op_umax:
      return RatInstr::MAX_UINT_RTN;
   case nir_atomic_op_ixor:
      return RatInstr::XOR_RTN;
   case nir_atomic_op_cmpxchg:
      return RatInstr::CMPXCHG_INT_RTN;
   case nir_atomic_op_xchg:
      return RatInstr::XCHG_RTN;
   default:
      unreachable("Unsupported atomic");
   }
}

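/* Write-only RAT variants; exchange has no write-only form and keeps the
 * returning opcode. */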
static RatInstr::ERatOp
get_rat_opcode_wo(const nir_atomic_op opcode)
{
   switch (opcode) {
   case nir_atomic_op_iadd:
      return RatInstr::ADD;
   case nir_atomic_op_iand:
      return RatInstr::AND;
   case nir_atomic_op_ior:
      return RatInstr::OR;
   case nir_atomic_op_imin:
      return RatInstr::MIN_INT;
   case nir_atomic_op_imax:
      return RatInstr::MAX_INT;
   case nir_atomic_op_umin:
      return RatInstr::MIN_UINT;
   case nir_atomic_op_umax:
      return RatInstr::MAX_UINT;
   case nir_atomic_op_ixor:
      return RatInstr::XOR;
   case nir_atomic_op_cmpxchg:
      return RatInstr::CMPXCHG_INT;
   case nir_atomic_op_xchg:
      return RatInstr::XCHG_RTN;
   default:
      unreachable("Unsupported atomic");
   }
}

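/* Dispatch SSBO, global memory, and image intrinsics to the RAT emit
 * helpers. */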
bool
RatInstr::emit(nir_intrinsic_instr *intr, Shader& shader)
{
   switch (intr->intrinsic) {
   case nir_intrinsic_load_ssbo:
      return emit_ssbo_load(intr, shader);
   case nir_intrinsic_store_ssbo:
      return emit_ssbo_store(intr, shader);
   case nir_intrinsic_ssbo_atomic:
   case nir_intrinsic_ssbo_atomic_swap:
      return emit_ssbo_atomic_op(intr, shader);
   case nir_intrinsic_store_global:
      return emit_global_store(intr, shader);
   case nir_intrinsic_image_store:
      return emit_image_store(intr, shader);
   case nir_intrinsic_image_load:
   case nir_intrinsic_image_atomic:
   case nir_intrinsic_image_atomic_swap:
      return emit_image_load_or_atomic(intr, shader);
   case nir_intrinsic_image_size:
      return emit_image_size(intr, shader);
   case nir_intrinsic_image_samples:
      return emit_image_samples(intr, shader);
   case nir_intrinsic_get_ssbo_size:
      return emit_ssbo_size(intr, shader);
   default:
      return false;
   }
}

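/* SSBO loads are fetched through the texture cache; the byte offset is
 * first converted to a dword index. */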
bool
RatInstr::emit_ssbo_load(nir_intrinsic_instr *intr, Shader& shader)
{
   auto& vf = shader.value_factory();
   auto dest = vf.dest_vec4(intr->def, pin_group);

   /** src0 not used, should be some offset */
   auto addr = vf.src(intr->src[1], 0);
   auto addr_temp = vf.temp_register();

   /** Should be lowered in nir */
   shader.emit_instruction(new AluInstr(
      op2_lshr_int, addr_temp, addr, vf.literal(2), {alu_write, alu_last_instr}));

   const EVTXDataFormat formats[4] = {fmt_32, fmt_32_32, fmt_32_32_32, fmt_32_32_32_32};

   RegisterVec4::Swizzle dest_swz[4] = {
      {0, 7, 7, 7},
      {0, 1, 7, 7},
      {0, 1, 2, 7},
      {0, 1, 2, 3}
   };

   int comp_idx = intr->def.num_components - 1;

   auto [offset, res_offset] = shader.evaluate_resource_offset(intr, 0);

   auto res_id = R600_IMAGE_REAL_RESOURCE_OFFSET + offset + shader.ssbo_image_offset();

   auto ir = new LoadFromBuffer(
      dest, dest_swz[comp_idx], addr_temp, 0, res_id, res_offset, formats[comp_idx]);
   ir->set_fetch_flag(FetchInstr::use_tc);
   ir->set_num_format(vtx_nf_int);

   shader.emit_instruction(ir);
   return true;
}

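/* Global stores use the cacheless RAT path. Only the channels selected
 * by the NIR write mask are copied into the value vector. */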
bool
RatInstr::emit_global_store(nir_intrinsic_instr *intr, Shader& shader)
{
   auto& vf = shader.value_factory();
   auto addr_orig = vf.src(intr->src[1], 0);
   auto addr_vec = vf.temp_vec4(pin_chan, {0, 7, 7, 7});

   shader.emit_instruction(
      new AluInstr(op2_lshr_int, addr_vec[0], addr_orig, vf.literal(2),
                   AluInstr::last_write));

   RegisterVec4::Swizzle value_swz = {0, 7, 7, 7};
   auto mask = nir_intrinsic_write_mask(intr);
   for (int i = 0; i < 4; ++i) {
      if (mask & (1 << i))
         value_swz[i] = i;
   }

   auto value_vec = vf.temp_vec4(pin_chgr, value_swz);

   AluInstr *ir = nullptr;
   for (int i = 0; i < 4; ++i) {
      if (value_swz[i] < 4) {
         ir = new AluInstr(op1_mov, value_vec[i],
                           vf.src(intr->src[0], i), AluInstr::write);
         shader.emit_instruction(ir);
      }
   }
   if (ir)
      ir->set_alu_flag(alu_last_instr);

   auto store = new RatInstr(cf_mem_rat_cacheless,
                             RatInstr::STORE_RAW,
                             value_vec,
                             addr_vec,
                             shader.ssbo_image_offset(),
                             nullptr,
                             1,
                             mask,
                             0);
   shader.emit_instruction(store);
   return true;
}

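/* SSBO stores are split into one typed RAT store per component, with the
 * dword address advanced for each one. */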
bool
RatInstr::emit_ssbo_store(nir_intrinsic_instr *instr, Shader& shader)
{
   auto& vf = shader.value_factory();
   auto orig_addr = vf.src(instr->src[2], 0);

   auto addr_base = vf.temp_register();

   auto [offset, rat_id] = shader.evaluate_resource_offset(instr, 1);

   shader.emit_instruction(
      new AluInstr(op2_lshr_int, addr_base, orig_addr, vf.literal(2), AluInstr::write));

   for (unsigned i = 0; i < nir_src_num_components(instr->src[0]); ++i) {
      auto addr_vec = vf.temp_vec4(pin_group, {0, 1, 2, 7});
      if (i == 0) {
         shader.emit_instruction(
            new AluInstr(op1_mov, addr_vec[0], addr_base, AluInstr::last_write));
      } else {
         shader.emit_instruction(new AluInstr(
            op2_add_int, addr_vec[0], addr_base, vf.literal(i), AluInstr::last_write));
      }
      auto value = vf.src(instr->src[0], i);
      PRegister v = vf.temp_register(0);
      shader.emit_instruction(new AluInstr(op1_mov, v, value, AluInstr::last_write));
      auto value_vec = RegisterVec4(v, nullptr, nullptr, nullptr, pin_chan);
      auto store = new RatInstr(cf_mem_rat,
                                RatInstr::STORE_TYPED,
                                value_vec,
                                addr_vec,
                                offset + shader.ssbo_image_offset(),
                                rat_id,
                                1,
                                1,
                                0);
      shader.emit_instruction(store);
   }

   return true;
}

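/* Emit an SSBO atomic as a RAT op. For returning atomics the result is
 * read back from the RAT return buffer with a fetch that waits for the
 * atomic's ack. */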
bool
RatInstr::emit_ssbo_atomic_op(nir_intrinsic_instr *intr, Shader& shader)
{
   auto& vf = shader.value_factory();
   auto [imageid, image_offset] = shader.evaluate_resource_offset(intr, 0);

   bool read_result = !list_is_empty(&intr->def.uses);
   auto opcode = read_result ? get_rat_opcode(nir_intrinsic_atomic_op(intr))
                             : get_rat_opcode_wo(nir_intrinsic_atomic_op(intr));

   auto coord_orig = vf.src(intr->src[1], 0);
   auto coord = vf.temp_register(0);

   auto data_vec4 = vf.temp_vec4(pin_chgr, {0, 1, 2, 3});

   shader.emit_instruction(
      new AluInstr(op2_lshr_int, coord, coord_orig, vf.literal(2), AluInstr::last_write));

   shader.emit_instruction(
      new AluInstr(op1_mov, data_vec4[1], shader.rat_return_address(), AluInstr::write));

   if (intr->intrinsic == nir_intrinsic_ssbo_atomic_swap) {
      shader.emit_instruction(
         new AluInstr(op1_mov, data_vec4[0], vf.src(intr->src[3], 0), AluInstr::write));
      shader.emit_instruction(
         new AluInstr(op1_mov,
                      data_vec4[shader.chip_class() == ISA_CC_CAYMAN ? 2 : 3],
                      vf.src(intr->src[2], 0),
                      {alu_last_instr, alu_write}));
   } else {
      shader.emit_instruction(new AluInstr(
         op1_mov, data_vec4[0], vf.src(intr->src[2], 0), AluInstr::last_write));
   }

   RegisterVec4 out_vec(coord, coord, coord, coord, pin_chgr);

   auto atomic = new RatInstr(cf_mem_rat,
                              opcode,
                              data_vec4,
                              out_vec,
                              imageid + shader.ssbo_image_offset(),
                              image_offset,
                              1,
                              0xf,
                              0);
   shader.emit_instruction(atomic);

   atomic->set_ack();
   if (read_result) {
      atomic->set_instr_flag(ack_rat_return_write);
      auto dest = vf.dest_vec4(intr->def, pin_group);

      auto fetch = new FetchInstr(vc_fetch,
                                  dest,
                                  {0, 1, 2, 3},
                                  shader.rat_return_address(),
                                  0,
                                  no_index_offset,
                                  fmt_32,
                                  vtx_nf_int,
                                  vtx_es_none,
                                  R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid,
                                  image_offset);
      fetch->set_mfc(15);
      fetch->set_fetch_flag(FetchInstr::srf_mode);
      fetch->set_fetch_flag(FetchInstr::use_tc);
      fetch->set_fetch_flag(FetchInstr::vpm);
      fetch->set_fetch_flag(FetchInstr::wait_ack);
      fetch->add_required_instr(atomic);
      shader.chain_ssbo_read(fetch);
      shader.emit_instruction(fetch);
   }

   return true;
}

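/* get_ssbo_size requires a constant buffer index here; dynamic indexing
 * is not supported. */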
bool
RatInstr::emit_ssbo_size(nir_intrinsic_instr *intr, Shader& shader)
{
   auto& vf = shader.value_factory();
   auto dest = vf.dest_vec4(intr->def, pin_group);

   auto const_offset = nir_src_as_const_value(intr->src[0]);
   int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET;
   if (const_offset)
      res_id += const_offset[0].u32;
   else
      assert(0 && "dynamic buffer offset not supported in buffer_size");

   shader.emit_instruction(new QueryBufferSizeInstr(dest, {0, 1, 2, 3}, res_id));
   return true;
}

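/* Image stores swizzle 1D-array coordinates so that the layer index
 * moves from y to z before emitting a typed RAT store. */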
bool
RatInstr::emit_image_store(nir_intrinsic_instr *intrin, Shader& shader)
{
   auto& vf = shader.value_factory();
   auto [imageid, image_offset] = shader.evaluate_resource_offset(intrin, 0);

   auto coord_load = vf.src_vec4(intrin->src[1], pin_chan);
   auto coord = vf.temp_vec4(pin_chgr);

   auto value_load = vf.src_vec4(intrin->src[3], pin_chan);
   auto value = vf.temp_vec4(pin_chgr);

   RegisterVec4::Swizzle swizzle = {0, 1, 2, 3};
   if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
       nir_intrinsic_image_array(intrin))
      swizzle = {0, 2, 1, 3};

   for (int i = 0; i < 4; ++i) {
      auto flags = i != 3 ? AluInstr::write : AluInstr::last_write;
      shader.emit_instruction(
         new AluInstr(op1_mov, coord[swizzle[i]], coord_load[i], flags));
   }
   for (int i = 0; i < 4; ++i) {
      auto flags = i != 3 ? AluInstr::write : AluInstr::last_write;
      shader.emit_instruction(new AluInstr(op1_mov, value[i], value_load[i], flags));
   }

   auto op = cf_mem_rat; // nir_intrinsic_access(intrin) & ACCESS_COHERENT ?
                         //    cf_mem_rat_cacheless : cf_mem_rat;
   auto store = new RatInstr(
      op, RatInstr::STORE_TYPED, value, coord, imageid, image_offset, 1, 0xf, 0);

   store->set_ack();
   if (nir_intrinsic_access(intrin) & ACCESS_INCLUDE_HELPERS)
      store->set_instr_flag(Instr::helper);

   shader.emit_instruction(store);
   return true;
}

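/* Image loads reuse the atomic path with a returning NOP, so the loaded
 * data comes back through the RAT return buffer like any other
 * returning atomic. */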
bool
RatInstr::emit_image_load_or_atomic(nir_intrinsic_instr *intrin, Shader& shader)
{
   auto& vf = shader.value_factory();
   auto [imageid, image_offset] = shader.evaluate_resource_offset(intrin, 0);

   bool read_result = !list_is_empty(&intrin->def.uses);
   bool image_load = (intrin->intrinsic == nir_intrinsic_image_load);
   auto opcode = image_load ? RatInstr::NOP_RTN :
                 read_result ? get_rat_opcode(nir_intrinsic_atomic_op(intrin))
                             : get_rat_opcode_wo(nir_intrinsic_atomic_op(intrin));

   auto coord_orig = vf.src_vec4(intrin->src[1], pin_chan);
   auto coord = vf.temp_vec4(pin_chgr);

   auto data_vec4 = vf.temp_vec4(pin_chgr, {0, 1, 2, 3});

   RegisterVec4::Swizzle swizzle = {0, 1, 2, 3};
   if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
       nir_intrinsic_image_array(intrin))
      swizzle = {0, 2, 1, 3};

   for (int i = 0; i < 4; ++i) {
      auto flags = i != 3 ? AluInstr::write : AluInstr::last_write;
      shader.emit_instruction(
         new AluInstr(op1_mov, coord[swizzle[i]], coord_orig[i], flags));
   }

   shader.emit_instruction(
      new AluInstr(op1_mov, data_vec4[1], shader.rat_return_address(), AluInstr::write));

   if (intrin->intrinsic == nir_intrinsic_image_atomic_swap) {
      shader.emit_instruction(
         new AluInstr(op1_mov, data_vec4[0], vf.src(intrin->src[4], 0), AluInstr::write));
      shader.emit_instruction(
         new AluInstr(op1_mov,
                      data_vec4[shader.chip_class() == ISA_CC_CAYMAN ? 2 : 3],
                      vf.src(intrin->src[3], 0),
                      AluInstr::last_write));
   } else {
      shader.emit_instruction(
         new AluInstr(op1_mov, data_vec4[0], vf.src(intrin->src[3], 0), AluInstr::write));
      shader.emit_instruction(
         new AluInstr(op1_mov, data_vec4[2], vf.zero(), AluInstr::last_write));
   }

   auto atomic =
      new RatInstr(cf_mem_rat, opcode, data_vec4, coord, imageid, image_offset, 1, 0xf, 0);
   shader.emit_instruction(atomic);

   atomic->set_ack();
   if (read_result) {
      atomic->set_instr_flag(ack_rat_return_write);
      auto dest = vf.dest_vec4(intrin->def, pin_group);

      pipe_format format = nir_intrinsic_format(intrin);
      unsigned fmt = fmt_32;
      unsigned num_format = 0;
      unsigned format_comp = 0;
      unsigned endian = 0;
      r600_vertex_data_type(format, &fmt, &num_format, &format_comp, &endian);

      auto fetch = new FetchInstr(vc_fetch,
                                  dest,
                                  {0, 1, 2, 3},
                                  shader.rat_return_address(),
                                  0,
                                  no_index_offset,
                                  (EVTXDataFormat)fmt,
                                  (EVFetchNumFormat)num_format,
                                  (EVFetchEndianSwap)endian,
                                  R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid,
                                  image_offset);
      fetch->set_mfc(3);
      fetch->set_fetch_flag(FetchInstr::srf_mode);
      fetch->set_fetch_flag(FetchInstr::use_tc);
      fetch->set_fetch_flag(FetchInstr::vpm);
      fetch->set_fetch_flag(FetchInstr::wait_ack);
      if (format_comp)
         fetch->set_fetch_flag(FetchInstr::format_comp_signed);

      shader.chain_ssbo_read(fetch);
      shader.emit_instruction(fetch);
   }

   return true;
}

#define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16)

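/* Query the image size with get_resinfo. For cube map arrays the layer
 * count has to be patched in from the driver-provided buffer info
 * constants. */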
bool
RatInstr::emit_image_size(nir_intrinsic_instr *intrin, Shader& shader)
{
   auto& vf = shader.value_factory();

   auto src = RegisterVec4(0, true, {4, 4, 4, 4});

   assert(nir_src_as_uint(intrin->src[1]) == 0);

   auto const_offset = nir_src_as_const_value(intrin->src[0]);
   PRegister dyn_offset = nullptr;

   int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET + nir_intrinsic_range_base(intrin);
   if (const_offset)
      res_id += const_offset[0].u32;
   else
      dyn_offset = shader.emit_load_to_register(vf.src(intrin->src[0], 0));

   if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_BUF) {
      auto dest = vf.dest_vec4(intrin->def, pin_group);
      shader.emit_instruction(new QueryBufferSizeInstr(dest, {0, 1, 2, 3}, res_id));
      return true;
   } else {

      if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE &&
          nir_intrinsic_image_array(intrin) &&
          intrin->def.num_components > 2) {
         /* Need to load the layers from a const buffer */

         auto dest = vf.dest_vec4(intrin->def, pin_group);
         shader.emit_instruction(new TexInstr(TexInstr::get_resinfo,
                                              dest,
                                              {0, 1, 7, 3},
                                              src,
                                              res_id,
                                              dyn_offset));

         shader.set_flag(Shader::sh_txs_cube_array_comp);

         if (const_offset) {
            unsigned lookup_resid = const_offset[0].u32 + shader.image_size_const_offset();
            shader.emit_instruction(
               new AluInstr(op1_mov,
                            dest[2],
                            vf.uniform(lookup_resid / 4 + R600_SHADER_BUFFER_INFO_SEL,
                                       lookup_resid % 4,
                                       R600_BUFFER_INFO_CONST_BUFFER),
                            AluInstr::last_write));
         } else {
            /* If the addressing is indirect we have to get the z-value by
             * using a binary search */
            auto addr = vf.temp_register();
            auto comp1 = vf.temp_register();
            auto comp2 = vf.temp_register();
            auto low_bit = vf.temp_register();
            auto high_bit = vf.temp_register();

            auto trgt = vf.temp_vec4(pin_group);

            shader.emit_instruction(new AluInstr(op2_lshr_int,
                                                 addr,
                                                 vf.src(intrin->src[0], 0),
                                                 vf.literal(2),
                                                 AluInstr::write));
            shader.emit_instruction(new AluInstr(op2_and_int,
                                                 low_bit,
                                                 vf.src(intrin->src[0], 0),
                                                 vf.one_i(),
                                                 AluInstr::write));
            shader.emit_instruction(new AluInstr(op2_and_int,
                                                 high_bit,
                                                 vf.src(intrin->src[0], 0),
                                                 vf.literal(2),
                                                 AluInstr::last_write));

            shader.emit_instruction(new LoadFromBuffer(trgt,
                                                       {0, 1, 2, 3},
                                                       addr,
                                                       R600_SHADER_BUFFER_INFO_SEL,
                                                       R600_BUFFER_INFO_CONST_BUFFER,
                                                       nullptr,
                                                       fmt_32_32_32_32_float));

            // this may be wrong
            shader.emit_instruction(new AluInstr(
               op3_cnde_int, comp1, high_bit, trgt[0], trgt[2], AluInstr::write));
            shader.emit_instruction(new AluInstr(
               op3_cnde_int, comp2, high_bit, trgt[1], trgt[3], AluInstr::last_write));
            shader.emit_instruction(new AluInstr(
               op3_cnde_int, dest[2], low_bit, comp1, comp2, AluInstr::last_write));
         }
      } else {
         auto dest = vf.dest_vec4(intrin->def, pin_group);
         shader.emit_instruction(new TexInstr(TexInstr::get_resinfo,
                                              dest,
                                              {0, 1, 2, 3},
                                              src,
                                              res_id,
                                              dyn_offset));
      }
   }
   return true;
}

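/* The sample count is returned in the w component of get_resinfo. */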
bool
RatInstr::emit_image_samples(nir_intrinsic_instr *intrin, Shader& shader)
{
   auto& vf = shader.value_factory();

   auto src = RegisterVec4(0, true, {4, 4, 4, 4});

   auto tmp = shader.value_factory().temp_vec4(pin_group);
   auto dest = shader.value_factory().dest(intrin->def, 0, pin_free);

   auto const_offset = nir_src_as_const_value(intrin->src[0]);
   PRegister dyn_offset = nullptr;

   int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET + nir_intrinsic_range_base(intrin);
   if (const_offset)
      res_id += const_offset[0].u32;
   else
      dyn_offset = shader.emit_load_to_register(vf.src(intrin->src[0], 0));

   shader.emit_instruction(new TexInstr(TexInstr::get_resinfo,
                                        tmp,
                                        {3, 7, 7, 7},
                                        src,
                                        res_id,
                                        dyn_offset));

   shader.emit_instruction(new AluInstr(op1_mov, dest, tmp[0], AluInstr::last_write));
   return true;
}

} // namespace r600