xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/r600/sfn/sfn_instr_mem.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /* -*- mesa-c++  -*-
2  * Copyright 2022 Collabora LTD
3  * Author: Gert Wollny <[email protected]>
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "sfn_instr_mem.h"
8 
9 #include "nir_intrinsics.h"
10 #include "nir_intrinsics_indices.h"
11 #include "sfn_alu_defines.h"
12 #include "sfn_instr_alu.h"
13 #include "sfn_instr_fetch.h"
14 #include "sfn_instr_tex.h"
15 #include "sfn_shader.h"
16 #include "sfn_virtualvalues.h"
17 
18 namespace r600 {
19 
/* Construct a GDS (global data share) instruction.
 *
 * op       - DS operation to perform
 * dest     - destination register for the returned value; may be null
 *            for write-only operations
 * src      - source operand vector
 * uav_base - constant base index of the atomic-counter resource
 * uav_id   - optional register holding a dynamic resource index
 */
GDSInstr::GDSInstr(
   ESDOp op, Register *dest, const RegisterVec4& src, int uav_base, PRegister uav_id):
    Resource(this, uav_base, uav_id),
    m_op(op),
    m_dest(dest),
    m_src(src)
{
   /* GDS operations have memory side effects; never dead-code eliminate */
   set_always_keep();

   /* Register this instruction for use/def tracking */
   m_src.add_use(this);
   if (m_dest)
      m_dest->add_parent(this);
}
33 
34 bool
is_equal_to(const GDSInstr & rhs) const35 GDSInstr::is_equal_to(const GDSInstr& rhs) const
36 {
37 #define NE(X) (X != rhs.X)
38 
39    if (NE(m_op) || NE(m_src))
40       return false;
41 
42    sfn_value_equal(m_dest, rhs.m_dest);
43 
44    return resource_is_equal(rhs);
45 }
46 
47 void
accept(ConstInstrVisitor & visitor) const48 GDSInstr::accept(ConstInstrVisitor& visitor) const
49 {
50    visitor.visit(*this);
51 }
52 
53 void
accept(InstrVisitor & visitor)54 GDSInstr::accept(InstrVisitor& visitor)
55 {
56    visitor.visit(this);
57 }
58 
59 bool
do_ready() const60 GDSInstr::do_ready() const
61 {
62    return m_src.ready(block_id(), index()) && resource_ready(block_id(), index());
63 }
64 
65 void
do_print(std::ostream & os) const66 GDSInstr::do_print(std::ostream& os) const
67 {
68    os << "GDS " << lds_ops.at(m_op).name;
69    if (m_dest)
70       os << *m_dest;
71    else
72       os << "___";
73    os << " " << m_src;
74    os << " BASE:" << resource_id();
75 
76    print_resource_offset(os);
77 }
78 
79 bool
emit_atomic_counter(nir_intrinsic_instr * intr,Shader & shader)80 GDSInstr::emit_atomic_counter(nir_intrinsic_instr *intr, Shader& shader)
81 {
82    switch (intr->intrinsic) {
83    case nir_intrinsic_atomic_counter_add:
84    case nir_intrinsic_atomic_counter_and:
85    case nir_intrinsic_atomic_counter_exchange:
86    case nir_intrinsic_atomic_counter_max:
87    case nir_intrinsic_atomic_counter_min:
88    case nir_intrinsic_atomic_counter_or:
89    case nir_intrinsic_atomic_counter_xor:
90    case nir_intrinsic_atomic_counter_comp_swap:
91       return emit_atomic_op2(intr, shader);
92    case nir_intrinsic_atomic_counter_read:
93    case nir_intrinsic_atomic_counter_post_dec:
94       return emit_atomic_read(intr, shader);
95    case nir_intrinsic_atomic_counter_inc:
96       return emit_atomic_inc(intr, shader);
97    case nir_intrinsic_atomic_counter_pre_dec:
98       return emit_atomic_pre_dec(intr, shader);
99    default:
100       return false;
101    }
102 }
103 
/* Channels of the source vector that are still free; the scheduler may
 * assign other values to these channels within the same group. */
uint8_t GDSInstr::allowed_src_chan_mask() const
{
   return m_src.free_chan_mask();
}
108 
109 static ESDOp
get_opcode(const nir_intrinsic_op opcode)110 get_opcode(const nir_intrinsic_op opcode)
111 {
112    switch (opcode) {
113    case nir_intrinsic_atomic_counter_add:
114       return DS_OP_ADD_RET;
115    case nir_intrinsic_atomic_counter_and:
116       return DS_OP_AND_RET;
117    case nir_intrinsic_atomic_counter_exchange:
118       return DS_OP_XCHG_RET;
119    case nir_intrinsic_atomic_counter_inc:
120       return DS_OP_INC_RET;
121    case nir_intrinsic_atomic_counter_max:
122       return DS_OP_MAX_UINT_RET;
123    case nir_intrinsic_atomic_counter_min:
124       return DS_OP_MIN_UINT_RET;
125    case nir_intrinsic_atomic_counter_or:
126       return DS_OP_OR_RET;
127    case nir_intrinsic_atomic_counter_read:
128       return DS_OP_READ_RET;
129    case nir_intrinsic_atomic_counter_xor:
130       return DS_OP_XOR_RET;
131    case nir_intrinsic_atomic_counter_post_dec:
132       return DS_OP_DEC_RET;
133    case nir_intrinsic_atomic_counter_comp_swap:
134       return DS_OP_CMP_XCHG_RET;
135    case nir_intrinsic_atomic_counter_pre_dec:
136    default:
137       return DS_OP_INVALID;
138    }
139 }
140 
141 static ESDOp
get_opcode_wo(const nir_intrinsic_op opcode)142 get_opcode_wo(const nir_intrinsic_op opcode)
143 {
144    switch (opcode) {
145    case nir_intrinsic_atomic_counter_add:
146       return DS_OP_ADD;
147    case nir_intrinsic_atomic_counter_and:
148       return DS_OP_AND;
149    case nir_intrinsic_atomic_counter_inc:
150       return DS_OP_INC;
151    case nir_intrinsic_atomic_counter_max:
152       return DS_OP_MAX_UINT;
153    case nir_intrinsic_atomic_counter_min:
154       return DS_OP_MIN_UINT;
155    case nir_intrinsic_atomic_counter_or:
156       return DS_OP_OR;
157    case nir_intrinsic_atomic_counter_xor:
158       return DS_OP_XOR;
159    case nir_intrinsic_atomic_counter_post_dec:
160       return DS_OP_DEC;
161    case nir_intrinsic_atomic_counter_comp_swap:
162       return DS_OP_CMP_XCHG_RET;
163    case nir_intrinsic_atomic_counter_exchange:
164       return DS_OP_XCHG_RET;
165    case nir_intrinsic_atomic_counter_pre_dec:
166    default:
167       return DS_OP_INVALID;
168    }
169 }
170 
171 bool
emit_atomic_op2(nir_intrinsic_instr * instr,Shader & shader)172 GDSInstr::emit_atomic_op2(nir_intrinsic_instr *instr, Shader& shader)
173 {
174    auto& vf = shader.value_factory();
175    bool read_result = !list_is_empty(&instr->def.uses);
176 
177    ESDOp op =
178       read_result ? get_opcode(instr->intrinsic) : get_opcode_wo(instr->intrinsic);
179 
180    if (DS_OP_INVALID == op)
181       return false;
182 
183    auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0);
184    {
185    }
186    offset += nir_intrinsic_base(instr);
187 
188    auto dest = read_result ? vf.dest(instr->def, 0, pin_free) : nullptr;
189 
190    PRegister src_as_register = nullptr;
191    auto src_val = vf.src(instr->src[1], 0);
192    if (!src_val->as_register()) {
193       auto temp_src_val = vf.temp_register();
194       shader.emit_instruction(
195          new AluInstr(op1_mov, temp_src_val, src_val, AluInstr::last_write));
196       src_as_register = temp_src_val;
197    } else
198       src_as_register = src_val->as_register();
199 
200    if (uav_id != nullptr)
201       shader.set_flag(Shader::sh_indirect_atomic);
202 
203    GDSInstr *ir = nullptr;
204    if (shader.chip_class() < ISA_CC_CAYMAN) {
205       RegisterVec4 src(nullptr, src_as_register, nullptr, nullptr, pin_free);
206       ir = new GDSInstr(op, dest, src, offset, uav_id);
207 
208    } else {
209       auto dest = vf.dest(instr->def, 0, pin_free);
210       auto tmp = vf.temp_vec4(pin_group, {0, 1, 7, 7});
211       if (uav_id)
212          shader.emit_instruction(new AluInstr(op3_muladd_uint24,
213                                               tmp[0],
214                                               uav_id,
215                                               vf.literal(4),
216                                               vf.literal(4 * offset),
217                                               AluInstr::write));
218       else
219          shader.emit_instruction(
220             new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write));
221       shader.emit_instruction(
222          new AluInstr(op1_mov, tmp[1], src_val, AluInstr::last_write));
223       ir = new GDSInstr(op, dest, tmp, 0, nullptr);
224    }
225    shader.emit_instruction(ir);
226    return true;
227 }
228 
229 bool
emit_atomic_read(nir_intrinsic_instr * instr,Shader & shader)230 GDSInstr::emit_atomic_read(nir_intrinsic_instr *instr, Shader& shader)
231 {
232    auto& vf = shader.value_factory();
233 
234    auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0);
235    {
236    }
237    offset += shader.remap_atomic_base(nir_intrinsic_base(instr));
238 
239    auto dest = vf.dest(instr->def, 0, pin_free);
240 
241    GDSInstr *ir = nullptr;
242 
243    if (shader.chip_class() < ISA_CC_CAYMAN) {
244       RegisterVec4 src = RegisterVec4(0, true, {7, 7, 7, 7});
245       ir = new GDSInstr(DS_OP_READ_RET, dest, src, offset, uav_id);
246    } else {
247       auto tmp = vf.temp_vec4(pin_group, {0, 7, 7, 7});
248       if (uav_id)
249          shader.emit_instruction(new AluInstr(op3_muladd_uint24,
250                                               tmp[0],
251                                               uav_id,
252                                               vf.literal(4),
253                                               vf.literal(4 * offset),
254                                               AluInstr::write));
255       else
256          shader.emit_instruction(
257             new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write));
258 
259       ir = new GDSInstr(DS_OP_READ_RET, dest, tmp, 0, nullptr);
260    }
261 
262    shader.emit_instruction(ir);
263    return true;
264 }
265 
266 bool
emit_atomic_inc(nir_intrinsic_instr * instr,Shader & shader)267 GDSInstr::emit_atomic_inc(nir_intrinsic_instr *instr, Shader& shader)
268 {
269    auto& vf = shader.value_factory();
270    bool read_result = !list_is_empty(&instr->def.uses);
271 
272    auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0);
273    {
274    }
275    offset += shader.remap_atomic_base(nir_intrinsic_base(instr));
276 
277    GDSInstr *ir = nullptr;
278    auto dest = read_result ? vf.dest(instr->def, 0, pin_free) : nullptr;
279 
280    if (shader.chip_class() < ISA_CC_CAYMAN) {
281             RegisterVec4 src(nullptr, shader.atomic_update(), nullptr, nullptr, pin_chan);
282       ir =
283          new GDSInstr(read_result ? DS_OP_ADD_RET : DS_OP_ADD, dest, src, offset, uav_id);
284    } else {
285       auto tmp = vf.temp_vec4(pin_group, {0, 1, 7, 7});
286 
287       if (uav_id)
288          shader.emit_instruction(new AluInstr(op3_muladd_uint24,
289                                               tmp[0],
290                                               uav_id,
291                                               vf.literal(4),
292                                               vf.literal(4 * offset),
293                                               AluInstr::write));
294       else
295          shader.emit_instruction(
296             new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write));
297 
298       shader.emit_instruction(
299          new AluInstr(op1_mov, tmp[1], shader.atomic_update(), AluInstr::last_write));
300       ir = new GDSInstr(read_result ? DS_OP_ADD_RET : DS_OP_ADD, dest, tmp, 0, nullptr);
301    }
302    shader.emit_instruction(ir);
303    return true;
304 }
305 
306 bool
emit_atomic_pre_dec(nir_intrinsic_instr * instr,Shader & shader)307 GDSInstr::emit_atomic_pre_dec(nir_intrinsic_instr *instr, Shader& shader)
308 {
309    auto& vf = shader.value_factory();
310 
311    bool read_result = !list_is_empty(&instr->def.uses);
312 
313    auto opcode = read_result ? DS_OP_SUB_RET : DS_OP_SUB;
314 
315    auto [offset, uav_id] = shader.evaluate_resource_offset(instr, 0);
316    {
317    }
318    offset += shader.remap_atomic_base(nir_intrinsic_base(instr));
319 
320 
321    auto *tmp_dest = read_result ? vf.temp_register() : nullptr;
322 
323    GDSInstr *ir = nullptr;
324 
325    if (shader.chip_class() < ISA_CC_CAYMAN) {
326       RegisterVec4 src(nullptr, shader.atomic_update(), nullptr, nullptr, pin_chan);
327       ir = new GDSInstr(opcode, tmp_dest, src, offset, uav_id);
328    } else {
329       auto tmp = vf.temp_vec4(pin_group, {0, 1, 7, 7});
330       if (uav_id)
331          shader.emit_instruction(new AluInstr(op3_muladd_uint24,
332                                               tmp[0],
333                                               uav_id,
334                                               vf.literal(4),
335                                               vf.literal(4 * offset),
336                                               AluInstr::write));
337       else
338          shader.emit_instruction(
339             new AluInstr(op1_mov, tmp[0], vf.literal(4 * offset), AluInstr::write));
340 
341       shader.emit_instruction(
342          new AluInstr(op1_mov, tmp[1], shader.atomic_update(), AluInstr::last_write));
343       ir = new GDSInstr(opcode, tmp_dest, tmp, 0, nullptr);
344    }
345 
346    shader.emit_instruction(ir);
347    if (read_result)
348       shader.emit_instruction(new AluInstr(op2_sub_int,
349                                            vf.dest(instr->def, 0, pin_free),
350                                            tmp_dest,
351                                            vf.one_i(),
352                                            AluInstr::last_write));
353    return true;
354 }
355 
/* Replace the register that supplies the indirect resource offset; the
 * previous register is not tracked here, hence ignored. */
void GDSInstr::update_indirect_addr(PRegister old_reg, PRegister addr)
{
   (void)old_reg;
   set_resource_offset(addr);
}
361 
/* Construct a RAT (random access target) memory instruction.
 *
 * cf_opcode     - control-flow opcode (e.g. cf_mem_rat / cf_mem_rat_cacheless)
 * rat_op        - RAT operation (store, atomic, ...)
 * data          - data operand vector
 * index         - address/index operand vector
 * rat_id        - constant RAT resource id
 * rat_id_offset - optional register for a dynamic resource index
 * burst_count   - number of consecutive elements written
 * comp_mask     - component write mask
 * element_size  - element size encoding
 */
RatInstr::RatInstr(ECFOpCode cf_opcode,
                   ERatOp rat_op,
                   const RegisterVec4& data,
                   const RegisterVec4& index,
                   int rat_id,
                   PRegister rat_id_offset,
                   int burst_count,
                   int comp_mask,
                   int element_size):
    Resource(this, rat_id, rat_id_offset),
    m_cf_opcode(cf_opcode),
    m_rat_op(rat_op),
    m_data(data),
    m_index(index),
    m_burst_count(burst_count),
    m_comp_mask(comp_mask),
    m_element_size(element_size)
{
   /* Memory side effects; never dead-code eliminate */
   set_always_keep();
   m_data.add_use(this);
   m_index.add_use(this);
}
384 
385 void
accept(ConstInstrVisitor & visitor) const386 RatInstr::accept(ConstInstrVisitor& visitor) const
387 {
388    visitor.visit(*this);
389 }
390 
391 void
accept(InstrVisitor & visitor)392 RatInstr::accept(InstrVisitor& visitor)
393 {
394    visitor.visit(this);
395 }
396 
397 bool
is_equal_to(const RatInstr & lhs) const398 RatInstr::is_equal_to(const RatInstr& lhs) const
399 {
400    (void)lhs;
401    assert(0);
402    return false;
403 }
404 
405 bool
do_ready() const406 RatInstr::do_ready() const
407 {
408    if (m_rat_op != STORE_TYPED) {
409       for (auto i : required_instr()) {
410          if (!i->is_scheduled()) {
411             return false;
412          }
413       }
414    }
415 
416    return m_data.ready(block_id(), index()) && m_index.ready(block_id(), index());
417 }
418 
419 void
do_print(std::ostream & os) const420 RatInstr::do_print(std::ostream& os) const
421 {
422    os << "MEM_RAT RAT " << resource_id();
423    print_resource_offset(os);
424    os << " @" << m_index;
425    os << " OP:" << m_rat_op << " " << m_data;
426    os << " BC:" << m_burst_count << " MASK:" << m_comp_mask << " ES:" << m_element_size;
427    if (m_need_ack)
428       os << " ACK";
429 }
430 
/* Replace the register that supplies the indirect RAT index. */
void RatInstr::update_indirect_addr(UNUSED PRegister old_reg, PRegister addr)
{
   set_resource_offset(addr);
}
435 
436 static RatInstr::ERatOp
get_rat_opcode(const nir_atomic_op opcode)437 get_rat_opcode(const nir_atomic_op opcode)
438 {
439    switch (opcode) {
440    case nir_atomic_op_iadd:
441       return RatInstr::ADD_RTN;
442    case nir_atomic_op_iand:
443       return RatInstr::AND_RTN;
444    case nir_atomic_op_ior:
445       return RatInstr::OR_RTN;
446    case nir_atomic_op_imin:
447       return RatInstr::MIN_INT_RTN;
448    case nir_atomic_op_imax:
449       return RatInstr::MAX_INT_RTN;
450    case nir_atomic_op_umin:
451       return RatInstr::MIN_UINT_RTN;
452    case nir_atomic_op_umax:
453       return RatInstr::MAX_UINT_RTN;
454    case nir_atomic_op_ixor:
455       return RatInstr::XOR_RTN;
456    case nir_atomic_op_cmpxchg:
457       return RatInstr::CMPXCHG_INT_RTN;
458    case nir_atomic_op_xchg:
459       return RatInstr::XCHG_RTN;
460    default:
461       unreachable("Unsupported atomic");
462    }
463 }
464 
465 static RatInstr::ERatOp
get_rat_opcode_wo(const nir_atomic_op opcode)466 get_rat_opcode_wo(const nir_atomic_op opcode)
467 {
468    switch (opcode) {
469    case nir_atomic_op_iadd:
470       return RatInstr::ADD;
471    case nir_atomic_op_iand:
472       return RatInstr::AND;
473    case nir_atomic_op_ior:
474       return RatInstr::OR;
475    case nir_atomic_op_imin:
476       return RatInstr::MIN_INT;
477    case nir_atomic_op_imax:
478       return RatInstr::MAX_INT;
479    case nir_atomic_op_umin:
480       return RatInstr::MIN_UINT;
481    case nir_atomic_op_umax:
482       return RatInstr::MAX_UINT;
483    case nir_atomic_op_ixor:
484       return RatInstr::XOR;
485    case nir_atomic_op_cmpxchg:
486       return RatInstr::CMPXCHG_INT;
487    case nir_atomic_op_xchg:
488       return RatInstr::XCHG_RTN;
489    default:
490       unreachable("Unsupported atomic");
491    }
492 }
493 
494 bool
emit(nir_intrinsic_instr * intr,Shader & shader)495 RatInstr::emit(nir_intrinsic_instr *intr, Shader& shader)
496 {
497    switch (intr->intrinsic) {
498    case nir_intrinsic_load_ssbo:
499       return emit_ssbo_load(intr, shader);
500    case nir_intrinsic_store_ssbo:
501       return emit_ssbo_store(intr, shader);
502    case nir_intrinsic_ssbo_atomic:
503    case nir_intrinsic_ssbo_atomic_swap:
504       return emit_ssbo_atomic_op(intr, shader);
505    case nir_intrinsic_store_global:
506       return emit_global_store(intr, shader);
507    case nir_intrinsic_image_store:
508       return emit_image_store(intr, shader);
509    case nir_intrinsic_image_load:
510    case nir_intrinsic_image_atomic:
511    case nir_intrinsic_image_atomic_swap:
512       return emit_image_load_or_atomic(intr, shader);
513    case nir_intrinsic_image_size:
514       return emit_image_size(intr, shader);
515    case nir_intrinsic_image_samples:
516       return emit_image_samples(intr, shader);
517    case nir_intrinsic_get_ssbo_size:
518       return emit_ssbo_size(intr, shader);
519    default:
520       return false;
521    }
522 }
523 
524 bool
emit_ssbo_load(nir_intrinsic_instr * intr,Shader & shader)525 RatInstr::emit_ssbo_load(nir_intrinsic_instr *intr, Shader& shader)
526 {
527    auto& vf = shader.value_factory();
528    auto dest = vf.dest_vec4(intr->def, pin_group);
529 
530    /** src0 not used, should be some offset */
531    auto addr = vf.src(intr->src[1], 0);
532    auto addr_temp = vf.temp_register();
533 
534    /** Should be lowered in nir */
535    shader.emit_instruction(new AluInstr(
536       op2_lshr_int, addr_temp, addr, vf.literal(2), {alu_write, alu_last_instr}));
537 
538    const EVTXDataFormat formats[4] = {fmt_32, fmt_32_32, fmt_32_32_32, fmt_32_32_32_32};
539 
540    RegisterVec4::Swizzle dest_swz[4] = {
541       {0, 7, 7, 7},
542       {0, 1, 7, 7},
543       {0, 1, 2, 7},
544       {0, 1, 2, 3}
545    };
546 
547    int comp_idx = intr->def.num_components - 1;
548 
549    auto [offset, res_offset] = shader.evaluate_resource_offset(intr, 0);
550    {
551    }
552 
553    auto res_id = R600_IMAGE_REAL_RESOURCE_OFFSET + offset + shader.ssbo_image_offset();
554 
555    auto ir = new LoadFromBuffer(
556       dest, dest_swz[comp_idx], addr_temp, 0, res_id, res_offset, formats[comp_idx]);
557    ir->set_fetch_flag(FetchInstr::use_tc);
558    ir->set_num_format(vtx_nf_int);
559 
560    shader.emit_instruction(ir);
561    return true;
562 }
563 
564 bool
emit_global_store(nir_intrinsic_instr * intr,Shader & shader)565 RatInstr::emit_global_store(nir_intrinsic_instr *intr, Shader& shader)
566 {
567    auto& vf = shader.value_factory();
568    auto addr_orig = vf.src(intr->src[1], 0);
569    auto addr_vec = vf.temp_vec4(pin_chan, {0, 7, 7, 7});
570 
571    shader.emit_instruction(
572       new AluInstr(op2_lshr_int, addr_vec[0], addr_orig, vf.literal(2),
573                    AluInstr::last_write));
574 
575    RegisterVec4::Swizzle value_swz = {0,7,7,7};
576    auto mask = nir_intrinsic_write_mask(intr);
577    for (int i = 0; i < 4; ++i) {
578       if (mask & (1 << i))
579          value_swz[i] = i;
580    }
581 
582    auto value_vec = vf.temp_vec4(pin_chgr, value_swz);
583 
584    AluInstr *ir = nullptr;
585    for (int i = 0; i < 4; ++i) {
586       if (value_swz[i] < 4) {
587          ir = new AluInstr(op1_mov, value_vec[i],
588                            vf.src(intr->src[0], i), AluInstr::write);
589          shader.emit_instruction(ir);
590       }
591    }
592    if (ir)
593       ir->set_alu_flag(alu_last_instr);
594 
595    auto store = new RatInstr(cf_mem_rat_cacheless,
596                              RatInstr::STORE_RAW,
597                              value_vec,
598                              addr_vec,
599                              shader.ssbo_image_offset(),
600                              nullptr,
601                              1,
602                              mask,
603                              0);
604    shader.emit_instruction(store);
605    return true;
606 }
607 
608 bool
emit_ssbo_store(nir_intrinsic_instr * instr,Shader & shader)609 RatInstr::emit_ssbo_store(nir_intrinsic_instr *instr, Shader& shader)
610 {
611    auto& vf = shader.value_factory();
612    auto orig_addr = vf.src(instr->src[2], 0);
613 
614    auto addr_base = vf.temp_register();
615 
616    auto [offset, rat_id] = shader.evaluate_resource_offset(instr, 1);
617 
618    shader.emit_instruction(
619       new AluInstr(op2_lshr_int, addr_base, orig_addr, vf.literal(2), AluInstr::write));
620 
621    for (unsigned i = 0; i < nir_src_num_components(instr->src[0]); ++i) {
622       auto addr_vec = vf.temp_vec4(pin_group, {0, 1, 2, 7});
623       if (i == 0) {
624          shader.emit_instruction(
625             new AluInstr(op1_mov, addr_vec[0], addr_base, AluInstr::last_write));
626       } else {
627          shader.emit_instruction(new AluInstr(
628             op2_add_int, addr_vec[0], addr_base, vf.literal(i), AluInstr::last_write));
629       }
630       auto value = vf.src(instr->src[0], i);
631       PRegister v = vf.temp_register(0);
632       shader.emit_instruction(new AluInstr(op1_mov, v, value, AluInstr::last_write));
633       auto value_vec = RegisterVec4(v, nullptr, nullptr, nullptr, pin_chan);
634       auto store = new RatInstr(cf_mem_rat,
635                                 RatInstr::STORE_TYPED,
636                                 value_vec,
637                                 addr_vec,
638                                 offset + shader.ssbo_image_offset(),
639                                 rat_id,
640                                 1,
641                                 1,
642                                 0);
643       shader.emit_instruction(store);
644    }
645 
646    return true;
647 }
648 
649 bool
emit_ssbo_atomic_op(nir_intrinsic_instr * intr,Shader & shader)650 RatInstr::emit_ssbo_atomic_op(nir_intrinsic_instr *intr, Shader& shader)
651 {
652    auto& vf = shader.value_factory();
653    auto [imageid, image_offset] = shader.evaluate_resource_offset(intr, 0);
654    {
655    }
656 
657    bool read_result = !list_is_empty(&intr->def.uses);
658    auto opcode = read_result ? get_rat_opcode(nir_intrinsic_atomic_op(intr))
659                              : get_rat_opcode_wo(nir_intrinsic_atomic_op(intr));
660 
661    auto coord_orig = vf.src(intr->src[1], 0);
662    auto coord = vf.temp_register(0);
663 
664    auto data_vec4 = vf.temp_vec4(pin_chgr, {0, 1, 2, 3});
665 
666    shader.emit_instruction(
667       new AluInstr(op2_lshr_int, coord, coord_orig, vf.literal(2), AluInstr::last_write));
668 
669    shader.emit_instruction(
670       new AluInstr(op1_mov, data_vec4[1], shader.rat_return_address(), AluInstr::write));
671 
672    if (intr->intrinsic == nir_intrinsic_ssbo_atomic_swap) {
673       shader.emit_instruction(
674          new AluInstr(op1_mov, data_vec4[0], vf.src(intr->src[3], 0), AluInstr::write));
675       shader.emit_instruction(
676          new AluInstr(op1_mov,
677                       data_vec4[shader.chip_class() == ISA_CC_CAYMAN ? 2 : 3],
678                       vf.src(intr->src[2], 0),
679                       {alu_last_instr, alu_write}));
680    } else {
681       shader.emit_instruction(new AluInstr(
682          op1_mov, data_vec4[0], vf.src(intr->src[2], 0), AluInstr::last_write));
683    }
684 
685    RegisterVec4 out_vec(coord, coord, coord, coord, pin_chgr);
686 
687    auto atomic = new RatInstr(cf_mem_rat,
688                               opcode,
689                               data_vec4,
690                               out_vec,
691                               imageid + shader.ssbo_image_offset(),
692                               image_offset,
693                               1,
694                               0xf,
695                               0);
696    shader.emit_instruction(atomic);
697 
698    atomic->set_ack();
699    if (read_result) {
700       atomic->set_instr_flag(ack_rat_return_write);
701       auto dest = vf.dest_vec4(intr->def, pin_group);
702 
703       auto fetch = new FetchInstr(vc_fetch,
704                                   dest,
705                                   {0, 1, 2, 3},
706                                   shader.rat_return_address(),
707                                   0,
708                                   no_index_offset,
709                                   fmt_32,
710                                   vtx_nf_int,
711                                   vtx_es_none,
712                                   R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid,
713                                   image_offset);
714       fetch->set_mfc(15);
715       fetch->set_fetch_flag(FetchInstr::srf_mode);
716       fetch->set_fetch_flag(FetchInstr::use_tc);
717       fetch->set_fetch_flag(FetchInstr::vpm);
718       fetch->set_fetch_flag(FetchInstr::wait_ack);
719       fetch->add_required_instr(atomic);
720       shader.chain_ssbo_read(fetch);
721       shader.emit_instruction(fetch);
722    }
723 
724    return true;
725 }
726 
727 bool
emit_ssbo_size(nir_intrinsic_instr * intr,Shader & shader)728 RatInstr::emit_ssbo_size(nir_intrinsic_instr *intr, Shader& shader)
729 {
730    auto& vf = shader.value_factory();
731    auto dest = vf.dest_vec4(intr->def, pin_group);
732 
733    auto const_offset = nir_src_as_const_value(intr->src[0]);
734    int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET;
735    if (const_offset)
736       res_id += const_offset[0].u32;
737    else
738       assert(0 && "dynamic buffer offset not supported in buffer_size");
739 
740    shader.emit_instruction(new QueryBufferSizeInstr(dest, {0, 1, 2, 3}, res_id));
741    return true;
742 }
743 
744 bool
emit_image_store(nir_intrinsic_instr * intrin,Shader & shader)745 RatInstr::emit_image_store(nir_intrinsic_instr *intrin, Shader& shader)
746 {
747    auto& vf = shader.value_factory();
748    auto [imageid, image_offset] = shader.evaluate_resource_offset(intrin, 0);
749    {
750    }
751 
752    auto coord_load = vf.src_vec4(intrin->src[1], pin_chan);
753    auto coord = vf.temp_vec4(pin_chgr);
754 
755    auto value_load = vf.src_vec4(intrin->src[3], pin_chan);
756    auto value = vf.temp_vec4(pin_chgr);
757 
758    RegisterVec4::Swizzle swizzle = {0, 1, 2, 3};
759    if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
760        nir_intrinsic_image_array(intrin))
761       swizzle = {0, 2, 1, 3};
762 
763    for (int i = 0; i < 4; ++i) {
764       auto flags = i != 3 ? AluInstr::write : AluInstr::last_write;
765       shader.emit_instruction(
766          new AluInstr(op1_mov, coord[swizzle[i]], coord_load[i], flags));
767    }
768    for (int i = 0; i < 4; ++i) {
769       auto flags = i != 3 ? AluInstr::write : AluInstr::last_write;
770       shader.emit_instruction(new AluInstr(op1_mov, value[i], value_load[i], flags));
771    }
772 
773    auto op = cf_mem_rat; // nir_intrinsic_access(intrin) & ACCESS_COHERENT ?
774                          // cf_mem_rat_cacheless : cf_mem_rat;
775    auto store = new RatInstr(
776       op, RatInstr::STORE_TYPED, value, coord, imageid, image_offset, 1, 0xf, 0);
777 
778    store->set_ack();
779    if (nir_intrinsic_access(intrin) & ACCESS_INCLUDE_HELPERS)
780       store->set_instr_flag(Instr::helper);
781 
782    shader.emit_instruction(store);
783    return true;
784 }
785 
786 bool
emit_image_load_or_atomic(nir_intrinsic_instr * intrin,Shader & shader)787 RatInstr::emit_image_load_or_atomic(nir_intrinsic_instr *intrin, Shader& shader)
788 {
789    auto& vf = shader.value_factory();
790    auto [imageid, image_offset] = shader.evaluate_resource_offset(intrin, 0);
791    {
792    }
793 
794    bool read_result = !list_is_empty(&intrin->def.uses);
795    bool image_load = (intrin->intrinsic == nir_intrinsic_image_load);
796    auto opcode = image_load  ? RatInstr::NOP_RTN :
797                  read_result ? get_rat_opcode(nir_intrinsic_atomic_op(intrin))
798                              : get_rat_opcode_wo(nir_intrinsic_atomic_op(intrin));
799 
800    auto coord_orig = vf.src_vec4(intrin->src[1], pin_chan);
801    auto coord = vf.temp_vec4(pin_chgr);
802 
803    auto data_vec4 = vf.temp_vec4(pin_chgr, {0, 1, 2, 3});
804 
805    RegisterVec4::Swizzle swizzle = {0, 1, 2, 3};
806    if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
807        nir_intrinsic_image_array(intrin))
808       swizzle = {0, 2, 1, 3};
809 
810    for (int i = 0; i < 4; ++i) {
811       auto flags = i != 3 ? AluInstr::write : AluInstr::last_write;
812       shader.emit_instruction(
813          new AluInstr(op1_mov, coord[swizzle[i]], coord_orig[i], flags));
814    }
815 
816    shader.emit_instruction(
817       new AluInstr(op1_mov, data_vec4[1], shader.rat_return_address(), AluInstr::write));
818 
819    if (intrin->intrinsic == nir_intrinsic_image_atomic_swap) {
820       shader.emit_instruction(
821          new AluInstr(op1_mov, data_vec4[0], vf.src(intrin->src[4], 0), AluInstr::write));
822       shader.emit_instruction(
823          new AluInstr(op1_mov,
824                       data_vec4[shader.chip_class() == ISA_CC_CAYMAN ? 2 : 3],
825                       vf.src(intrin->src[3], 0),
826                       AluInstr::last_write));
827    } else {
828       shader.emit_instruction(
829          new AluInstr(op1_mov, data_vec4[0], vf.src(intrin->src[3], 0), AluInstr::write));
830       shader.emit_instruction(
831          new AluInstr(op1_mov, data_vec4[2], vf.zero(), AluInstr::last_write));
832    }
833 
834    auto atomic =
835       new RatInstr(cf_mem_rat, opcode, data_vec4, coord, imageid, image_offset, 1, 0xf, 0);
836    shader.emit_instruction(atomic);
837 
838    atomic->set_ack();
839    if (read_result) {
840       atomic->set_instr_flag(ack_rat_return_write);
841       auto dest = vf.dest_vec4(intrin->def, pin_group);
842 
843       pipe_format format = nir_intrinsic_format(intrin);
844       unsigned fmt = fmt_32;
845       unsigned num_format = 0;
846       unsigned format_comp = 0;
847       unsigned endian = 0;
848       r600_vertex_data_type(format, &fmt, &num_format, &format_comp, &endian);
849 
850       auto fetch = new FetchInstr(vc_fetch,
851                                   dest,
852                                   {0, 1, 2, 3},
853                                   shader.rat_return_address(),
854                                   0,
855                                   no_index_offset,
856                                   (EVTXDataFormat)fmt,
857                                   (EVFetchNumFormat)num_format,
858                                   (EVFetchEndianSwap)endian,
859                                   R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid,
860                                   image_offset);
861       fetch->set_mfc(3);
862       fetch->set_fetch_flag(FetchInstr::srf_mode);
863       fetch->set_fetch_flag(FetchInstr::use_tc);
864       fetch->set_fetch_flag(FetchInstr::vpm);
865       fetch->set_fetch_flag(FetchInstr::wait_ack);
866       if (format_comp)
867          fetch->set_fetch_flag(FetchInstr::format_comp_signed);
868 
869       shader.chain_ssbo_read(fetch);
870       shader.emit_instruction(fetch);
871    }
872 
873    return true;
874 }
875 
876 #define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16)
877 
bool
RatInstr::emit_image_size(nir_intrinsic_instr *intrin, Shader& shader)
{
   /* Lower an image-size query intrinsic to r600 instructions.
    *
    * Buffer images are answered with a dedicated buffer-size query; all
    * other dimensions use a get_resinfo texture instruction.  Cube-map
    * arrays need an extra fixup because resinfo does not report the layer
    * count directly: it is fetched from the driver-provided buffer-info
    * constant buffer instead.
    *
    * intrin  - the image size intrinsic to lower
    * shader  - shader that receives the emitted instructions
    * returns true (all paths succeed).
    */
   auto& vf = shader.value_factory();

   /* Source operand for get_resinfo; swizzle 4 presumably marks all
    * components as unused for this query — TODO confirm against the
    * RegisterVec4 swizzle conventions. */
   auto src = RegisterVec4(0, true, {4, 4, 4, 4});

   /* Only LOD 0 is expected here (src[1] is the LOD argument). */
   assert(nir_src_as_uint(intrin->src[1]) == 0);

   /* Resolve the image resource id: base offset plus either a constant
    * index or a dynamically loaded one. */
   auto const_offset = nir_src_as_const_value(intrin->src[0]);
   PRegister dyn_offset = nullptr;

   int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET + nir_intrinsic_range_base(intrin);
   if (const_offset)
      res_id += const_offset[0].u32;
   else
      dyn_offset = shader.emit_load_to_register(vf.src(intrin->src[0], 0));

   if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_BUF) {
      /* Buffer images: element count comes from a buffer-size query, not
       * from resinfo. */
      auto dest = vf.dest_vec4(intrin->def, pin_group);
      shader.emit_instruction(new QueryBufferSizeInstr(dest, {0, 1, 2, 3}, res_id));
      return true;
   } else {

      if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE &&
          nir_intrinsic_image_array(intrin) &&
          intrin->def.num_components > 2) {
         /* Need to load the layers from a const buffer */

         /* Query width/height via resinfo but mask out component 2
          * (swizzle 7); the layer count is patched in below. */
         auto dest = vf.dest_vec4(intrin->def, pin_group);
         shader.emit_instruction(new TexInstr(TexInstr::get_resinfo,
                                              dest,
                                              {0, 1, 7, 3},
                                              src,
                                              res_id,
                                              dyn_offset));

         shader.set_flag(Shader::sh_txs_cube_array_comp);

         if (const_offset) {
            /* Constant image index: the layer count sits at a statically
             * known slot in the buffer-info constant buffer, so a single
             * uniform read fills dest.z. */
            unsigned lookup_resid = const_offset[0].u32 + shader.image_size_const_offset();
            shader.emit_instruction(
               new AluInstr(op1_mov,
                            dest[2],
                            vf.uniform(lookup_resid / 4 + R600_SHADER_BUFFER_INFO_SEL,
                                       lookup_resid % 4,
                                       R600_BUFFER_INFO_CONST_BUFFER),
                            AluInstr::last_write));
         } else {
            /* If the addressing is indirect we have to get the z-value by
             * using a binary search */
            auto addr = vf.temp_register();
            auto comp1 = vf.temp_register();
            auto comp2 = vf.temp_register();
            auto low_bit = vf.temp_register();
            auto high_bit = vf.temp_register();

            auto trgt = vf.temp_vec4(pin_group);

            /* Split the image index: addr = index / 4 selects the const
             * buffer vec4, the low two bits select the component. */
            shader.emit_instruction(new AluInstr(op2_lshr_int,
                                                 addr,
                                                 vf.src(intrin->src[0], 0),
                                                 vf.literal(2),
                                                 AluInstr::write));
            shader.emit_instruction(new AluInstr(op2_and_int,
                                                 low_bit,
                                                 vf.src(intrin->src[0], 0),
                                                 vf.one_i(),
                                                 AluInstr::write));
            shader.emit_instruction(new AluInstr(op2_and_int,
                                                 high_bit,
                                                 vf.src(intrin->src[0], 0),
                                                 vf.literal(2),
                                                 AluInstr::last_write));

            /* Fetch the vec4 holding the four candidate layer counts. */
            shader.emit_instruction(new LoadFromBuffer(trgt,
                                                       {0, 1, 2, 3},
                                                       addr,
                                                       R600_SHADER_BUFFER_INFO_SEL,
                                                       R600_BUFFER_INFO_CONST_BUFFER,
                                                       nullptr,
                                                       fmt_32_32_32_32_float));

            // this may be wrong
            /* Two-level select on the index bits picks the matching
             * component out of the fetched vec4 into dest.z. */
            shader.emit_instruction(new AluInstr(
               op3_cnde_int, comp1, high_bit, trgt[0], trgt[2], AluInstr::write));
            shader.emit_instruction(new AluInstr(
               op3_cnde_int, comp2, high_bit, trgt[1], trgt[3], AluInstr::last_write));
            shader.emit_instruction(new AluInstr(
               op3_cnde_int, dest[2], low_bit, comp1, comp2, AluInstr::last_write));
         }
      } else {
         /* All remaining image types: resinfo reports the full size. */
         auto dest = vf.dest_vec4(intrin->def, pin_group);
         shader.emit_instruction(new TexInstr(TexInstr::get_resinfo,
                                              dest,
                                              {0, 1, 2, 3},
                                              src,
                                              res_id,
                                              dyn_offset));
      }
   }
   return true;
}
981 
982 bool
emit_image_samples(nir_intrinsic_instr * intrin,Shader & shader)983 RatInstr::emit_image_samples(nir_intrinsic_instr *intrin, Shader& shader)
984 {
985    auto& vf = shader.value_factory();
986 
987    auto src = RegisterVec4(0, true, {4, 4, 4, 4});
988 
989    auto tmp =  shader.value_factory().temp_vec4(pin_group);
990    auto dest =  shader.value_factory().dest(intrin->def, 0, pin_free);
991 
992    auto const_offset = nir_src_as_const_value(intrin->src[0]);
993    PRegister dyn_offset = nullptr;
994 
995    int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET + nir_intrinsic_range_base(intrin);
996    if (const_offset)
997       res_id += const_offset[0].u32;
998    else
999       dyn_offset = shader.emit_load_to_register(vf.src(intrin->src[0], 0));
1000 
1001    shader.emit_instruction(new TexInstr(TexInstr::get_resinfo,
1002                                         tmp,
1003                                         {3, 7, 7, 7},
1004                                         src,
1005                                         res_id,
1006                                         dyn_offset));
1007 
1008    shader.emit_instruction(new AluInstr(op1_mov, dest, tmp[0], AluInstr::last_write));
1009    return true;
1010 }
1011 
1012 } // namespace r600
1013