xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/r600/sfn/sfn_shader.h (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /* -*- mesa-c++  -*-
2  * Copyright 2022 Collabora LTD
3  * Author: Gert Wollny <[email protected]>
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #ifndef SFN_SHADER_H
8 #define SFN_SHADER_H
9 
#include "amd_family.h"
#include "compiler/shader_enums.h"
#include "gallium/drivers/r600/r600_shader.h"
#include "sfn_instr.h"
#include "sfn_instr_controlflow.h"
#include "sfn_instrfactory.h"
#include "sfn_liverangeevaluator.h"

#include <bitset>
#include <cassert>
#include <cstdint>
#include <functional>
#include <iosfwd>
#include <list>
#include <map>
#include <memory>
#include <stack>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
22 
23 struct nir_shader;
24 struct nir_cf_node;
25 struct nir_if;
26 struct nir_block;
27 struct nir_instr;
28 
29 namespace r600 {
30 
31 class ShaderIO {
32 public:
33    void print(std::ostream& os) const;
34 
location()35    int location() const { return m_location; }
set_location(int location)36    void set_location(int location) { m_location = location; }
37 
varying_slot()38    gl_varying_slot varying_slot() const { return m_varying_slot; }
set_varying_slot(gl_varying_slot varying_slot)39    void set_varying_slot(gl_varying_slot varying_slot) { m_varying_slot = varying_slot; }
40 
no_varying()41    bool no_varying() const { return m_no_varying; }
set_no_varying(bool no_varying)42    void set_no_varying(bool no_varying) { m_no_varying = no_varying; }
43 
44    int spi_sid() const;
45 
set_gpr(int gpr)46    void set_gpr(int gpr) { m_gpr = gpr; }
gpr()47    int gpr() const { return m_gpr; }
48 
49 protected:
50    ShaderIO(const char *type, int loc, gl_varying_slot varying_slot = NUM_TOTAL_VARYING_SLOTS);
51 
52 private:
53    virtual void do_print(std::ostream& os) const = 0;
54 
55    const char *m_type;
56    int m_location{-1};
57    gl_varying_slot m_varying_slot{NUM_TOTAL_VARYING_SLOTS};
58    bool m_no_varying{false};
59    int m_gpr{0};
60 };
61 
62 class ShaderOutput : public ShaderIO {
63 public:
64    ShaderOutput();
65    ShaderOutput(int location, int writemask,
66                 gl_varying_slot varying_slot = NUM_TOTAL_VARYING_SLOTS);
67 
frag_result()68    gl_frag_result frag_result() const { return m_frag_result; }
set_frag_result(gl_frag_result frag_result)69    void set_frag_result(gl_frag_result frag_result) { m_frag_result = frag_result; }
70 
writemask()71    int writemask() const { return m_writemask; }
set_writemask(int writemask)72    void set_writemask(int writemask) { m_writemask = writemask; }
73 
export_param()74    int export_param() const { return m_export_param; }
set_export_param(int export_param)75    void set_export_param(int export_param) { m_export_param = export_param; }
76 
77 private:
78    void do_print(std::ostream& os) const override;
79 
80    gl_frag_result m_frag_result{static_cast<gl_frag_result>(FRAG_RESULT_MAX)};
81    int m_writemask{0};
82    int m_export_param{-1};
83 };
84 
85 class ShaderInput : public ShaderIO {
86 public:
87    ShaderInput();
88    ShaderInput(int location, gl_varying_slot varying_slot = NUM_TOTAL_VARYING_SLOTS);
89 
system_value()90    gl_system_value system_value() const { return m_system_value; }
set_system_value(gl_system_value system_value)91    void set_system_value(gl_system_value system_value) { m_system_value = system_value; }
92 
93    void set_interpolator(int interp, int interp_loc, bool uses_interpolate_at_centroid);
94    void set_uses_interpolate_at_centroid();
set_need_lds_pos()95    void set_need_lds_pos() { m_need_lds_pos = true; }
ij_index()96    int ij_index() const { return m_ij_index; }
97 
interpolator()98    int interpolator() const { return m_interpolator; }
interpolate_loc()99    int interpolate_loc() const { return m_interpolate_loc; }
need_lds_pos()100    bool need_lds_pos() const { return m_need_lds_pos; }
lds_pos()101    int lds_pos() const { return m_lds_pos; }
set_lds_pos(int pos)102    void set_lds_pos(int pos) { m_lds_pos = pos; }
103 
ring_offset()104    int ring_offset() const { return m_ring_offset; }
set_ring_offset(int offs)105    void set_ring_offset(int offs) { m_ring_offset = offs; }
uses_interpolate_at_centroid()106    bool uses_interpolate_at_centroid() const { return m_uses_interpolate_at_centroid; }
107 
108 private:
109    void do_print(std::ostream& os) const override;
110 
111    gl_system_value m_system_value{SYSTEM_VALUE_MAX};
112    int m_interpolator{0};
113    int m_interpolate_loc{0};
114    int m_ij_index{0};
115    bool m_uses_interpolate_at_centroid{false};
116    bool m_need_lds_pos{false};
117    int m_lds_pos{0};
118    int m_ring_offset{0};
119 };
120 
121 class Shader : public Allocate {
122 public:
123    using InputIterator = std::map<int, ShaderInput>::iterator;
124    using OutputIterator = std::map<int, ShaderOutput>::iterator;
125 
126    using ShaderBlocks = std::list<Block::Pointer, Allocator<Block::Pointer>>;
127 
128    Shader(const Shader& orig) = delete;
129 
~Shader()130    virtual ~Shader() {}
131 
shader_id()132    auto shader_id() const {return m_shader_id;}
133    // Needed for testing
reset_shader_id()134    void reset_shader_id() {m_shader_id = 0;}
135 
136    bool add_info_from_string(std::istream& is);
137 
138    static Shader *translate_from_nir(nir_shader *nir,
139                                      const pipe_stream_output_info *so_info,
140                                      r600_shader *gs_shader,
141                                      const r600_shader_key& key,
142                                      r600_chip_class chip_class,
143                                      radeon_family family);
144 
145    bool process(nir_shader *nir);
146 
147    bool process_cf_node(nir_cf_node *node);
148    bool process_if(nir_if *node);
149    bool process_loop(nir_loop *node);
150    bool process_block(nir_block *node);
151    bool process_instr(nir_instr *instr);
152    void emit_instruction(PInst instr);
153    bool emit_atomic_local_shared(nir_intrinsic_instr *instr);
154 
155    void print(std::ostream& os) const;
156    void print_header(std::ostream& os) const;
157 
158    bool process_intrinsic(nir_intrinsic_instr *intr);
159 
160    virtual bool load_input(nir_intrinsic_instr *intr) = 0;
161    virtual bool store_output(nir_intrinsic_instr *intr) = 0;
162 
163    bool load_ubo(nir_intrinsic_instr *intr);
164 
165    ValueFactory& value_factory();
166 
add_output(const ShaderOutput & output)167    void add_output(const ShaderOutput& output) { m_outputs[output.location()] = output; }
168 
add_input(const ShaderInput & input)169    void add_input(const ShaderInput& input) { m_inputs[input.location()] = input; }
170 
171    void set_input_gpr(int driver_lcation, int gpr);
172 
find_input(int location)173    InputIterator find_input(int location) { return m_inputs.find(location); }
174 
input_not_found()175    InputIterator input_not_found() { return m_inputs.end(); }
176 
177    OutputIterator find_output(int location);
output_not_found()178    OutputIterator output_not_found() { return m_outputs.end(); }
179 
func()180    ShaderBlocks& func() { return m_root; }
181    void reset_function(ShaderBlocks& new_root);
182 
183    void emit_instruction_from_string(const std::string& s);
184 
185    void set_info(nir_shader *nir);
186    void get_shader_info(r600_shader *sh_info);
187 
chip_class()188    r600_chip_class chip_class() const { return m_chip_class; }
set_chip_class(r600_chip_class cls)189    void set_chip_class(r600_chip_class cls) { m_chip_class = cls; }
190 
chip_family()191    radeon_family chip_family() const { return m_chip_family; }
set_chip_family(radeon_family family)192    void set_chip_family(radeon_family family) { m_chip_family = family; }
193 
194    void start_new_block(int nesting_depth);
195 
196    const ShaderOutput& output(int base) const;
197 
198    LiveRangeMap prepare_live_range_map();
199 
set_last_txd(Instr * txd)200    void set_last_txd(Instr *txd) { m_last_txd = txd; }
last_txd()201    Instr *last_txd() { return m_last_txd; }
202 
203    // Needed for keeping the memory access in order
204    void chain_scratch_read(Instr *instr);
205    void chain_ssbo_read(Instr *instr);
206 
enabled_stream_buffers_mask()207    virtual uint32_t enabled_stream_buffers_mask() const { return 0; }
208 
noutputs()209    size_t noutputs() const { return m_outputs.size(); }
ninputs()210    size_t ninputs() const { return m_inputs.size(); }
211 
212    enum Flags {
213       sh_indirect_const_file,
214       sh_needs_scratch_space,
215       sh_needs_sbo_ret_address,
216       sh_uses_atomics,
217       sh_uses_images,
218       sh_uses_tex_buffer,
219       sh_writes_memory,
220       sh_txs_cube_array_comp,
221       sh_indirect_atomic,
222       sh_mem_barrier,
223       sh_legacy_math_rules,
224       sh_disble_sb,
225       sh_flags_count
226    };
227 
set_flag(Flags f)228    void set_flag(Flags f) { m_flags.set(f); }
has_flag(Flags f)229    bool has_flag(Flags f) const { return m_flags.test(f); }
230 
atomic_file_count()231    int atomic_file_count() const { return m_atomic_file_count; }
232 
233    PRegister atomic_update();
234    int remap_atomic_base(int base);
235    auto evaluate_resource_offset(nir_intrinsic_instr *instr, int src_id)
236       -> std::pair<int, PRegister>;
ssbo_image_offset()237    int ssbo_image_offset() const { return m_ssbo_image_offset; }
rat_return_address()238    PRegister rat_return_address()
239    {
240       assert(m_rat_return_address);
241       return m_rat_return_address;
242    }
243 
244    PRegister emit_load_to_register(PVirtualValue src, int chan = -1);
245 
image_size_const_offset()246    virtual unsigned image_size_const_offset() { return 0;}
247 
required_registers()248    auto required_registers() const { return m_required_registers;}
249 
250 protected:
251    enum ESlots {
252       es_face,
253       es_instanceid,
254       es_invocation_id,
255       es_patch_id,
256       es_pos,
257       es_rel_patch_id,
258       es_sample_mask_in,
259       es_sample_id,
260       es_sample_pos,
261       es_tess_factor_base,
262       es_vertexid,
263       es_tess_coord,
264       es_primitive_id,
265       es_helper_invocation,
266       es_last
267    };
268 
269    std::bitset<es_last> m_sv_values;
270 
271    Shader(const char *type_id, unsigned atomic_base);
272 
273    const ShaderInput& input(int base) const;
274 
275    bool emit_simple_mov(nir_def& def, int chan, PVirtualValue src, Pin pin = pin_free);
276 
277    template <typename T>
278    using IOMap = std::map<int, T, std::less<int>, Allocator<std::pair<const int, T>>>;
279 
inputs()280    IOMap<ShaderInput>& inputs() { return m_inputs; }
281 
282 private:
283    virtual bool process_stage_intrinsic(nir_intrinsic_instr *intr) = 0;
284 
285    bool allocate_registers_from_string(std::istream& is, Pin pin);
286    bool allocate_arrays_from_string(std::istream& is);
287 
288    bool read_chipclass(std::istream& is);
289    bool read_family(std::istream& is);
290 
291    bool scan_shader(const nir_function *impl);
292    bool scan_uniforms(nir_variable *uniform);
293    void allocate_reserved_registers();
294 
295    virtual int do_allocate_reserved_registers() = 0;
296 
297    bool scan_instruction(nir_instr *instr);
298    virtual bool do_scan_instruction(nir_instr *instr) = 0;
299 
300    void print_properties(std::ostream& os) const;
301    virtual void do_print_properties(std::ostream& os) const = 0;
302 
303    bool read_output(std::istream& is);
304    bool read_input(std::istream& is);
305    virtual bool read_prop(std::istream& is) = 0;
306 
307    bool emit_control_flow(ControlFlowInstr::CFType type);
308    bool emit_store_scratch(nir_intrinsic_instr *intr);
309    bool emit_load_scratch(nir_intrinsic_instr *intr);
310    bool emit_load_global(nir_intrinsic_instr *intr);
311    bool emit_local_store(nir_intrinsic_instr *intr);
312    bool emit_local_load(nir_intrinsic_instr *instr);
313    bool emit_load_tcs_param_base(nir_intrinsic_instr *instr, int offset);
314    bool emit_group_barrier(nir_intrinsic_instr *intr);
315    bool emit_shader_clock(nir_intrinsic_instr *instr);
316    bool emit_wait_ack();
317    bool emit_barrier(nir_intrinsic_instr *instr);
318    bool emit_tex_fdd(const nir_intrinsic_instr* intr, int opcode, bool fine);
319    bool emit_load_reg(nir_intrinsic_instr *intr);
320    bool emit_load_reg_indirect(nir_intrinsic_instr *intr);
321    bool emit_store_reg(nir_intrinsic_instr *intr);
322    bool emit_store_reg_indirect(nir_intrinsic_instr *intr);
323 
324    bool equal_to(const Shader& other) const;
325    void finalize();
326    virtual void do_finalize();
327 
328    virtual void do_get_shader_info(r600_shader *sh_info);
329 
330    ShaderBlocks m_root;
331    Block::Pointer m_current_block;
332 
333    InstrFactory *m_instr_factory;
334    const char *m_type_id;
335 
336    IOMap<ShaderOutput> m_outputs;
337    IOMap<ShaderInput> m_inputs;
338    r600_chip_class m_chip_class;
339    radeon_family m_chip_family{CHIP_CEDAR};
340 
341    int m_scratch_size;
342    int m_next_block;
343    bool m_indirect_const_file{false};
344 
345    Instr *m_last_txd{nullptr};
346 
347    uint32_t m_indirect_files{0};
348    std::bitset<sh_flags_count> m_flags;
349    uint32_t nhwatomic_ranges{0};
350    std::vector<r600_shader_atomic, Allocator<r600_shader_atomic>> m_atomics;
351 
352    uint32_t m_nhwatomic{0};
353    uint32_t m_atomic_base{0};
354    uint32_t m_next_hwatomic_loc{0};
355    std::unordered_map<int, int,
356                       std::hash<int>,  std::equal_to<int>,
357                       Allocator<std::pair<const int, int>>> m_atomic_base_map;
358    uint32_t m_atomic_file_count{0};
359    PRegister m_atomic_update{nullptr};
360    PRegister m_rat_return_address{nullptr};
361 
362    int32_t m_ssbo_image_offset{0};
363    uint32_t m_nloops{0};
364    uint32_t m_required_registers{0};
365 
366    int64_t m_shader_id;
367    static int64_t s_next_shader_id;
368 
369    class InstructionChain : public InstrVisitor {
370    public:
visit(AluGroup * instr)371       void visit(AluGroup *instr) override { (void)instr; }
visit(TexInstr * instr)372       void visit(TexInstr *instr) override { (void)instr; }
visit(ExportInstr * instr)373       void visit(ExportInstr *instr) override { (void)instr; }
visit(FetchInstr * instr)374       void visit(FetchInstr *instr) override { (void)instr; }
visit(Block * instr)375       void visit(Block *instr) override { (void)instr; }
visit(ControlFlowInstr * instr)376       void visit(ControlFlowInstr *instr) override { (void)instr; }
visit(IfInstr * instr)377       void visit(IfInstr *instr) override { (void)instr; }
visit(StreamOutInstr * instr)378       void visit(StreamOutInstr *instr) override { (void)instr; }
visit(MemRingOutInstr * instr)379       void visit(MemRingOutInstr *instr) override { (void)instr; }
visit(EmitVertexInstr * instr)380       void visit(EmitVertexInstr *instr) override { (void)instr; }
visit(WriteTFInstr * instr)381       void visit(WriteTFInstr *instr) override { (void)instr; }
visit(LDSAtomicInstr * instr)382       void visit(LDSAtomicInstr *instr) override { (void)instr; }
visit(LDSReadInstr * instr)383       void visit(LDSReadInstr *instr) override { (void)instr; }
384 
385       void visit(AluInstr *instr) override;
386       void visit(ScratchIOInstr *instr) override;
387       void visit(GDSInstr *instr) override;
388       void visit(RatInstr *instr) override;
389 
390       void apply(Instr *current, Instr **last);
391 
392       Shader *this_shader{nullptr};
393       Instr *last_scratch_instr{nullptr};
394       Instr *last_gds_instr{nullptr};
395       Instr *last_ssbo_instr{nullptr};
396       Instr *last_kill_instr{nullptr};
397       Instr *last_lds_access{nullptr};
398       Instr *last_group_barrier{nullptr};
399       std::unordered_map<int, Instr * > last_alu_with_indirect_reg;
400       bool prepare_mem_barrier{false};
401    };
402 
403    InstructionChain m_chain_instr;
404    std::list<Instr *, Allocator<Instr *>> m_loops;
405    int m_control_flow_depth{0};
406    std::list<nir_intrinsic_instr*> m_register_allocations;
407 };
408 
409 } // namespace r600
410 
#endif // SFN_SHADER_H
412