1 /* -*- mesa-c++ -*- 2 * Copyright 2022 Collabora LTD 3 * Author: Gert Wollny <[email protected]> 4 * SPDX-License-Identifier: MIT 5 */ 6 7 #ifndef SFN_SHADER_H 8 #define SFN_SHADER_H 9 10 #include "amd_family.h" 11 #include "compiler/shader_enums.h" 12 #include "gallium/drivers/r600/r600_shader.h" 13 #include "sfn_instr.h" 14 #include "sfn_instr_controlflow.h" 15 #include "sfn_instrfactory.h" 16 #include "sfn_liverangeevaluator.h" 17 18 #include <bitset> 19 #include <memory> 20 #include <stack> 21 #include <vector> 22 23 struct nir_shader; 24 struct nir_cf_node; 25 struct nir_if; 26 struct nir_block; 27 struct nir_instr; 28 29 namespace r600 { 30 31 class ShaderIO { 32 public: 33 void print(std::ostream& os) const; 34 location()35 int location() const { return m_location; } set_location(int location)36 void set_location(int location) { m_location = location; } 37 varying_slot()38 gl_varying_slot varying_slot() const { return m_varying_slot; } set_varying_slot(gl_varying_slot varying_slot)39 void set_varying_slot(gl_varying_slot varying_slot) { m_varying_slot = varying_slot; } 40 no_varying()41 bool no_varying() const { return m_no_varying; } set_no_varying(bool no_varying)42 void set_no_varying(bool no_varying) { m_no_varying = no_varying; } 43 44 int spi_sid() const; 45 set_gpr(int gpr)46 void set_gpr(int gpr) { m_gpr = gpr; } gpr()47 int gpr() const { return m_gpr; } 48 49 protected: 50 ShaderIO(const char *type, int loc, gl_varying_slot varying_slot = NUM_TOTAL_VARYING_SLOTS); 51 52 private: 53 virtual void do_print(std::ostream& os) const = 0; 54 55 const char *m_type; 56 int m_location{-1}; 57 gl_varying_slot m_varying_slot{NUM_TOTAL_VARYING_SLOTS}; 58 bool m_no_varying{false}; 59 int m_gpr{0}; 60 }; 61 62 class ShaderOutput : public ShaderIO { 63 public: 64 ShaderOutput(); 65 ShaderOutput(int location, int writemask, 66 gl_varying_slot varying_slot = NUM_TOTAL_VARYING_SLOTS); 67 frag_result()68 gl_frag_result frag_result() const { return m_frag_result; } set_frag_result(gl_frag_result frag_result)69 void set_frag_result(gl_frag_result frag_result) { m_frag_result = frag_result; } 70 writemask()71 int writemask() const { return m_writemask; } set_writemask(int writemask)72 void set_writemask(int writemask) { m_writemask = writemask; } 73 export_param()74 int export_param() const { return m_export_param; } set_export_param(int export_param)75 void set_export_param(int export_param) { m_export_param = export_param; } 76 77 private: 78 void do_print(std::ostream& os) const override; 79 80 gl_frag_result m_frag_result{static_cast<gl_frag_result>(FRAG_RESULT_MAX)}; 81 int m_writemask{0}; 82 int m_export_param{-1}; 83 }; 84 85 class ShaderInput : public ShaderIO { 86 public: 87 ShaderInput(); 88 ShaderInput(int location, gl_varying_slot varying_slot = NUM_TOTAL_VARYING_SLOTS); 89 system_value()90 gl_system_value system_value() const { return m_system_value; } set_system_value(gl_system_value system_value)91 void set_system_value(gl_system_value system_value) { m_system_value = system_value; } 92 93 void set_interpolator(int interp, int interp_loc, bool uses_interpolate_at_centroid); 94 void set_uses_interpolate_at_centroid(); set_need_lds_pos()95 void set_need_lds_pos() { m_need_lds_pos = true; } ij_index()96 int ij_index() const { return m_ij_index; } 97 interpolator()98 int interpolator() const { return m_interpolator; } interpolate_loc()99 int interpolate_loc() const { return m_interpolate_loc; } need_lds_pos()100 bool need_lds_pos() const { return m_need_lds_pos; } lds_pos()101 int lds_pos() const { return m_lds_pos; } set_lds_pos(int pos)102 void set_lds_pos(int pos) { m_lds_pos = pos; } 103 ring_offset()104 int ring_offset() const { return m_ring_offset; } set_ring_offset(int offs)105 void set_ring_offset(int offs) { m_ring_offset = offs; } uses_interpolate_at_centroid()106 bool uses_interpolate_at_centroid() const { return m_uses_interpolate_at_centroid; } 107 108 private: 109 void do_print(std::ostream& os) const override; 110 111 gl_system_value m_system_value{SYSTEM_VALUE_MAX}; 112 int m_interpolator{0}; 113 int m_interpolate_loc{0}; 114 int m_ij_index{0}; 115 bool m_uses_interpolate_at_centroid{false}; 116 bool m_need_lds_pos{false}; 117 int m_lds_pos{0}; 118 int m_ring_offset{0}; 119 }; 120 121 class Shader : public Allocate { 122 public: 123 using InputIterator = std::map<int, ShaderInput>::iterator; 124 using OutputIterator = std::map<int, ShaderOutput>::iterator; 125 126 using ShaderBlocks = std::list<Block::Pointer, Allocator<Block::Pointer>>; 127 128 Shader(const Shader& orig) = delete; 129 ~Shader()130 virtual ~Shader() {} 131 shader_id()132 auto shader_id() const {return m_shader_id;} 133 // Needed for testing reset_shader_id()134 void reset_shader_id() {m_shader_id = 0;} 135 136 bool add_info_from_string(std::istream& is); 137 138 static Shader *translate_from_nir(nir_shader *nir, 139 const pipe_stream_output_info *so_info, 140 r600_shader *gs_shader, 141 const r600_shader_key& key, 142 r600_chip_class chip_class, 143 radeon_family family); 144 145 bool process(nir_shader *nir); 146 147 bool process_cf_node(nir_cf_node *node); 148 bool process_if(nir_if *node); 149 bool process_loop(nir_loop *node); 150 bool process_block(nir_block *node); 151 bool process_instr(nir_instr *instr); 152 void emit_instruction(PInst instr); 153 bool emit_atomic_local_shared(nir_intrinsic_instr *instr); 154 155 void print(std::ostream& os) const; 156 void print_header(std::ostream& os) const; 157 158 bool process_intrinsic(nir_intrinsic_instr *intr); 159 160 virtual bool load_input(nir_intrinsic_instr *intr) = 0; 161 virtual bool store_output(nir_intrinsic_instr *intr) = 0; 162 163 bool load_ubo(nir_intrinsic_instr *intr); 164 165 ValueFactory& value_factory(); 166 add_output(const ShaderOutput & output)167 void add_output(const ShaderOutput& output) { m_outputs[output.location()] = output; } 168 add_input(const ShaderInput & input)169 void add_input(const ShaderInput& input) { m_inputs[input.location()] = input; } 170 171 void set_input_gpr(int driver_lcation, int gpr); 172 find_input(int location)173 InputIterator find_input(int location) { return m_inputs.find(location); } 174 input_not_found()175 InputIterator input_not_found() { return m_inputs.end(); } 176 177 OutputIterator find_output(int location); output_not_found()178 OutputIterator output_not_found() { return m_outputs.end(); } 179 func()180 ShaderBlocks& func() { return m_root; } 181 void reset_function(ShaderBlocks& new_root); 182 183 void emit_instruction_from_string(const std::string& s); 184 185 void set_info(nir_shader *nir); 186 void get_shader_info(r600_shader *sh_info); 187 chip_class()188 r600_chip_class chip_class() const { return m_chip_class; } set_chip_class(r600_chip_class cls)189 void set_chip_class(r600_chip_class cls) { m_chip_class = cls; } 190 chip_family()191 radeon_family chip_family() const { return m_chip_family; } set_chip_family(radeon_family family)192 void set_chip_family(radeon_family family) { m_chip_family = family; } 193 194 void start_new_block(int nesting_depth); 195 196 const ShaderOutput& output(int base) const; 197 198 LiveRangeMap prepare_live_range_map(); 199 set_last_txd(Instr * txd)200 void set_last_txd(Instr *txd) { m_last_txd = txd; } last_txd()201 Instr *last_txd() { return m_last_txd; } 202 203 // Needed for keeping the memory access in order 204 void chain_scratch_read(Instr *instr); 205 void chain_ssbo_read(Instr *instr); 206 enabled_stream_buffers_mask()207 virtual uint32_t enabled_stream_buffers_mask() const { return 0; } 208 noutputs()209 size_t noutputs() const { return m_outputs.size(); } ninputs()210 size_t ninputs() const { return m_inputs.size(); } 211 212 enum Flags { 213 sh_indirect_const_file, 214 sh_needs_scratch_space, 215 sh_needs_sbo_ret_address, 216 sh_uses_atomics, 217 sh_uses_images, 218 sh_uses_tex_buffer, 219 sh_writes_memory, 220 sh_txs_cube_array_comp, 221 sh_indirect_atomic, 222 sh_mem_barrier, 223 sh_legacy_math_rules, 224 sh_disble_sb, 225 sh_flags_count 226 }; 227 set_flag(Flags f)228 void set_flag(Flags f) { m_flags.set(f); } has_flag(Flags f)229 bool has_flag(Flags f) const { return m_flags.test(f); } 230 atomic_file_count()231 int atomic_file_count() const { return m_atomic_file_count; } 232 233 PRegister atomic_update(); 234 int remap_atomic_base(int base); 235 auto evaluate_resource_offset(nir_intrinsic_instr *instr, int src_id) 236 -> std::pair<int, PRegister>; ssbo_image_offset()237 int ssbo_image_offset() const { return m_ssbo_image_offset; } rat_return_address()238 PRegister rat_return_address() 239 { 240 assert(m_rat_return_address); 241 return m_rat_return_address; 242 } 243 244 PRegister emit_load_to_register(PVirtualValue src, int chan = -1); 245 image_size_const_offset()246 virtual unsigned image_size_const_offset() { return 0;} 247 required_registers()248 auto required_registers() const { return m_required_registers;} 249 250 protected: 251 enum ESlots { 252 es_face, 253 es_instanceid, 254 es_invocation_id, 255 es_patch_id, 256 es_pos, 257 es_rel_patch_id, 258 es_sample_mask_in, 259 es_sample_id, 260 es_sample_pos, 261 es_tess_factor_base, 262 es_vertexid, 263 es_tess_coord, 264 es_primitive_id, 265 es_helper_invocation, 266 es_last 267 }; 268 269 std::bitset<es_last> m_sv_values; 270 271 Shader(const char *type_id, unsigned atomic_base); 272 273 const ShaderInput& input(int base) const; 274 275 bool emit_simple_mov(nir_def& def, int chan, PVirtualValue src, Pin pin = pin_free); 276 277 template <typename T> 278 using IOMap = std::map<int, T, std::less<int>, Allocator<std::pair<const int, T>>>; 279 inputs()280 IOMap<ShaderInput>& inputs() { return m_inputs; } 281 282 private: 283 virtual bool process_stage_intrinsic(nir_intrinsic_instr *intr) = 0; 284 285 bool allocate_registers_from_string(std::istream& is, Pin pin); 286 bool allocate_arrays_from_string(std::istream& is); 287 288 bool read_chipclass(std::istream& is); 289 bool read_family(std::istream& is); 290 291 bool scan_shader(const nir_function *impl); 292 bool scan_uniforms(nir_variable *uniform); 293 void allocate_reserved_registers(); 294 295 virtual int do_allocate_reserved_registers() = 0; 296 297 bool scan_instruction(nir_instr *instr); 298 virtual bool do_scan_instruction(nir_instr *instr) = 0; 299 300 void print_properties(std::ostream& os) const; 301 virtual void do_print_properties(std::ostream& os) const = 0; 302 303 bool read_output(std::istream& is); 304 bool read_input(std::istream& is); 305 virtual bool read_prop(std::istream& is) = 0; 306 307 bool emit_control_flow(ControlFlowInstr::CFType type); 308 bool emit_store_scratch(nir_intrinsic_instr *intr); 309 bool emit_load_scratch(nir_intrinsic_instr *intr); 310 bool emit_load_global(nir_intrinsic_instr *intr); 311 bool emit_local_store(nir_intrinsic_instr *intr); 312 bool emit_local_load(nir_intrinsic_instr *instr); 313 bool emit_load_tcs_param_base(nir_intrinsic_instr *instr, int offset); 314 bool emit_group_barrier(nir_intrinsic_instr *intr); 315 bool emit_shader_clock(nir_intrinsic_instr *instr); 316 bool emit_wait_ack(); 317 bool emit_barrier(nir_intrinsic_instr *instr); 318 bool emit_tex_fdd(const nir_intrinsic_instr* intr, int opcode, bool fine); 319 bool emit_load_reg(nir_intrinsic_instr *intr); 320 bool emit_load_reg_indirect(nir_intrinsic_instr *intr); 321 bool emit_store_reg(nir_intrinsic_instr *intr); 322 bool emit_store_reg_indirect(nir_intrinsic_instr *intr); 323 324 bool equal_to(const Shader& other) const; 325 void finalize(); 326 virtual void do_finalize(); 327 328 virtual void do_get_shader_info(r600_shader *sh_info); 329 330 ShaderBlocks m_root; 331 Block::Pointer m_current_block; 332 333 InstrFactory *m_instr_factory; 334 const char *m_type_id; 335 336 IOMap<ShaderOutput> m_outputs; 337 IOMap<ShaderInput> m_inputs; 338 r600_chip_class m_chip_class; 339 radeon_family m_chip_family{CHIP_CEDAR}; 340 341 int m_scratch_size; 342 int m_next_block; 343 bool m_indirect_const_file{false}; 344 345 Instr *m_last_txd{nullptr}; 346 347 uint32_t m_indirect_files{0}; 348 std::bitset<sh_flags_count> m_flags; 349 uint32_t nhwatomic_ranges{0}; 350 std::vector<r600_shader_atomic, Allocator<r600_shader_atomic>> m_atomics; 351 352 uint32_t m_nhwatomic{0}; 353 uint32_t m_atomic_base{0}; 354 uint32_t m_next_hwatomic_loc{0}; 355 std::unordered_map<int, int, 356 std::hash<int>, std::equal_to<int>, 357 Allocator<std::pair<const int, int>>> m_atomic_base_map; 358 uint32_t m_atomic_file_count{0}; 359 PRegister m_atomic_update{nullptr}; 360 PRegister m_rat_return_address{nullptr}; 361 362 int32_t m_ssbo_image_offset{0}; 363 uint32_t m_nloops{0}; 364 uint32_t m_required_registers{0}; 365 366 int64_t m_shader_id; 367 static int64_t s_next_shader_id; 368 369 class InstructionChain : public InstrVisitor { 370 public: visit(AluGroup * instr)371 void visit(AluGroup *instr) override { (void)instr; } visit(TexInstr * instr)372 void visit(TexInstr *instr) override { (void)instr; } visit(ExportInstr * instr)373 void visit(ExportInstr *instr) override { (void)instr; } visit(FetchInstr * instr)374 void visit(FetchInstr *instr) override { (void)instr; } visit(Block * instr)375 void visit(Block *instr) override { (void)instr; } visit(ControlFlowInstr * instr)376 void visit(ControlFlowInstr *instr) override { (void)instr; } visit(IfInstr * instr)377 void visit(IfInstr *instr) override { (void)instr; } visit(StreamOutInstr * instr)378 void visit(StreamOutInstr *instr) override { (void)instr; } visit(MemRingOutInstr * instr)379 void visit(MemRingOutInstr *instr) override { (void)instr; } visit(EmitVertexInstr * instr)380 void visit(EmitVertexInstr *instr) override { (void)instr; } visit(WriteTFInstr * instr)381 void visit(WriteTFInstr *instr) override { (void)instr; } visit(LDSAtomicInstr * instr)382 void visit(LDSAtomicInstr *instr) override { (void)instr; } visit(LDSReadInstr * instr)383 void visit(LDSReadInstr *instr) override { (void)instr; } 384 385 void visit(AluInstr *instr) override; 386 void visit(ScratchIOInstr *instr) override; 387 void visit(GDSInstr *instr) override; 388 void visit(RatInstr *instr) override; 389 390 void apply(Instr *current, Instr **last); 391 392 Shader *this_shader{nullptr}; 393 Instr *last_scratch_instr{nullptr}; 394 Instr *last_gds_instr{nullptr}; 395 Instr *last_ssbo_instr{nullptr}; 396 Instr *last_kill_instr{nullptr}; 397 Instr *last_lds_access{nullptr}; 398 Instr *last_group_barrier{nullptr}; 399 std::unordered_map<int, Instr * > last_alu_with_indirect_reg; 400 bool prepare_mem_barrier{false}; 401 }; 402 403 InstructionChain m_chain_instr; 404 std::list<Instr *, Allocator<Instr *>> m_loops; 405 int m_control_flow_depth{0}; 406 std::list<nir_intrinsic_instr*> m_register_allocations; 407 }; 408 409 } // namespace r600 410 411 #endif // SHADER_H 412