/* -*- mesa-c++  -*-
 * Copyright 2019 Collabora LTD
 * Author: Gert Wollny <[email protected]>
 * SPDX-License-Identifier: MIT
 */

#include "sfn_nir.h"

#include "../r600_asm.h"
#include "../r600_pipe.h"
#include "../r600_shader.h"

#include "nir.h"
#include "nir_builder.h"
#include "nir_intrinsics.h"
#include "sfn_assembler.h"
#include "sfn_debug.h"
#include "sfn_instr_tex.h"
#include "sfn_liverangeevaluator.h"
#include "sfn_nir_lower_alu.h"
#include "sfn_nir_lower_fs_out_to_vector.h"
#include "sfn_nir_lower_tex.h"
#include "sfn_optimizer.h"
#include "sfn_ra.h"
#include "sfn_scheduler.h"
#include "sfn_shader.h"
#include "sfn_split_address_loads.h"
#include "util/u_debug.h"
#include "util/u_prim.h"

#include <vector>

namespace r600 {

using std::vector;

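/* Small helper around nir_shader_lower_instructions(): the static
 * filter_instr()/lower_instr() callbacks forward to the virtual
 * filter()/lower() methods, so a concrete lowering pass only implements
 * those two hooks and calls run() on the shader.
 */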
NirLowerInstruction::NirLowerInstruction():
    b(nullptr)
{
}

bool
NirLowerInstruction::filter_instr(const nir_instr *instr, const void *data)
{
   auto me = reinterpret_cast<const NirLowerInstruction *>(data);
   return me->filter(instr);
}

nir_def *
NirLowerInstruction::lower_instr(nir_builder *b, nir_instr *instr, void *data)
{
   auto me = reinterpret_cast<NirLowerInstruction *>(data);
   me->set_builder(b);
   return me->lower(instr);
}

bool
NirLowerInstruction::run(nir_shader *shader)
{
   return nir_shader_lower_instructions(shader, filter_instr, lower_instr, (void *)this);
}

AssemblyFromShader::~AssemblyFromShader() {}

bool
AssemblyFromShader::lower(const Shader& ir)
{
   return do_lower(ir);
}

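/* Rewrite the address source of load/store_scratch intrinsics: the original
 * address is replaced by address >> (4 * align), where align is the number
 * of components of the accessed value (presumably matching the units in
 * which the backend addresses scratch memory).
 */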
static void
r600_nir_lower_scratch_address_impl(nir_builder *b, nir_intrinsic_instr *instr)
{
   b->cursor = nir_before_instr(&instr->instr);

   int address_index = 0;
   int align;

   if (instr->intrinsic == nir_intrinsic_store_scratch) {
      align = instr->src[0].ssa->num_components;
      address_index = 1;
   } else {
      align = instr->def.num_components;
   }

   nir_def *address = instr->src[address_index].ssa;
   nir_def *new_address = nir_ishr_imm(b, address, 4 * align);

   nir_src_rewrite(&instr->src[address_index], new_address);
}

bool
r600_lower_scratch_addresses(nir_shader *shader)
{
   bool progress = false;
   nir_foreach_function_impl(impl, shader)
   {
      nir_builder build = nir_builder_create(impl);

      nir_foreach_block(block, impl)
      {
         nir_foreach_instr(instr, block)
         {
            if (instr->type != nir_instr_type_intrinsic)
               continue;
            nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
            if (op->intrinsic != nir_intrinsic_load_scratch &&
                op->intrinsic != nir_intrinsic_store_scratch)
               continue;
            r600_nir_lower_scratch_address_impl(&build, op);
            progress = true;
         }
      }
   }
   return progress;
}

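/* Keep the uniform list sorted by (binding, offset): insert_uniform_sorted()
 * is the insertion step, and sort_uniforms() re-links all uniform variables
 * of the shader in that order.
 */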
static void
insert_uniform_sorted(struct exec_list *var_list, nir_variable *new_var)
{
   nir_foreach_variable_in_list(var, var_list)
   {
      if (var->data.binding > new_var->data.binding ||
          (var->data.binding == new_var->data.binding &&
           var->data.offset > new_var->data.offset)) {
         exec_node_insert_node_before(&var->node, &new_var->node);
         return;
      }
   }
   exec_list_push_tail(var_list, &new_var->node);
}

void
sort_uniforms(nir_shader *shader)
{
   struct exec_list new_list;
   exec_list_make_empty(&new_list);

   nir_foreach_uniform_variable_safe(var, shader)
   {
      exec_node_remove(&var->node);
      insert_uniform_sorted(&new_list, var);
   }
   exec_list_append(&shader->variables, &new_list);
}

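/* Sort the fragment shader outputs: the comparison is meant to place
 * color/data outputs ahead of depth, stencil, and sample mask, and otherwise
 * orders by location and dual-source index; sort_fsoutput() then assigns
 * driver_location sequentially in the resulting order.
 */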
static void
insert_fsoutput_sorted(struct exec_list *var_list, nir_variable *new_var)
{

   nir_foreach_variable_in_list(var, var_list)
   {
      if ((var->data.location >= FRAG_RESULT_DATA0 ||
          var->data.location == FRAG_RESULT_COLOR) &&
          (new_var->data.location < FRAG_RESULT_COLOR ||
           new_var->data.location == FRAG_RESULT_SAMPLE_MASK)) {
         exec_node_insert_after(&var->node, &new_var->node);
         return;
      } else if ((new_var->data.location >= FRAG_RESULT_DATA0 ||
                  new_var->data.location == FRAG_RESULT_COLOR) &&
                 (var->data.location < FRAG_RESULT_COLOR ||
                  var->data.location == FRAG_RESULT_SAMPLE_MASK)) {
         exec_node_insert_node_before(&var->node, &new_var->node);
         return;
      } else if (var->data.location > new_var->data.location ||
          (var->data.location == new_var->data.location &&
           var->data.index > new_var->data.index)) {
         exec_node_insert_node_before(&var->node, &new_var->node);
         return;
      }
   }

   exec_list_push_tail(var_list, &new_var->node);
}

void
sort_fsoutput(nir_shader *shader)
{
   struct exec_list new_list;
   exec_list_make_empty(&new_list);

   nir_foreach_shader_out_variable_safe(var, shader)
   {
      exec_node_remove(&var->node);
      insert_fsoutput_sorted(&new_list, var);
   }

   unsigned driver_location = 0;
   nir_foreach_variable_in_list(var, &new_list) var->data.driver_location =
      driver_location++;

   exec_list_append(&shader->variables, &new_list);
}

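/* Lower writes to VARYING_SLOT_CLIP_VERTEX: the clip vertex is multiplied
 * with the eight user clip planes loaded from R600_BUFFER_INFO_CONST_BUFFER
 * and the resulting distances are stored as CLIP_DIST0 and CLIP_DIST1. The
 * original clip-vertex store is moved to an extra output slot (m_clipvtx),
 * and stream-output entries that referenced the clip vertex are redirected
 * to that slot.
 */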
class LowerClipvertexWrite : public NirLowerInstruction {

public:
   LowerClipvertexWrite(int noutputs, pipe_stream_output_info& so_info):
       m_clipplane1(noutputs),
       m_clipvtx(noutputs + 1),
       m_so_info(so_info)
   {
   }

private:
   bool filter(const nir_instr *instr) const override
   {
      if (instr->type != nir_instr_type_intrinsic)
         return false;

      auto intr = nir_instr_as_intrinsic(instr);
      if (intr->intrinsic != nir_intrinsic_store_output)
         return false;

      return nir_intrinsic_io_semantics(intr).location == VARYING_SLOT_CLIP_VERTEX;
   }

   nir_def *lower(nir_instr *instr) override
   {

      auto intr = nir_instr_as_intrinsic(instr);
      nir_def *output[8] = {nullptr};

      auto buf_id = nir_imm_int(b, R600_BUFFER_INFO_CONST_BUFFER);

      auto clip_vtx = intr->src[0].ssa;

      for (int i = 0; i < 8; ++i) {
         auto sel = nir_imm_int(b, i);
         auto mrow = nir_load_ubo_vec4(b, 4, 32, buf_id, sel);
         output[i] = nir_fdot4(b, clip_vtx, mrow);
      }

      unsigned clip_vertex_index = nir_intrinsic_base(intr);

      for (int i = 0; i < 2; ++i) {
         auto clip_i = nir_vec(b, &output[4 * i], 4);
         auto store = nir_store_output(b, clip_i, intr->src[1].ssa);
         nir_intrinsic_set_write_mask(store, 0xf);
         nir_intrinsic_set_base(store, clip_vertex_index);
         nir_intrinsic_set_src_type(store, nir_type_float32);
         nir_io_semantics semantic = nir_intrinsic_io_semantics(intr);
         semantic.location = VARYING_SLOT_CLIP_DIST0 + i;
         semantic.no_varying = 1;

         if (i > 0)
            nir_intrinsic_set_base(store, m_clipplane1);
         nir_intrinsic_set_write_mask(store, 0xf);
         nir_intrinsic_set_io_semantics(store, semantic);
      }
      nir_intrinsic_set_base(intr, m_clipvtx);

      nir_def *result = NIR_LOWER_INSTR_PROGRESS_REPLACE;
      for (unsigned i = 0; i < m_so_info.num_outputs; ++i) {
         if (m_so_info.output[i].register_index == clip_vertex_index) {
            m_so_info.output[i].register_index = m_clipvtx;
            result = NIR_LOWER_INSTR_PROGRESS;
         }
      }
      return result;
   }
   int m_clipplane1;
   int m_clipvtx;
   pipe_stream_output_info& m_so_info;
};

/* lower_uniforms_to_ubo adds 1 to the UBO buffer ID.
 * If the buffer ID is a non-constant value we end up
 * with "iadd bufid, 1", but on r600 we can put that constant
 * "1" as constant cache ID into the CF instruction and don't need
 * to execute that extra ADD op, so eliminate the addition here
 * again and move the buffer base ID into the base value of
 * the intrinsic, which is not used otherwise. */
class OptIndirectUBOLoads : public NirLowerInstruction {
private:
   bool filter(const nir_instr *instr) const override
   {
      if (instr->type != nir_instr_type_intrinsic)
         return false;

      auto intr = nir_instr_as_intrinsic(instr);
      if (intr->intrinsic != nir_intrinsic_load_ubo_vec4)
         return false;

      if (nir_src_as_const_value(intr->src[0]) != nullptr)
         return false;

      return nir_intrinsic_base(intr) == 0;
   }

   nir_def *lower(nir_instr *instr) override
   {
      auto intr = nir_instr_as_intrinsic(instr);
      assert(intr->intrinsic == nir_intrinsic_load_ubo_vec4);

      auto parent = intr->src[0].ssa->parent_instr;

      if (parent->type != nir_instr_type_alu)
         return nullptr;

      auto alu = nir_instr_as_alu(parent);

      if (alu->op != nir_op_iadd)
         return nullptr;

      int new_base = 0;
      nir_src *new_bufid = nullptr;
      auto src0 = nir_src_as_const_value(alu->src[0].src);
      if (src0) {
         new_bufid = &alu->src[1].src;
         new_base = src0->i32;
      } else if (auto src1 = nir_src_as_const_value(alu->src[1].src)) {
         new_bufid = &alu->src[0].src;
         new_base = src1->i32;
      } else {
         return nullptr;
      }

      nir_intrinsic_set_base(intr, new_base);
      nir_src_rewrite(&intr->src[0], new_bufid->ssa);
      return &intr->def;
   }
};

} // namespace r600

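/* Map the deref-based atomic counter intrinsics to their offset-based
 * counterparts; nir_num_intrinsics is returned for anything that is not an
 * atomic counter operation.
 */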
static nir_intrinsic_op
r600_map_atomic(nir_intrinsic_op op)
{
   switch (op) {
   case nir_intrinsic_atomic_counter_read_deref:
      return nir_intrinsic_atomic_counter_read;
   case nir_intrinsic_atomic_counter_inc_deref:
      return nir_intrinsic_atomic_counter_inc;
   case nir_intrinsic_atomic_counter_pre_dec_deref:
      return nir_intrinsic_atomic_counter_pre_dec;
   case nir_intrinsic_atomic_counter_post_dec_deref:
      return nir_intrinsic_atomic_counter_post_dec;
   case nir_intrinsic_atomic_counter_add_deref:
      return nir_intrinsic_atomic_counter_add;
   case nir_intrinsic_atomic_counter_min_deref:
      return nir_intrinsic_atomic_counter_min;
   case nir_intrinsic_atomic_counter_max_deref:
      return nir_intrinsic_atomic_counter_max;
   case nir_intrinsic_atomic_counter_and_deref:
      return nir_intrinsic_atomic_counter_and;
   case nir_intrinsic_atomic_counter_or_deref:
      return nir_intrinsic_atomic_counter_or;
   case nir_intrinsic_atomic_counter_xor_deref:
      return nir_intrinsic_atomic_counter_xor;
   case nir_intrinsic_atomic_counter_exchange_deref:
      return nir_intrinsic_atomic_counter_exchange;
   case nir_intrinsic_atomic_counter_comp_swap_deref:
      return nir_intrinsic_atomic_counter_comp_swap;
   default:
      return nir_num_intrinsics;
   }
}

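/* Rewrite a deref-based atomic counter intrinsic into its offset-based form:
 * the array derefs are folded into a flat element offset, the variable's
 * binding becomes the intrinsic base, and the per-binding counter index that
 * r600_nir_lower_atomics() stores in var->data.index becomes the range base.
 */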
static bool
r600_lower_deref_instr(nir_builder *b, nir_intrinsic_instr *instr,
                       UNUSED void *cb_data)
{
   nir_intrinsic_op op = r600_map_atomic(instr->intrinsic);
   if (nir_num_intrinsics == op)
      return false;

   nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   if (var->data.mode != nir_var_uniform && var->data.mode != nir_var_mem_ssbo &&
       var->data.mode != nir_var_mem_shared)
      return false; /* atomics passed as function arguments can't be lowered */

   const unsigned idx = var->data.binding;

   b->cursor = nir_before_instr(&instr->instr);

   nir_def *offset = nir_imm_int(b, 0);
   for (nir_deref_instr *d = deref; d->deref_type != nir_deref_type_var;
        d = nir_deref_instr_parent(d)) {
      assert(d->deref_type == nir_deref_type_array);

      unsigned array_stride = 1;
      if (glsl_type_is_array(d->type))
         array_stride *= glsl_get_aoa_size(d->type);

      offset =
         nir_iadd(b, offset, nir_imul_imm(b, d->arr.index.ssa, array_stride));
   }

   /* Since the first source is a deref and the first source in the lowered
    * instruction is the offset, we can just swap it out and change the
    * opcode.
    */
   instr->intrinsic = op;
   nir_src_rewrite(&instr->src[0], offset);
   nir_intrinsic_set_base(instr, idx);
   nir_intrinsic_set_range_base(instr, var->data.index);

   nir_deref_instr_remove_if_unused(deref);

   return true;
}

static bool
r600_lower_clipvertex_to_clipdist(nir_shader *sh, pipe_stream_output_info& so_info)
{
   if (!(sh->info.outputs_written & VARYING_BIT_CLIP_VERTEX))
      return false;

   int noutputs = util_bitcount64(sh->info.outputs_written);
   bool result = r600::LowerClipvertexWrite(noutputs, so_info).run(sh);
   return result;
}

static bool
r600_nir_lower_atomics(nir_shader *shader)
{
   /* In hardware we start at a zero index for each new binding and use an
    * offset of one per counter. We also need to sort the atomics according
    * to binding and offset. */
   std::map<unsigned, unsigned> binding_offset;
   std::map<unsigned, nir_variable *> sorted_var;

   nir_foreach_variable_with_modes_safe(var, shader, nir_var_uniform) {
      if (glsl_contains_atomic(var->type)) {
         sorted_var[(var->data.binding << 16) | var->data.offset] = var;
         exec_node_remove(&var->node);
      }
   }

   for (auto& [dummy, var] : sorted_var) {
      auto iindex = binding_offset.find(var->data.binding);
      unsigned offset_update = glsl_atomic_size(var->type) / 4; /* ATOMIC_COUNTER_SIZE */
      if (iindex == binding_offset.end()) {
         var->data.index = 0;
         binding_offset[var->data.binding] = offset_update;
      } else {
         var->data.index = iindex->second;
         iindex->second += offset_update;
      }
      shader->variables.push_tail(&var->node);
   }

   return nir_shader_intrinsics_pass(shader, r600_lower_deref_instr,
                                     nir_metadata_control_flow, NULL);
}
using r600::r600_lower_fs_out_to_vector;
using r600::r600_lower_scratch_addresses;
using r600::r600_lower_ubo_to_align16;

int
r600_glsl_type_size(const struct glsl_type *type, bool is_bindless)
{
   return glsl_count_vec4_slots(type, false, is_bindless);
}

void
r600_get_natural_size_align_bytes(const struct glsl_type *type,
                                  unsigned *size,
                                  unsigned *align)
{
   if (type->base_type != GLSL_TYPE_ARRAY) {
      *align = 1;
      *size = 1;
   } else {
      unsigned elem_size, elem_align;
      glsl_get_natural_size_align_bytes(type->fields.array, &elem_size, &elem_align);
      *align = 1;
      *size = type->length;
   }
}

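/* Lower load_shared/store_shared to the r600-specific LDS intrinsics: loads
 * of more than one component get a vector of per-channel byte addresses
 * (stride 4), and stores are split into at most two stores covering up to
 * two components each, selected by the write mask.
 */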
static bool
r600_lower_shared_io_impl(nir_function_impl *impl)
{
   nir_builder b = nir_builder_create(impl);

   bool progress = false;
   nir_foreach_block(block, impl)
   {
      nir_foreach_instr_safe(instr, block)
      {

         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
         if (op->intrinsic != nir_intrinsic_load_shared &&
             op->intrinsic != nir_intrinsic_store_shared)
            continue;

         b.cursor = nir_before_instr(instr);

         if (op->intrinsic == nir_intrinsic_load_shared) {
            nir_def *addr = op->src[0].ssa;

            switch (op->def.num_components) {
            case 2: {
               auto addr2 = nir_iadd_imm(&b, addr, 4);
               addr = nir_vec2(&b, addr, addr2);
               break;
            }
            case 3: {
               auto addr2 = nir_iadd(&b, addr, nir_imm_ivec2(&b, 4, 8));
               addr =
                  nir_vec3(&b, addr, nir_channel(&b, addr2, 0), nir_channel(&b, addr2, 1));
               break;
            }
            case 4: {
               addr = nir_iadd(&b, addr, nir_imm_ivec4(&b, 0, 4, 8, 12));
               break;
            }
            }

            auto load =
               nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_local_shared_r600);
            load->num_components = op->def.num_components;
            load->src[0] = nir_src_for_ssa(addr);
            nir_def_init(&load->instr, &load->def, load->num_components,
                         32);
            nir_def_rewrite_uses(&op->def, &load->def);
            nir_builder_instr_insert(&b, &load->instr);
         } else {
            nir_def *addr = op->src[1].ssa;
            for (int i = 0; i < 2; ++i) {
               unsigned test_mask = (0x3 << 2 * i);
               if (!(nir_intrinsic_write_mask(op) & test_mask))
                  continue;

               auto store =
                  nir_intrinsic_instr_create(b.shader,
                                             nir_intrinsic_store_local_shared_r600);
               unsigned writemask = nir_intrinsic_write_mask(op) & test_mask;
               nir_intrinsic_set_write_mask(store, writemask);
               store->src[0] = nir_src_for_ssa(op->src[0].ssa);
               store->num_components = store->src[0].ssa->num_components;
               bool start_even = (writemask & (1u << (2 * i)));

               auto addr2 =
                  nir_iadd_imm(&b, addr, 8 * i + (start_even ? 0 : 4));
               store->src[1] = nir_src_for_ssa(addr2);

               nir_builder_instr_insert(&b, &store->instr);
            }
         }
         nir_instr_remove(instr);
         progress = true;
      }
   }
   return progress;
}

static bool
r600_lower_shared_io(nir_shader *nir)
{
   bool progress = false;
   nir_foreach_function_impl(impl, nir)
   {
      if (r600_lower_shared_io_impl(impl))
         progress = true;
   }
   return progress;
}

static nir_def *
r600_lower_fs_pos_input_impl(nir_builder *b, nir_instr *instr, void *_options)
{
   (void)_options;
   auto old_ir = nir_instr_as_intrinsic(instr);
   auto load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input);
   nir_def_init(&load->instr, &load->def,
                old_ir->def.num_components, old_ir->def.bit_size);
   nir_intrinsic_set_io_semantics(load, nir_intrinsic_io_semantics(old_ir));

   nir_intrinsic_set_base(load, nir_intrinsic_base(old_ir));
   nir_intrinsic_set_component(load, nir_intrinsic_component(old_ir));
   nir_intrinsic_set_dest_type(load, nir_type_float32);
   load->num_components = old_ir->num_components;
   load->src[0] = old_ir->src[1];
   nir_builder_instr_insert(b, &load->instr);
   return &load->def;
}

bool
r600_lower_fs_pos_input_filter(const nir_instr *instr, const void *_options)
{
   (void)_options;

   if (instr->type != nir_instr_type_intrinsic)
      return false;

   auto ir = nir_instr_as_intrinsic(instr);
   if (ir->intrinsic != nir_intrinsic_load_interpolated_input)
      return false;

   return nir_intrinsic_io_semantics(ir).location == VARYING_SLOT_POS;
}

/* Strip the interpolator specification; it is not needed and only gets in the way. */
bool
r600_lower_fs_pos_input(nir_shader *shader)
{
   return nir_shader_lower_instructions(shader,
                                        r600_lower_fs_pos_input_filter,
                                        r600_lower_fs_pos_input_impl,
                                        nullptr);
};

bool
r600_opt_indirect_fbo_loads(nir_shader *shader)
{
   return r600::OptIndirectUBOLoads().run(shader);
}

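/* One round of the generic NIR optimization loop; callers run this until it
 * no longer reports progress.
 */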
static bool
optimize_once(nir_shader *shader)
{
   bool progress = false;
   NIR_PASS(progress, shader, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
   NIR_PASS(progress, shader, nir_lower_vars_to_ssa);
   NIR_PASS(progress, shader, nir_copy_prop);
   NIR_PASS(progress, shader, nir_opt_dce);
   NIR_PASS(progress, shader, nir_opt_algebraic);
   if (shader->options->has_bitfield_select)
      NIR_PASS(progress, shader, nir_opt_generate_bfi);
   NIR_PASS(progress, shader, nir_opt_constant_folding);
   NIR_PASS(progress, shader, nir_opt_copy_prop_vars);
   NIR_PASS(progress, shader, nir_opt_remove_phis);

   if (nir_opt_loop(shader)) {
      progress = true;
      NIR_PASS(progress, shader, nir_copy_prop);
      NIR_PASS(progress, shader, nir_opt_dce);
   }

   NIR_PASS(progress, shader, nir_opt_if, nir_opt_if_optimize_phi_true_false);
   NIR_PASS(progress, shader, nir_opt_dead_cf);
   NIR_PASS(progress, shader, nir_opt_cse);
   NIR_PASS(progress, shader, nir_opt_peephole_select, 200, true, true);

   NIR_PASS(progress, shader, nir_opt_conditional_discard);
   NIR_PASS(progress, shader, nir_opt_dce);
   NIR_PASS(progress, shader, nir_opt_undef);
   NIR_PASS(progress, shader, nir_opt_loop_unroll);
   return progress;
}

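/* True if this stage is the last vertex-processing stage before
 * rasterization: geometry, tess eval not compiled as ES, or a vertex shader
 * compiled neither as ES nor as LS.
 */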
static bool
r600_is_last_vertex_stage(nir_shader *nir, const r600_shader_key& key)
{
   if (nir->info.stage == MESA_SHADER_GEOMETRY)
      return true;

   if (nir->info.stage == MESA_SHADER_TESS_EVAL && !key.tes.as_es)
      return true;

   if (nir->info.stage == MESA_SHADER_VERTEX && !key.vs.as_es && !key.vs.as_ls)
      return true;

   return false;
}

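/* Filter for nir_lower_alu_to_scalar: vector reductions (dot products and
 * ball/bany compares) are kept vectorized because the backend handles them
 * natively, unless they operate on 64-bit sources.
 */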
665 extern "C" bool
r600_lower_to_scalar_instr_filter(const nir_instr * instr,const void *)666 r600_lower_to_scalar_instr_filter(const nir_instr *instr, const void *)
667 {
668    if (instr->type != nir_instr_type_alu)
669       return true;
670 
671    auto alu = nir_instr_as_alu(instr);
672    switch (alu->op) {
673    case nir_op_bany_fnequal3:
674    case nir_op_bany_fnequal4:
675    case nir_op_ball_fequal3:
676    case nir_op_ball_fequal4:
677    case nir_op_bany_inequal3:
678    case nir_op_bany_inequal4:
679    case nir_op_ball_iequal3:
680    case nir_op_ball_iequal4:
681    case nir_op_fdot2:
682    case nir_op_fdot3:
683    case nir_op_fdot4:
684       return nir_src_bit_size(alu->src[0].src) == 64;
685    default:
686       return true;
687    }
688 }
689 
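/* Lowering common to all stages: flrp, integer division, trig range
 * reduction, texture lowering, 2x16 pack/unpack, shared-memory and
 * atomic-counter lowering, followed by the optimization loop.
 */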
void
r600_finalize_nir_common(nir_shader *nir, enum amd_gfx_level gfx_level)
{
   const int nir_lower_flrp_mask = 16 | 32 | 64;

   NIR_PASS_V(nir, nir_lower_flrp, nir_lower_flrp_mask, false);

   nir_lower_idiv_options idiv_options = {0};
   NIR_PASS_V(nir, nir_lower_idiv, &idiv_options);

   NIR_PASS_V(nir, r600_nir_lower_trigen, gfx_level);
   NIR_PASS_V(nir, nir_lower_phis_to_scalar, false);
   NIR_PASS_V(nir, nir_lower_undef_to_zero);

   struct nir_lower_tex_options lower_tex_options = {0};
   lower_tex_options.lower_txp = ~0u;
   lower_tex_options.lower_txf_offset = true;
   lower_tex_options.lower_invalid_implicit_lod = true;
   lower_tex_options.lower_tg4_offsets = true;

   NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options);
   NIR_PASS_V(nir, r600_nir_lower_txl_txf_array_or_cube);
   NIR_PASS_V(nir, r600_nir_lower_cube_to_2darray);

   NIR_PASS_V(nir, r600_nir_lower_pack_unpack_2x16);

   NIR_PASS_V(nir, r600_lower_shared_io);
   NIR_PASS_V(nir, r600_nir_lower_atomics);

   if (gfx_level == CAYMAN)
      NIR_PASS_V(nir, r600_legalize_image_load_store);

   while (optimize_once(nir))
      ;
}

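/* R600_SFN_SKIP_OPT_START/R600_SFN_SKIP_OPT_END select a range of shader ids
 * for which the sfn optimizer is skipped (see
 * r600_finalize_and_optimize_shader below); the default of -1 disables this.
 */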
DEBUG_GET_ONCE_NUM_OPTION(skip_opt_start, "R600_SFN_SKIP_OPT_START", -1);
DEBUG_GET_ONCE_NUM_OPTION(skip_opt_end, "R600_SFN_SKIP_OPT_END", -1);

void
r600_lower_and_optimize_nir(nir_shader *sh,
                            const union r600_shader_key *key,
                            enum amd_gfx_level gfx_level,
                            struct pipe_stream_output_info *so_info)
{
   bool lower_64bit =
      gfx_level < CAYMAN &&
      (sh->options->lower_int64_options || sh->options->lower_doubles_options) &&
      ((sh->info.bit_sizes_float | sh->info.bit_sizes_int) & 64);

   r600::sort_uniforms(sh);
   NIR_PASS_V(sh, r600_nir_fix_kcache_indirect_access);

   while (optimize_once(sh))
      ;


   if (sh->info.stage == MESA_SHADER_VERTEX)
      NIR_PASS_V(sh, r600_vectorize_vs_inputs);

   if (sh->info.stage == MESA_SHADER_FRAGMENT) {
      NIR_PASS_V(sh, nir_lower_fragcoord_wtrans);
      NIR_PASS_V(sh, r600_lower_fs_out_to_vector);
      NIR_PASS_V(sh, nir_opt_dce);
      NIR_PASS_V(sh, nir_remove_dead_variables, nir_var_shader_out, 0);
      r600::sort_fsoutput(sh);
   }
   nir_variable_mode io_modes = nir_var_uniform | nir_var_shader_in | nir_var_shader_out;

   NIR_PASS_V(sh, nir_opt_combine_stores, nir_var_shader_out);
   NIR_PASS_V(sh,
              nir_lower_io,
              io_modes,
              r600_glsl_type_size,
              nir_lower_io_lower_64bit_to_32);

   if (sh->info.stage == MESA_SHADER_FRAGMENT)
      NIR_PASS_V(sh, r600_lower_fs_pos_input);

   /**/
   if (lower_64bit)
      NIR_PASS_V(sh, nir_lower_indirect_derefs, nir_var_function_temp, 10);

   NIR_PASS_V(sh, nir_opt_constant_folding);
   NIR_PASS_V(sh, nir_io_add_const_offset_to_base, io_modes);

   NIR_PASS_V(sh, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
   NIR_PASS_V(sh, nir_lower_phis_to_scalar, false);
   if (lower_64bit)
      NIR_PASS_V(sh, r600::r600_nir_split_64bit_io);
   NIR_PASS_V(sh, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
   NIR_PASS_V(sh, nir_lower_phis_to_scalar, false);
   NIR_PASS_V(sh, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
   NIR_PASS_V(sh, nir_copy_prop);
   NIR_PASS_V(sh, nir_opt_dce);



   if (r600_is_last_vertex_stage(sh, *key))
      r600_lower_clipvertex_to_clipdist(sh, *so_info);

   if (sh->info.stage == MESA_SHADER_TESS_CTRL ||
       sh->info.stage == MESA_SHADER_TESS_EVAL ||
       (sh->info.stage == MESA_SHADER_VERTEX && key->vs.as_ls)) {
      auto prim_type = sh->info.stage == MESA_SHADER_TESS_EVAL
                          ? u_tess_prim_from_shader(sh->info.tess._primitive_mode)
                          : (mesa_prim)key->tcs.prim_mode;
      NIR_PASS_V(sh, r600_lower_tess_io, static_cast<mesa_prim>(prim_type));
   }

   if (sh->info.stage == MESA_SHADER_TESS_CTRL)
      NIR_PASS_V(sh, r600_append_tcs_TF_emission, (mesa_prim)key->tcs.prim_mode);

   if (sh->info.stage == MESA_SHADER_TESS_EVAL) {
      NIR_PASS_V(sh, nir_lower_tess_coord_z,
                 sh->info.tess._primitive_mode == TESS_PRIMITIVE_TRIANGLES);
   }

   NIR_PASS_V(sh, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
   NIR_PASS_V(sh, nir_lower_phis_to_scalar, false);
   NIR_PASS_V(sh, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
   NIR_PASS_V(sh, r600_nir_lower_int_tg4);
   NIR_PASS_V(sh, r600::r600_nir_lower_tex_to_backend, gfx_level);

   if ((sh->info.bit_sizes_float | sh->info.bit_sizes_int) & 64) {
      NIR_PASS_V(sh, r600::r600_nir_split_64bit_io);
      NIR_PASS_V(sh, r600::r600_split_64bit_alu_and_phi);
      NIR_PASS_V(sh, nir_split_64bit_vec3_and_vec4);
      NIR_PASS_V(sh, nir_lower_int64);
   }

   NIR_PASS_V(sh, nir_lower_ubo_vec4);
   NIR_PASS_V(sh, r600_opt_indirect_fbo_loads);

   if (lower_64bit)
      NIR_PASS_V(sh, r600::r600_nir_64_to_vec2);

   if ((sh->info.bit_sizes_float | sh->info.bit_sizes_int) & 64) {
      NIR_PASS_V(sh, r600::r600_split_64bit_uniforms_and_ubo);
      NIR_PASS_V(sh, nir_lower_doubles, NULL, sh->options->lower_doubles_options);
   }

   /* Lower to scalar to let some optimization work out better */
   while (optimize_once(sh))
      ;

   if (lower_64bit)
      NIR_PASS_V(sh, r600::r600_merge_vec2_stores);

   NIR_PASS_V(sh, nir_remove_dead_variables, nir_var_shader_in, NULL);
   NIR_PASS_V(sh, nir_remove_dead_variables, nir_var_shader_out, NULL);

   NIR_PASS_V(sh,
              nir_lower_vars_to_scratch,
              nir_var_function_temp,
              40,
              r600_get_natural_size_align_bytes,
              r600_get_natural_size_align_bytes);

   while (optimize_once(sh))
      ;

   if ((sh->info.bit_sizes_float | sh->info.bit_sizes_int) & 64)
      NIR_PASS_V(sh, r600::r600_split_64bit_alu_and_phi);

   bool late_algebraic_progress;
   do {
      late_algebraic_progress = false;
      NIR_PASS(late_algebraic_progress, sh, nir_opt_algebraic_late);
      NIR_PASS(late_algebraic_progress, sh, nir_opt_constant_folding);
      NIR_PASS(late_algebraic_progress, sh, nir_copy_prop);
      NIR_PASS(late_algebraic_progress, sh, nir_opt_dce);
      NIR_PASS(late_algebraic_progress, sh, nir_opt_cse);
   } while (late_algebraic_progress);

   NIR_PASS_V(sh, nir_lower_bool_to_int32);

   NIR_PASS_V(sh, nir_lower_locals_to_regs, 32);
   NIR_PASS_V(sh, nir_convert_from_ssa, true);
   NIR_PASS_V(sh, nir_opt_dce);
}

void
r600_finalize_and_optimize_shader(r600::Shader *shader)
{
   if (r600::sfn_log.has_debug_flag(r600::SfnLog::steps)) {
      std::cerr << "Shader after conversion from nir\n";
      shader->print(std::cerr);
   }

   auto sfn_skip_opt_start = debug_get_option_skip_opt_start();
   auto sfn_skip_opt_end = debug_get_option_skip_opt_end();
   bool skip_shader_opt_per_id = sfn_skip_opt_start >= 0 &&
                                 sfn_skip_opt_start <= shader->shader_id() &&
                                 sfn_skip_opt_end >= shader->shader_id();

   bool skip_shader_opt = r600::sfn_log.has_debug_flag(r600::SfnLog::noopt) ||
                          skip_shader_opt_per_id;

   if (!skip_shader_opt) {
      optimize(*shader);
      if (r600::sfn_log.has_debug_flag(r600::SfnLog::steps)) {
         std::cerr << "Shader after optimization\n";
         shader->print(std::cerr);
      }
   }

   split_address_loads(*shader);

   if (r600::sfn_log.has_debug_flag(r600::SfnLog::steps)) {
      std::cerr << "Shader after splitting address loads\n";
      shader->print(std::cerr);
   }

   if (!skip_shader_opt) {
      optimize(*shader);
      if (r600::sfn_log.has_debug_flag(r600::SfnLog::steps)) {
         std::cerr << "Shader after optimization\n";
         shader->print(std::cerr);
      }
   }
}

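/* Schedule the shader and, unless the nomerge debug flag is set, run live
 * range evaluation and register allocation; returns nullptr if register
 * allocation fails.
 */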
r600::Shader *
r600_schedule_shader(r600::Shader *shader)
{
   auto scheduled_shader = r600::schedule(shader);
   if (r600::sfn_log.has_debug_flag(r600::SfnLog::steps)) {
      std::cerr << "Shader after scheduling\n";
      scheduled_shader->print(std::cerr);
   }

   if (!r600::sfn_log.has_debug_flag(r600::SfnLog::nomerge)) {

      if (r600::sfn_log.has_debug_flag(r600::SfnLog::merge)) {
         r600::sfn_log << r600::SfnLog::merge << "Shader before RA\n";
         scheduled_shader->print(std::cerr);
      }

      r600::sfn_log << r600::SfnLog::trans << "Merge registers\n";
      auto lrm = r600::LiveRangeEvaluator().run(*scheduled_shader);

      if (!r600::register_allocation(lrm)) {
         R600_ERR("%s: Register allocation failed\n", __func__);
         /* For now, crash if the shader could not be generated */
         assert(0);
         return nullptr;
      } else if (r600::sfn_log.has_debug_flag(r600::SfnLog::merge) ||
                 r600::sfn_log.has_debug_flag(r600::SfnLog::steps)) {
         r600::sfn_log << "Shader after RA\n";
         scheduled_shader->print(std::cerr);
      }
   }

   return scheduled_shader;
}
946