xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/r600/sfn/sfn_nir_lower_tess_io.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /* -*- mesa-c++  -*-
2  * Copyright 2022 Collabora LTD
3  * Author: Gert Wollny <[email protected]>
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "sfn_nir.h"
8 
9 bool
r600_lower_tess_io_filter(const nir_instr * instr,gl_shader_stage stage)10 r600_lower_tess_io_filter(const nir_instr *instr, gl_shader_stage stage)
11 {
12    if (instr->type != nir_instr_type_intrinsic)
13       return false;
14 
15    nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
16    switch (op->intrinsic) {
17    case nir_intrinsic_load_input:
18       return stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_TESS_EVAL;
19    case nir_intrinsic_load_output:
20    case nir_intrinsic_load_per_vertex_input:
21    case nir_intrinsic_load_per_vertex_output:
22    case nir_intrinsic_store_per_vertex_output:
23    case nir_intrinsic_load_patch_vertices_in:
24    case nir_intrinsic_load_tess_level_outer:
25    case nir_intrinsic_load_tess_level_inner:
26       return true;
27    case nir_intrinsic_store_output:
28       return stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_VERTEX;
29    default:;
30    }
31    return false;
32 }
33 
34 static int
get_tcs_varying_offset(nir_intrinsic_instr * op)35 get_tcs_varying_offset(nir_intrinsic_instr *op)
36 {
37    unsigned location = nir_intrinsic_io_semantics(op).location;
38 
39    switch (location) {
40    case VARYING_SLOT_POS:
41       return 0;
42    case VARYING_SLOT_PSIZ:
43       return 0x10;
44    case VARYING_SLOT_CLIP_DIST0:
45       return 0x20;
46    case VARYING_SLOT_CLIP_DIST1:
47       return 0x30;
48    case VARYING_SLOT_COL0:
49       return 0x40;
50    case VARYING_SLOT_COL1:
51       return 0x50;
52    case VARYING_SLOT_BFC0:
53       return 0x60;
54    case VARYING_SLOT_BFC1:
55       return 0x70;
56    case VARYING_SLOT_CLIP_VERTEX:
57       return 0x80;
58    case VARYING_SLOT_TESS_LEVEL_OUTER:
59       return 0;
60    case VARYING_SLOT_TESS_LEVEL_INNER:
61       return 0x10;
62    default:
63       if (location >= VARYING_SLOT_VAR0 && location <= VARYING_SLOT_VAR31)
64          return 0x10 * (location - VARYING_SLOT_VAR0) + 0x90;
65 
66       if (location >= VARYING_SLOT_PATCH0) {
67          return 0x10 * (location - VARYING_SLOT_PATCH0) + 0x20;
68       }
69    }
70    return 0;
71 }
72 
73 static inline nir_def *
r600_tcs_base_address(nir_builder * b,nir_def * param_base,nir_def * rel_patch_id)74 r600_tcs_base_address(nir_builder *b, nir_def *param_base, nir_def *rel_patch_id)
75 {
76    return nir_umad24(b,
77                      nir_channel(b, param_base, 0),
78                      rel_patch_id,
79                      nir_channel(b, param_base, 3));
80 }
81 
82 static nir_def *
emil_lsd_in_addr(nir_builder * b,nir_def * base,nir_def * patch_id,nir_intrinsic_instr * op)83 emil_lsd_in_addr(nir_builder *b,
84                  nir_def *base,
85                  nir_def *patch_id,
86                  nir_intrinsic_instr *op)
87 {
88    nir_def *addr =
89       nir_build_alu(b, nir_op_umul24, nir_channel(b, base, 0), patch_id, NULL, NULL);
90 
91    auto idx1 = nir_src_as_const_value(op->src[0]);
92    if (!idx1 || idx1->u32 != 0)
93       addr = nir_umad24(b, nir_channel(b, base, 1), op->src[0].ssa, addr);
94 
95    auto offset = nir_imm_int(b, get_tcs_varying_offset(op));
96 
97    auto idx2 = nir_src_as_const_value(op->src[1]);
98    if (!idx2 || idx2->u32 != 0)
99       offset = nir_iadd(b, nir_ishl_imm(b, op->src[1].ssa, 4), offset);
100 
101    return nir_iadd(b, addr, offset);
102 }
103 
104 static nir_def *
emil_lsd_out_addr(nir_builder * b,nir_def * base,nir_def * patch_id,nir_intrinsic_instr * op,UNUSED nir_variable_mode mode,int src_offset)105 emil_lsd_out_addr(nir_builder *b,
106                   nir_def *base,
107                   nir_def *patch_id,
108                   nir_intrinsic_instr *op,
109                   UNUSED nir_variable_mode mode,
110                   int src_offset)
111 {
112 
113    nir_def *addr1 =
114       nir_umad24(b, nir_channel(b, base, 0), patch_id, nir_channel(b, base, 2));
115    nir_def *addr2 =
116       nir_umad24(b, nir_channel(b, base, 1), op->src[src_offset].ssa, addr1);
117    int offset = get_tcs_varying_offset(op);
118    return nir_iadd_imm(b,
119                        nir_iadd(b,
120                                 addr2,
121                                 nir_ishl_imm(b, op->src[src_offset + 1].ssa, 4)),
122                        offset);
123 }
124 
125 static nir_def *
load_offset_group(nir_builder * b,int ncomponents)126 load_offset_group(nir_builder *b, int ncomponents)
127 {
128    switch (ncomponents) {
129    /* tess outer offsets */
130    case 1:
131       return nir_imm_int(b, 0);
132    case 2:
133       return nir_imm_ivec2(b, 0, 4);
134    case 3:
135       return r600_imm_ivec3(b, 0, 4, 8);
136    case 4:
137       return nir_imm_ivec4(b, 0, 4, 8, 12);
138       /* tess inner offsets */
139    case 5:
140       return nir_imm_int(b, 16);
141    case 6:
142       return nir_imm_ivec2(b, 16, 20);
143    default:
144       debug_printf("Got %d components\n", ncomponents);
145       unreachable("Unsupported component count");
146    }
147 }
148 
149 static nir_def *
load_offset_group_from_mask(nir_builder * b,uint32_t mask)150 load_offset_group_from_mask(nir_builder *b, uint32_t mask)
151 {
152    auto full_mask = nir_imm_ivec4(b, 0, 4, 8, 12);
153    return nir_channels(b, full_mask, mask);
154 }
155 
/* State handed to update_alu_mask() through nir_foreach_src to accumulate
 * which components of one SSA def are actually read by its ALU users. */
struct MaskQuery {
   uint32_t mask;       /* accumulated component read mask */
   uint32_t ssa_index;  /* index of the SSA def whose uses are being queried */
   nir_alu_instr *alu;  /* ALU instruction currently being scanned */
   int index;           /* running source index within that ALU instruction */
   uint32_t full_mask;  /* all-components mask, used for early termination */
};
163 
164 static bool
update_alu_mask(nir_src * src,void * data)165 update_alu_mask(nir_src *src, void *data)
166 {
167    auto mq = reinterpret_cast<MaskQuery *>(data);
168 
169    if (mq->ssa_index == src->ssa->index) {
170       mq->mask |= nir_alu_instr_src_read_mask(mq->alu, mq->index);
171    }
172    ++mq->index;
173 
174    return mq->mask != mq->full_mask;
175 }
176 
177 static uint32_t
get_dest_usee_mask(nir_intrinsic_instr * op)178 get_dest_usee_mask(nir_intrinsic_instr *op)
179 {
180    MaskQuery mq = {0};
181    mq.full_mask = (1 << op->def.num_components) - 1;
182 
183    nir_foreach_use(use_src, &op->def)
184    {
185       auto use_instr = nir_src_parent_instr(use_src);
186       mq.ssa_index = use_src->ssa->index;
187 
188       switch (use_instr->type) {
189       case nir_instr_type_alu: {
190          mq.alu = nir_instr_as_alu(use_instr);
191          mq.index = 0;
192          if (!nir_foreach_src(use_instr, update_alu_mask, &mq))
193             return 0xf;
194          break;
195       }
196       case nir_instr_type_intrinsic: {
197          auto intr = nir_instr_as_intrinsic(use_instr);
198          switch (intr->intrinsic) {
199          case nir_intrinsic_store_output:
200          case nir_intrinsic_store_per_vertex_output:
201             mq.mask |= nir_intrinsic_write_mask(intr) << nir_intrinsic_component(intr);
202             break;
203          case nir_intrinsic_store_scratch:
204          case nir_intrinsic_store_local_shared_r600:
205             mq.mask |= nir_intrinsic_write_mask(intr);
206             break;
207          default:
208             return 0xf;
209          }
210          break;
211       }
212       default:
213          return 0xf;
214       }
215    }
216    return mq.mask;
217 }
218 
219 static void
replace_load_instr(nir_builder * b,nir_intrinsic_instr * op,nir_def * addr)220 replace_load_instr(nir_builder *b, nir_intrinsic_instr *op, nir_def *addr)
221 {
222    uint32_t mask = get_dest_usee_mask(op);
223    if (mask) {
224       nir_def *addr_outer = nir_iadd(b, addr, load_offset_group_from_mask(b, mask));
225       if (nir_intrinsic_component(op))
226          addr_outer =
227             nir_iadd_imm(b, addr_outer, 4 * nir_intrinsic_component(op));
228 
229       auto new_load = nir_load_local_shared_r600(b, 32, addr_outer);
230 
231       auto undef = nir_undef(b, 1, 32);
232       int comps = op->def.num_components;
233       nir_def *remix[4] = {undef, undef, undef, undef};
234 
235       int chan = 0;
236       for (int i = 0; i < comps; ++i) {
237          if (mask & (1 << i)) {
238             remix[i] = nir_channel(b, new_load, chan++);
239          }
240       }
241       auto new_load_remixed = nir_vec(b, remix, comps);
242       nir_def_rewrite_uses(&op->def, new_load_remixed);
243    }
244    nir_instr_remove(&op->instr);
245 }
246 
247 static void
emit_store_lds(nir_builder * b,nir_intrinsic_instr * op,nir_def * addr)248 emit_store_lds(nir_builder *b, nir_intrinsic_instr *op, nir_def *addr)
249 {
250    uint32_t orig_writemask = nir_intrinsic_write_mask(op) << nir_intrinsic_component(op);
251 
252    for (int i = 0; i < 2; ++i) {
253       unsigned test_mask = (0x3 << 2 * i);
254       unsigned wmask = orig_writemask & test_mask;
255       if (!(wmask))
256          continue;
257 
258       uint32_t writemask = wmask >> nir_intrinsic_component(op);
259 
260       bool start_even = (orig_writemask & (1u << (2 * i)));
261       nir_def *addr2 = nir_iadd_imm(b, addr, 8 * i + (start_even ? 0 : 4));
262       nir_store_local_shared_r600(b, op->src[0].ssa, addr2,
263                                   .write_mask = writemask);
264    }
265 }
266 
267 static nir_def *
emil_tcs_io_offset(nir_builder * b,nir_def * addr,nir_intrinsic_instr * op,int src_offset)268 emil_tcs_io_offset(nir_builder *b,
269                    nir_def *addr,
270                    nir_intrinsic_instr *op,
271                    int src_offset)
272 {
273    int offset = get_tcs_varying_offset(op);
274    return nir_iadd_imm(b,
275                        nir_iadd(b,
276                                 addr,
277                                 nir_ishl_imm(b, op->src[src_offset].ssa, 4)),
278                        offset);
279 }
280 
281 inline unsigned
outer_tf_components(mesa_prim prim_type)282 outer_tf_components(mesa_prim prim_type)
283 {
284    switch (prim_type) {
285    case MESA_PRIM_LINES:
286       return 2;
287    case MESA_PRIM_TRIANGLES:
288       return 3;
289    case MESA_PRIM_QUADS:
290       return 4;
291    default:
292       return 0;
293    }
294 }
295 
/* Lower a single tessellation I/O intrinsic to r600 LDS reads/writes.
 * The caller guarantees (via r600_lower_tess_io_filter) that instr is an
 * intrinsic this pass handles. Returns true when the instruction was
 * replaced. */
static bool
r600_lower_tess_io_impl(nir_builder *b, nir_instr *instr, enum mesa_prim prim_type)
{
   /* NOTE(review): function-local statics cache the param-base loads. They
    * are refreshed below for the stages that set them, but a value can
    * survive from a previous shader's lowering run — confirm every use is
    * dominated by a load emitted for the current shader. */
   static nir_def *load_in_param_base = nullptr;
   static nir_def *load_out_param_base = nullptr;

   b->cursor = nir_before_instr(instr);
   nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);

   /* Pick the LDS layout descriptors for the current stage. The TES reads
    * what the TCS wrote, so its "in" base is the TCS out base; the VS writes
    * what the TCS will read, so its "out" base is the TCS in base. */
   if (b->shader->info.stage == MESA_SHADER_TESS_CTRL) {
      load_in_param_base = nir_load_tcs_in_param_base_r600(b);
      load_out_param_base = nir_load_tcs_out_param_base_r600(b);
   } else if (b->shader->info.stage == MESA_SHADER_TESS_EVAL) {
      load_in_param_base = nir_load_tcs_out_param_base_r600(b);
   } else if (b->shader->info.stage == MESA_SHADER_VERTEX) {
      load_out_param_base = nir_load_tcs_in_param_base_r600(b);
   }

   auto rel_patch_id = nir_load_tcs_rel_patch_id_r600(b);

   /* Adjustments applied when falling through from the inner tess-level
    * case into the shared outer/inner handling below. */
   unsigned tf_inner_address_offset = 0;
   unsigned ncomps_correct = 0;

   switch (op->intrinsic) {
   case nir_intrinsic_load_patch_vertices_in: {
      /* The patch vertex count lives in channel 2 of the TCS in-param base. */
      nir_def *vertices_in;
      if (b->shader->info.stage == MESA_SHADER_TESS_CTRL)
         vertices_in = nir_channel(b, load_in_param_base, 2);
      else {
         auto base = nir_load_tcs_in_param_base_r600(b);
         vertices_in = nir_channel(b, base, 2);
      }
      nir_def_replace(&op->def, vertices_in);
      return true;
   }
   case nir_intrinsic_load_per_vertex_input: {
      /* TCS reads VS outputs from the input area; TES reads TCS outputs,
       * which use the output-area address layout. */
      nir_def *addr =
         b->shader->info.stage == MESA_SHADER_TESS_CTRL
            ? emil_lsd_in_addr(b, load_in_param_base, rel_patch_id, op)
            : emil_lsd_out_addr(
                 b, load_in_param_base, rel_patch_id, op, nir_var_shader_in, 0);
      replace_load_instr(b, op, addr);
      return true;
   }
   case nir_intrinsic_store_per_vertex_output: {
      /* src[0] is the value, so the vertex index starts at src[1]. */
      nir_def *addr = emil_lsd_out_addr(
         b, load_out_param_base, rel_patch_id, op, nir_var_shader_out, 1);
      emit_store_lds(b, op, addr);
      nir_instr_remove(instr);
      return true;
   }
   case nir_intrinsic_load_per_vertex_output: {
      nir_def *addr = emil_lsd_out_addr(
         b, load_out_param_base, rel_patch_id, op, nir_var_shader_out, 0);
      replace_load_instr(b, op, addr);
      return true;
   }
   case nir_intrinsic_store_output: {
      /* TCS patch outputs use the full base address (stride * patch + start);
       * the VS path multiplies the per-vertex stride by the patch id. */
      nir_def *addr = (b->shader->info.stage == MESA_SHADER_TESS_CTRL)
                             ? r600_tcs_base_address(b, load_out_param_base, rel_patch_id)
                             : nir_build_alu(b,
                                             nir_op_umul24,
                                             nir_channel(b, load_out_param_base, 1),
                                             rel_patch_id,
                                             NULL,
                                             NULL);
      addr = emil_tcs_io_offset(b, addr, op, 1);
      emit_store_lds(b, op, addr);
      nir_instr_remove(instr);
      return true;
   }
   case nir_intrinsic_load_output: {
      nir_def *addr = r600_tcs_base_address(b, load_out_param_base, rel_patch_id);
      addr = emil_tcs_io_offset(b, addr, op, 0);
      replace_load_instr(b, op, addr);
      return true;
   }
   case nir_intrinsic_load_input: {
      nir_def *addr = r600_tcs_base_address(b, load_in_param_base, rel_patch_id);
      addr = emil_tcs_io_offset(b, addr, op, 0);
      replace_load_instr(b, op, addr);
      return true;
   }
   case nir_intrinsic_load_tess_level_inner:
      /* Shift into the inner-factor range of load_offset_group() (cases 5/6)
       * and drop the two components lines/triangles don't have. */
      tf_inner_address_offset = 4;
      ncomps_correct = 2;
      FALLTHROUGH;
   case nir_intrinsic_load_tess_level_outer: {
      auto ncomps = outer_tf_components(prim_type);
      if (!ncomps)
         return false;
      ncomps -= ncomps_correct;
      auto base = nir_load_tcs_out_param_base_r600(b);
      auto rel_patch_id = nir_load_tcs_rel_patch_id_r600(b);
      nir_def *addr0 = r600_tcs_base_address(b, base, rel_patch_id);
      nir_def *addr_outer =
         nir_iadd(b, addr0, load_offset_group(b, tf_inner_address_offset + ncomps));

      nir_def *tf = nir_load_local_shared_r600(b, 32, addr_outer);
      /* Outside the TES the consumers expect a vec4; pad the missing
       * channels with undef. */
      if (ncomps < 4 && b->shader->info.stage != MESA_SHADER_TESS_EVAL) {
         auto undef = nir_undef(b, 1, 32);
         nir_def *srcs[4] = {undef, undef, undef, undef};
         for (unsigned i = 0; i < ncomps; ++i)
            srcs[i] = nir_channel(b, tf, i);
         auto help = nir_vec(b, srcs, 4);
         nir_def_rewrite_uses(&op->def, help);
      } else {
         nir_def_rewrite_uses(&op->def, tf);
      }
      nir_instr_remove(instr);
      return true;
   }
   default:;
   }

   return false;
}
413 
414 bool
r600_lower_tess_io(nir_shader * shader,enum mesa_prim prim_type)415 r600_lower_tess_io(nir_shader *shader, enum mesa_prim prim_type)
416 {
417    bool progress = false;
418    nir_foreach_function_impl(impl, shader)
419    {
420       nir_builder b = nir_builder_create(impl);
421 
422       nir_foreach_block(block, impl)
423       {
424          nir_foreach_instr_safe(instr, block)
425          {
426             if (instr->type != nir_instr_type_intrinsic)
427                continue;
428 
429             if (r600_lower_tess_io_filter(instr, shader->info.stage))
430                progress |= r600_lower_tess_io_impl(&b, instr, prim_type);
431          }
432       }
433    }
434    return progress;
435 }
436 
437 bool
r600_emit_tf(nir_builder * b,nir_def * val)438 r600_emit_tf(nir_builder *b, nir_def *val)
439 {
440    nir_intrinsic_instr *store_tf =
441       nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_tf_r600);
442    store_tf->num_components = val->num_components;
443    store_tf->src[0] = nir_src_for_ssa(val);
444    nir_builder_instr_insert(b, &store_tf->instr);
445    return true;
446 }
447 
/* Append the tessellation-factor write-out sequence to the end of a TCS:
 * invocation 0 of each patch reads the factors back from LDS and emits them
 * via store_tf_r600. Returns false if the shader is not a TCS, already
 * emits TF stores, or the primitive type has no tess factors. */
bool
r600_append_tcs_TF_emission(nir_shader *shader, enum mesa_prim prim_type)
{
   if (shader->info.stage != MESA_SHADER_TESS_CTRL)
      return false;

   /* Bail out if a TF emission sequence was already appended. */
   nir_foreach_function_impl(impl, shader)
   {
      nir_foreach_block(block, impl)
      {
         nir_foreach_instr_safe(instr, block)
         {
            if (instr->type != nir_instr_type_intrinsic)
               continue;
            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            if (intr->intrinsic == nir_intrinsic_store_tf_r600) {
               return false;
            }
         }
      }
   }

   assert(exec_list_length(&shader->functions) == 1);
   nir_function *f = (nir_function *)shader->functions.get_head();
   nir_builder builder = nir_builder_create(f->impl);
   nir_builder *b = &builder;

   auto outer_comps = outer_tf_components(prim_type);
   if (!outer_comps)
      return false;

   /* Triangles and quads have outer_comps - 2 inner factors; lines none.
    * Each factor takes 4 bytes in the TF buffer. */
   unsigned inner_comps = outer_comps - 2;
   unsigned stride = (inner_comps + outer_comps) * 4;

   b->cursor = nir_after_cf_list(&f->impl->body);

   nir_def *invocation_id = nir_load_invocation_id(b);

   /* Only invocation 0 writes the factors for the patch. */
   nir_push_if(b, nir_ieq_imm(b, invocation_id, 0));
   auto base = nir_load_tcs_out_param_base_r600(b);
   auto rel_patch_id = nir_load_tcs_rel_patch_id_r600(b);

   nir_def *addr0 = r600_tcs_base_address(b, base, rel_patch_id);

   /* Read the outer factors back from LDS. */
   nir_def *addr_outer = nir_iadd(b, addr0, load_offset_group(b, outer_comps));
   nir_def *tf_outer = nir_load_local_shared_r600(b, 32, addr_outer);

   /* Each entry is a vec2 of (TF-buffer address, factor value) for
    * r600_emit_tf. */
   std::vector<nir_def *> tf_out;

   nir_def *tf_out_base = nir_load_tcs_tess_factor_base_r600(b);
   nir_def *out_addr0 = nir_umad24(b,
                                   rel_patch_id,
                                   nir_imm_int(b, stride),
                                   tf_out_base);
   int chanx = 0;
   int chany = 1;

   /* The isoline domain stores its two outer factors in swapped order. */
   if (prim_type == MESA_PRIM_LINES)
      std::swap(chanx, chany);

   int inner_base = 12;

   tf_out.push_back(nir_vec2(b,
                             out_addr0,
                             nir_channel(b, tf_outer, chanx)));

   tf_out.push_back(nir_vec2(b, nir_iadd_imm(b, out_addr0, 4),
                             nir_channel(b, tf_outer, chany)));


   if (outer_comps > 2) {
      tf_out.push_back(nir_vec2(b,
                                nir_iadd_imm(b, out_addr0, 8),
                                nir_channel(b, tf_outer, 2)));
   }

   if (outer_comps > 3) {
      tf_out.push_back(nir_vec2(b,
                                nir_iadd_imm(b, out_addr0, 12),
                                nir_channel(b, tf_outer, 3)));
      /* Quads: the inner factors follow all four outer ones. */
      inner_base = 16;

   }

   if (inner_comps) {
      /* 4 + inner_comps encodes the inner-factor cases (5/6) of
       * load_offset_group(). */
      nir_def *addr1 = nir_iadd(b, addr0, load_offset_group(b, 4 + inner_comps));
      nir_def *tf_inner = nir_load_local_shared_r600(b, 32, addr1);

      tf_out.push_back(nir_vec2(b,
                                nir_iadd_imm(b, out_addr0, inner_base),
                                nir_channel(b, tf_inner, 0)));


      if (inner_comps > 1) {
         tf_out.push_back(nir_vec2(b,
                                   nir_iadd_imm(b, out_addr0, inner_base + 4),
                                   nir_channel(b, tf_inner, 1)));

      }
   }

   for (auto tf : tf_out)
      r600_emit_tf(b, tf);

   nir_pop_if(b, nullptr);

   nir_metadata_preserve(f->impl, nir_metadata_none);

   return true;
}
558