/* -*- mesa-c++ -*-
 * Copyright 2022 Collabora LTD
 * Author: Gert Wollny <[email protected]>
 * SPDX-License-Identifier: MIT
 */

#include "sfn_nir.h"

#include <utility>
#include <vector>

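/* Filter for the tessellation IO lowering: match the IO intrinsics that
 * must be rewritten to LDS accesses for the given stage. Plain input loads
 * are only lowered in the TCS and TES, and output stores only in the
 * stages that feed the tessellator (VS and TCS); the remaining
 * tessellation intrinsics are lowered regardless of stage.
 */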
bool
r600_lower_tess_io_filter(const nir_instr *instr, gl_shader_stage stage)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
   switch (op->intrinsic) {
   case nir_intrinsic_load_input:
      return stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_TESS_EVAL;
   case nir_intrinsic_load_output:
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_per_vertex_output:
   case nir_intrinsic_store_per_vertex_output:
   case nir_intrinsic_load_patch_vertices_in:
   case nir_intrinsic_load_tess_level_outer:
   case nir_intrinsic_load_tess_level_inner:
      return true;
   case nir_intrinsic_store_output:
      return stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_VERTEX;
   default:;
   }
   return false;
}

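/* Byte offset of a varying within one LDS record. Every slot takes one
 * vec4 (16 bytes): the fixed-function outputs occupy 0x0-0x80, generic
 * varyings start at 0x90, and per-patch data uses its own record where
 * the tess levels sit at 0x0/0x10 and patch varyings start at 0x20.
 */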
static int
get_tcs_varying_offset(nir_intrinsic_instr *op)
{
   unsigned location = nir_intrinsic_io_semantics(op).location;

   switch (location) {
   case VARYING_SLOT_POS:
      return 0;
   case VARYING_SLOT_PSIZ:
      return 0x10;
   case VARYING_SLOT_CLIP_DIST0:
      return 0x20;
   case VARYING_SLOT_CLIP_DIST1:
      return 0x30;
   case VARYING_SLOT_COL0:
      return 0x40;
   case VARYING_SLOT_COL1:
      return 0x50;
   case VARYING_SLOT_BFC0:
      return 0x60;
   case VARYING_SLOT_BFC1:
      return 0x70;
   case VARYING_SLOT_CLIP_VERTEX:
      return 0x80;
   case VARYING_SLOT_TESS_LEVEL_OUTER:
      return 0;
   case VARYING_SLOT_TESS_LEVEL_INNER:
      return 0x10;
   default:
      if (location >= VARYING_SLOT_VAR0 && location <= VARYING_SLOT_VAR31)
         return 0x10 * (location - VARYING_SLOT_VAR0) + 0x90;

      if (location >= VARYING_SLOT_PATCH0)
         return 0x10 * (location - VARYING_SLOT_PATCH0) + 0x20;
   }
   return 0;
}

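/* LDS base address of the data for the current patch, computed as
 * param_base.x * rel_patch_id + param_base.w with 24-bit arithmetic.
 */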
static inline nir_def *
r600_tcs_base_address(nir_builder *b, nir_def *param_base, nir_def *rel_patch_id)
{
   return nir_umad24(b,
                     nir_channel(b, param_base, 0),
                     rel_patch_id,
                     nir_channel(b, param_base, 3));
}

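/* LDS address of a per-vertex TCS input: the patch base (base.x *
 * patch_id) plus base.y times the vertex index plus the varying offset,
 * where a non-constant slot index adds another 16 bytes per slot. Terms
 * with a constant-zero index are skipped entirely.
 */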
static nir_def *
emit_lds_in_addr(nir_builder *b,
                 nir_def *base,
                 nir_def *patch_id,
                 nir_intrinsic_instr *op)
{
   nir_def *addr =
      nir_build_alu(b, nir_op_umul24, nir_channel(b, base, 0), patch_id, NULL, NULL);

   auto idx1 = nir_src_as_const_value(op->src[0]);
   if (!idx1 || idx1->u32 != 0)
      addr = nir_umad24(b, nir_channel(b, base, 1), op->src[0].ssa, addr);

   auto offset = nir_imm_int(b, get_tcs_varying_offset(op));

   auto idx2 = nir_src_as_const_value(op->src[1]);
   if (!idx2 || idx2->u32 != 0)
      offset = nir_iadd(b, nir_ishl_imm(b, op->src[1].ssa, 4), offset);

   return nir_iadd(b, addr, offset);
}

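/* LDS address of a per-vertex TCS output (also used for TES inputs):
 * base.x * patch_id + base.z locates the patch, base.y scales the vertex
 * index, and the slot index and varying offset select the vec4.
 */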
static nir_def *
emit_lds_out_addr(nir_builder *b,
                  nir_def *base,
                  nir_def *patch_id,
                  nir_intrinsic_instr *op,
                  UNUSED nir_variable_mode mode,
                  int src_offset)
{
   nir_def *addr1 =
      nir_umad24(b, nir_channel(b, base, 0), patch_id, nir_channel(b, base, 2));
   nir_def *addr2 =
      nir_umad24(b, nir_channel(b, base, 1), op->src[src_offset].ssa, addr1);
   int offset = get_tcs_varying_offset(op);
   return nir_iadd_imm(b,
                       nir_iadd(b,
                                addr2,
                                nir_ishl_imm(b, op->src[src_offset + 1].ssa, 4)),
                       offset);
}

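/* Byte offsets of the tess factor components in LDS. Component counts
 * 1-4 address the outer factors (base 0), 5 and 6 address one or two
 * inner factors (base 16).
 */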
static nir_def *
load_offset_group(nir_builder *b, int ncomponents)
{
   switch (ncomponents) {
   /* tess outer offsets */
   case 1:
      return nir_imm_int(b, 0);
   case 2:
      return nir_imm_ivec2(b, 0, 4);
   case 3:
      return r600_imm_ivec3(b, 0, 4, 8);
   case 4:
      return nir_imm_ivec4(b, 0, 4, 8, 12);
   /* tess inner offsets */
   case 5:
      return nir_imm_int(b, 16);
   case 6:
      return nir_imm_ivec2(b, 16, 20);
   default:
      debug_printf("Got %d components\n", ncomponents);
      unreachable("Unsupported component count");
   }
}

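/* Like load_offset_group, but return only the offsets of the components
 * selected by the given use mask.
 */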
static nir_def *
load_offset_group_from_mask(nir_builder *b, uint32_t mask)
{
   auto full_mask = nir_imm_ivec4(b, 0, 4, 8, 12);
   return nir_channels(b, full_mask, mask);
}

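/* Scan state used to collect which components of an SSA value are
 * actually read by its users.
 */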
struct MaskQuery {
   uint32_t mask;
   uint32_t ssa_index;
   nir_alu_instr *alu;
   int index;
   uint32_t full_mask;
};

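/* nir_foreach_src callback: if this source reads the value we track,
 * merge in the components the ALU instruction consumes. Returning false
 * once the mask is complete stops the walk early.
 */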
static bool
update_alu_mask(nir_src *src, void *data)
{
   auto mq = reinterpret_cast<MaskQuery *>(data);

   if (mq->ssa_index == src->ssa->index) {
      mq->mask |= nir_alu_instr_src_read_mask(mq->alu, mq->index);
   }
   ++mq->index;

   return mq->mask != mq->full_mask;
}

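/* Compute which components of the load result are really used. ALU users
 * and known store intrinsics contribute their precise read masks; any
 * other use makes the result conservatively fully used (0xf).
 */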
static uint32_t
get_dest_use_mask(nir_intrinsic_instr *op)
{
   MaskQuery mq = {0};
   mq.full_mask = (1 << op->def.num_components) - 1;

   nir_foreach_use(use_src, &op->def)
   {
      auto use_instr = nir_src_parent_instr(use_src);
      mq.ssa_index = use_src->ssa->index;

      switch (use_instr->type) {
      case nir_instr_type_alu: {
         mq.alu = nir_instr_as_alu(use_instr);
         mq.index = 0;
         if (!nir_foreach_src(use_instr, update_alu_mask, &mq))
            return 0xf;
         break;
      }
      case nir_instr_type_intrinsic: {
         auto intr = nir_instr_as_intrinsic(use_instr);
         switch (intr->intrinsic) {
         case nir_intrinsic_store_output:
         case nir_intrinsic_store_per_vertex_output:
            mq.mask |= nir_intrinsic_write_mask(intr) << nir_intrinsic_component(intr);
            break;
         case nir_intrinsic_store_scratch:
         case nir_intrinsic_store_local_shared_r600:
            mq.mask |= nir_intrinsic_write_mask(intr);
            break;
         default:
            return 0xf;
         }
         break;
      }
      default:
         return 0xf;
      }
   }
   return mq.mask;
}

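/* Replace a lowered IO load by an LDS load that fetches only the used
 * components, then rebuild a full-width vector (unused channels become
 * undef) so all existing users can be rewritten in place. A load with no
 * used components is simply deleted.
 */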
static void
replace_load_instr(nir_builder *b, nir_intrinsic_instr *op, nir_def *addr)
{
   uint32_t mask = get_dest_use_mask(op);
   if (mask) {
      nir_def *addr_outer = nir_iadd(b, addr, load_offset_group_from_mask(b, mask));
      if (nir_intrinsic_component(op))
         addr_outer = nir_iadd_imm(b, addr_outer, 4 * nir_intrinsic_component(op));

      auto new_load = nir_load_local_shared_r600(b, 32, addr_outer);

      auto undef = nir_undef(b, 1, 32);
      int comps = op->def.num_components;
      nir_def *remix[4] = {undef, undef, undef, undef};

      int chan = 0;
      for (int i = 0; i < comps; ++i) {
         if (mask & (1 << i))
            remix[i] = nir_channel(b, new_load, chan++);
      }
      auto new_load_remixed = nir_vec(b, remix, comps);
      nir_def_rewrite_uses(&op->def, new_load_remixed);
   }
   nir_instr_remove(&op->instr);
}

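/* Emit the LDS stores for a lowered output write. The write mask is
 * split into the .xy and .zw halves and one store is emitted per
 * non-empty half, presumably because the LDS write op handles at most
 * two dwords at a time.
 */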
static void
emit_store_lds(nir_builder *b, nir_intrinsic_instr *op, nir_def *addr)
{
   uint32_t orig_writemask = nir_intrinsic_write_mask(op) << nir_intrinsic_component(op);

   for (int i = 0; i < 2; ++i) {
      unsigned test_mask = (0x3 << 2 * i);
      unsigned wmask = orig_writemask & test_mask;
      if (!wmask)
         continue;

      uint32_t writemask = wmask >> nir_intrinsic_component(op);

      bool start_even = (orig_writemask & (1u << (2 * i)));
      nir_def *addr2 = nir_iadd_imm(b, addr, 8 * i + (start_even ? 0 : 4));
      nir_store_local_shared_r600(b, op->src[0].ssa, addr2,
                                  .write_mask = writemask);
   }
}

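/* Apply the per-slot offset to a patch base address: 16 bytes per
 * (possibly indirect) slot index plus the constant varying offset.
 */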
static nir_def *
emit_tcs_io_offset(nir_builder *b,
                   nir_def *addr,
                   nir_intrinsic_instr *op,
                   int src_offset)
{
   int offset = get_tcs_varying_offset(op);
   return nir_iadd_imm(b,
                       nir_iadd(b,
                                addr,
                                nir_ishl_imm(b, op->src[src_offset].ssa, 4)),
                       offset);
}

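/* Number of outer tess factor components for a tessellation primitive;
 * zero signals an unsupported primitive type.
 */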
static inline unsigned
outer_tf_components(mesa_prim prim_type)
{
   switch (prim_type) {
   case MESA_PRIM_LINES:
      return 2;
   case MESA_PRIM_TRIANGLES:
      return 3;
   case MESA_PRIM_QUADS:
      return 4;
   default:
      return 0;
   }
}

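/* Lower a single tessellation IO intrinsic to LDS addressing. The param
 * base vectors describing the LDS layout are provided by the driver via
 * r600-specific intrinsics; which of them is needed depends on the stage,
 * since e.g. the TES reads the TCS output layout as its input.
 */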
static bool
r600_lower_tess_io_impl(nir_builder *b, nir_instr *instr, enum mesa_prim prim_type)
{
   nir_def *load_in_param_base = nullptr;
   nir_def *load_out_param_base = nullptr;

   b->cursor = nir_before_instr(instr);
   nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);

   if (b->shader->info.stage == MESA_SHADER_TESS_CTRL) {
      load_in_param_base = nir_load_tcs_in_param_base_r600(b);
      load_out_param_base = nir_load_tcs_out_param_base_r600(b);
   } else if (b->shader->info.stage == MESA_SHADER_TESS_EVAL) {
      load_in_param_base = nir_load_tcs_out_param_base_r600(b);
   } else if (b->shader->info.stage == MESA_SHADER_VERTEX) {
      load_out_param_base = nir_load_tcs_in_param_base_r600(b);
   }

   auto rel_patch_id = nir_load_tcs_rel_patch_id_r600(b);

   unsigned tf_inner_address_offset = 0;
   unsigned ncomps_correct = 0;

   switch (op->intrinsic) {
   case nir_intrinsic_load_patch_vertices_in: {
      nir_def *vertices_in;
      if (b->shader->info.stage == MESA_SHADER_TESS_CTRL)
         vertices_in = nir_channel(b, load_in_param_base, 2);
      else {
         auto base = nir_load_tcs_in_param_base_r600(b);
         vertices_in = nir_channel(b, base, 2);
      }
      nir_def_replace(&op->def, vertices_in);
      return true;
   }
   case nir_intrinsic_load_per_vertex_input: {
      nir_def *addr =
         b->shader->info.stage == MESA_SHADER_TESS_CTRL
            ? emit_lds_in_addr(b, load_in_param_base, rel_patch_id, op)
            : emit_lds_out_addr(
                 b, load_in_param_base, rel_patch_id, op, nir_var_shader_in, 0);
      replace_load_instr(b, op, addr);
      return true;
   }
   case nir_intrinsic_store_per_vertex_output: {
      nir_def *addr = emit_lds_out_addr(
         b, load_out_param_base, rel_patch_id, op, nir_var_shader_out, 1);
      emit_store_lds(b, op, addr);
      nir_instr_remove(instr);
      return true;
   }
   case nir_intrinsic_load_per_vertex_output: {
      nir_def *addr = emit_lds_out_addr(
         b, load_out_param_base, rel_patch_id, op, nir_var_shader_out, 0);
      replace_load_instr(b, op, addr);
      return true;
   }
   case nir_intrinsic_store_output: {
      nir_def *addr = (b->shader->info.stage == MESA_SHADER_TESS_CTRL)
                         ? r600_tcs_base_address(b, load_out_param_base, rel_patch_id)
                         : nir_build_alu(b,
                                         nir_op_umul24,
                                         nir_channel(b, load_out_param_base, 1),
                                         rel_patch_id,
                                         NULL,
                                         NULL);
      addr = emit_tcs_io_offset(b, addr, op, 1);
      emit_store_lds(b, op, addr);
      nir_instr_remove(instr);
      return true;
   }
   case nir_intrinsic_load_output: {
      nir_def *addr = r600_tcs_base_address(b, load_out_param_base, rel_patch_id);
      addr = emit_tcs_io_offset(b, addr, op, 0);
      replace_load_instr(b, op, addr);
      return true;
   }
   case nir_intrinsic_load_input: {
      nir_def *addr = r600_tcs_base_address(b, load_in_param_base, rel_patch_id);
      addr = emit_tcs_io_offset(b, addr, op, 0);
      replace_load_instr(b, op, addr);
      return true;
   }
   case nir_intrinsic_load_tess_level_inner:
      tf_inner_address_offset = 4;
      ncomps_correct = 2;
      FALLTHROUGH;
   case nir_intrinsic_load_tess_level_outer: {
      auto ncomps = outer_tf_components(prim_type);
      if (!ncomps)
         return false;
      ncomps -= ncomps_correct;
      auto base = nir_load_tcs_out_param_base_r600(b);
      auto rel_patch_id = nir_load_tcs_rel_patch_id_r600(b);
      nir_def *addr0 = r600_tcs_base_address(b, base, rel_patch_id);
      nir_def *addr_outer =
         nir_iadd(b, addr0, load_offset_group(b, tf_inner_address_offset + ncomps));

      nir_def *tf = nir_load_local_shared_r600(b, 32, addr_outer);
      if (ncomps < 4 && b->shader->info.stage != MESA_SHADER_TESS_EVAL) {
         auto undef = nir_undef(b, 1, 32);
         nir_def *srcs[4] = {undef, undef, undef, undef};
         for (unsigned i = 0; i < ncomps; ++i)
            srcs[i] = nir_channel(b, tf, i);
         auto help = nir_vec(b, srcs, 4);
         nir_def_rewrite_uses(&op->def, help);
      } else {
         nir_def_rewrite_uses(&op->def, tf);
      }
      nir_instr_remove(instr);
      return true;
   }
   default:;
   }

   return false;
}

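/* Pass entry point: lower all tessellation IO intrinsics selected by the
 * filter. A minimal usage sketch (assuming the standard NIR_PASS helper
 * and a prim_type taken from the tessellation state):
 *
 *    bool progress = false;
 *    NIR_PASS(progress, shader, r600_lower_tess_io, prim_type);
 */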
bool
r600_lower_tess_io(nir_shader *shader, enum mesa_prim prim_type)
{
   bool progress = false;
   nir_foreach_function_impl(impl, shader)
   {
      nir_builder b = nir_builder_create(impl);

      nir_foreach_block(block, impl)
      {
         nir_foreach_instr_safe(instr, block)
         {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            if (r600_lower_tess_io_filter(instr, shader->info.stage))
               progress |= r600_lower_tess_io_impl(&b, instr, prim_type);
         }
      }
   }
   return progress;
}

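/* Emit one r600 TF store. val is expected to be a vec2 holding the
 * destination offset in the TF buffer in .x and the factor value in .y,
 * as assembled by r600_append_tcs_TF_emission below.
 */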
bool
r600_emit_tf(nir_builder *b, nir_def *val)
{
   nir_intrinsic_instr *store_tf =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_tf_r600);
   store_tf->num_components = val->num_components;
   store_tf->src[0] = nir_src_for_ssa(val);
   nir_builder_instr_insert(b, &store_tf->instr);
   return true;
}

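/* Append the tess factor emission to the end of a TCS that does not
 * already store its factors: invocation 0 of each patch reads the outer
 * (and, for triangles and quads, inner) factors back from LDS and writes
 * them to the TF buffer as (offset, value) pairs.
 */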
bool
r600_append_tcs_TF_emission(nir_shader *shader, enum mesa_prim prim_type)
{
   if (shader->info.stage != MESA_SHADER_TESS_CTRL)
      return false;

   nir_foreach_function_impl(impl, shader)
   {
      nir_foreach_block(block, impl)
      {
         nir_foreach_instr_safe(instr, block)
         {
            if (instr->type != nir_instr_type_intrinsic)
               continue;
            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            if (intr->intrinsic == nir_intrinsic_store_tf_r600)
               return false;
         }
      }
   }

   assert(exec_list_length(&shader->functions) == 1);
   nir_function *f = (nir_function *)shader->functions.get_head();
   nir_builder builder = nir_builder_create(f->impl);
   nir_builder *b = &builder;

   auto outer_comps = outer_tf_components(prim_type);
   if (!outer_comps)
      return false;

   unsigned inner_comps = outer_comps - 2;
   unsigned stride = (inner_comps + outer_comps) * 4;

   b->cursor = nir_after_cf_list(&f->impl->body);

   nir_def *invocation_id = nir_load_invocation_id(b);

   nir_push_if(b, nir_ieq_imm(b, invocation_id, 0));
   auto base = nir_load_tcs_out_param_base_r600(b);
   auto rel_patch_id = nir_load_tcs_rel_patch_id_r600(b);

   nir_def *addr0 = r600_tcs_base_address(b, base, rel_patch_id);

   nir_def *addr_outer = nir_iadd(b, addr0, load_offset_group(b, outer_comps));
   nir_def *tf_outer = nir_load_local_shared_r600(b, 32, addr_outer);

   std::vector<nir_def *> tf_out;

   nir_def *tf_out_base = nir_load_tcs_tess_factor_base_r600(b);
   nir_def *out_addr0 = nir_umad24(b,
                                   rel_patch_id,
                                   nir_imm_int(b, stride),
                                   tf_out_base);
   int chanx = 0;
   int chany = 1;

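   /* For isolines the hardware expects the two outer factors in reverse
    * order in the TF buffer, hence the channel swap below.
    */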
   if (prim_type == MESA_PRIM_LINES)
      std::swap(chanx, chany);

   int inner_base = 12;

   tf_out.push_back(nir_vec2(b,
                             out_addr0,
                             nir_channel(b, tf_outer, chanx)));

   tf_out.push_back(nir_vec2(b,
                             nir_iadd_imm(b, out_addr0, 4),
                             nir_channel(b, tf_outer, chany)));

   if (outer_comps > 2) {
      tf_out.push_back(nir_vec2(b,
                                nir_iadd_imm(b, out_addr0, 8),
                                nir_channel(b, tf_outer, 2)));
   }

   if (outer_comps > 3) {
      tf_out.push_back(nir_vec2(b,
                                nir_iadd_imm(b, out_addr0, 12),
                                nir_channel(b, tf_outer, 3)));
      inner_base = 16;
   }

   if (inner_comps) {
      nir_def *addr1 = nir_iadd(b, addr0, load_offset_group(b, 4 + inner_comps));
      nir_def *tf_inner = nir_load_local_shared_r600(b, 32, addr1);

      tf_out.push_back(nir_vec2(b,
                                nir_iadd_imm(b, out_addr0, inner_base),
                                nir_channel(b, tf_inner, 0)));

      if (inner_comps > 1) {
         tf_out.push_back(nir_vec2(b,
                                   nir_iadd_imm(b, out_addr0, inner_base + 4),
                                   nir_channel(b, tf_inner, 1)));
      }
   }

   for (auto tf : tf_out)
      r600_emit_tf(b, tf);

   nir_pop_if(b, nullptr);

   nir_metadata_preserve(f->impl, nir_metadata_none);

   return true;
}