/*
 * Copyright 2024 Alyssa Rosenzweig
 * Copyright 2024 Valve Corporation
 * SPDX-License-Identifier: MIT
 */

#include "gallium/include/pipe/p_defines.h"
#include "agx_linker.h"
#include "agx_nir_lower_gs.h"
#include "agx_nir_lower_vbo.h"
#include "agx_nir_passes.h"
#include "agx_pack.h"
#include "agx_tilebuffer.h"
#include "nir.h"
#include "nir_builder.h"
#include "nir_builder_opcodes.h"
#include "nir_lower_blend.h"
#include "shader_enums.h"

/*
 * Insert code into a fragment shader to lower polygon stipple. The stipple is
 * passed in a sideband, rather than requiring a texture binding. This is
 * simpler for drivers to integrate and might be more efficient.
 */
static bool
agx_nir_lower_poly_stipple(nir_shader *s)
{
   assert(s->info.stage == MESA_SHADER_FRAGMENT);

   /* Insert at the beginning for performance. */
   nir_builder b_ =
      nir_builder_at(nir_before_impl(nir_shader_get_entrypoint(s)));
   nir_builder *b = &b_;

   /* The stipple coordinate is defined at the window coordinate mod 32. It's
    * reversed along the X-axis to simplify the driver, hence the NOT.
    */
   nir_def *raw = nir_u2u32(b, nir_load_pixel_coord(b));
   nir_def *coord = nir_umod_imm(
      b,
      nir_vec2(b, nir_inot(b, nir_channel(b, raw, 0)), nir_channel(b, raw, 1)),
      32);

   /* Extract the column from the packed bitfield */
   nir_def *pattern = nir_load_polygon_stipple_agx(b, nir_channel(b, coord, 1));
   nir_def *bit = nir_ubitfield_extract(b, pattern, nir_channel(b, coord, 0),
                                        nir_imm_int(b, 1));

   /* Discard fragments where the pattern is 0 */
   nir_demote_if(b, nir_ieq_imm(b, bit, 0));
   s->info.fs.uses_discard = true;

   nir_metadata_preserve(b->impl, nir_metadata_control_flow);
   return true;
}

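/* Translate the VS prolog key's vertex element descriptions into the
 * agx_attribute form expected by agx_nir_lower_vbo, then run that lowering.
 */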
static bool
lower_vbo(nir_shader *s, const struct agx_velem_key *key,
          const struct agx_robustness rs)
{
   struct agx_attribute out[AGX_MAX_VBUFS];

   for (unsigned i = 0; i < AGX_MAX_VBUFS; ++i) {
      out[i] = (struct agx_attribute){
         .divisor = key[i].divisor,
         .stride = key[i].stride,
         .format = key[i].format,
         .instanced = key[i].instanced,
      };
   }

   return agx_nir_lower_vbo(s, out, rs);
}

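/* Map a VS prolog sideband intrinsic to its slot in the prolog's uniform
 * layout, or return -1 if the intrinsic is not handled here. With N vertex
 * attributes, the layout packs the VBO base addresses first, then the
 * attribute clamps, then the remaining system values (offsets in
 * load_preamble units):
 *
 *    [0, 4N)   VBO base addresses (4 units each)
 *    [4N, 6N)  attribute clamps   (2 units each)
 *    6N        first vertex
 *    6N + 2    base instance
 *    6N + 8    input assembly buffer address
 */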
static int
map_vs_part_uniform(nir_intrinsic_instr *intr, unsigned nr_attribs)
{
   switch (intr->intrinsic) {
   case nir_intrinsic_load_vbo_base_agx:
      return 4 * nir_src_as_uint(intr->src[0]);
   case nir_intrinsic_load_attrib_clamp_agx:
      return (4 * nr_attribs) + (2 * nir_src_as_uint(intr->src[0]));
   case nir_intrinsic_load_first_vertex:
      return (6 * nr_attribs);
   case nir_intrinsic_load_base_instance:
      return (6 * nr_attribs) + 2;
   case nir_intrinsic_load_input_assembly_buffer_agx:
      return (6 * nr_attribs) + 8;
   default:
      return -1;
   }
}

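/* Map an FS part sideband intrinsic (the blend constant colour components) to
 * its fixed uniform slot, or return -1 if the intrinsic is not handled here.
 */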
static int
map_fs_part_uniform(nir_intrinsic_instr *intr)
{
   switch (intr->intrinsic) {
   case nir_intrinsic_load_blend_const_color_r_float:
      return 4;
   case nir_intrinsic_load_blend_const_color_g_float:
      return 6;
   case nir_intrinsic_load_blend_const_color_b_float:
      return 8;
   case nir_intrinsic_load_blend_const_color_a_float:
      return 10;
   default:
      return -1;
   }
}

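/* Lower sideband intrinsics in non-monolithic shader parts to preamble loads
 * at the slots given by the maps above. Texture handles are rewritten to a
 * (0, index * AGX_TEXTURE_LENGTH) pair, i.e. a byte offset into the texture
 * descriptor array.
 */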
static bool
lower_non_monolithic_uniforms(nir_builder *b, nir_intrinsic_instr *intr,
                              void *data)
{
   int unif;
   if (b->shader->info.stage == MESA_SHADER_VERTEX) {
      unsigned *nr_attribs = data;
      unif = map_vs_part_uniform(intr, *nr_attribs);
   } else {
      unif = map_fs_part_uniform(intr);
   }

   if (unif >= 0) {
      b->cursor = nir_instr_remove(&intr->instr);
      nir_def *load = nir_load_preamble(b, 1, intr->def.bit_size, .base = unif);
      nir_def_rewrite_uses(&intr->def, load);
      return true;
   } else if (intr->intrinsic == nir_intrinsic_load_texture_handle_agx) {
      b->cursor = nir_instr_remove(&intr->instr);
      nir_def *offs =
         nir_imul_imm(b, nir_u2u32(b, intr->src[0].ssa), AGX_TEXTURE_LENGTH);
      nir_def_rewrite_uses(&intr->def, nir_vec2(b, nir_imm_int(b, 0), offs));
      return true;
   } else {
      return false;
   }
}

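/* Build a vertex shader prolog from its key. The prolog fetches the attribute
 * components named in the key, exports them (along with vertex/instance ID)
 * in registers for the API vertex shader, and is then lowered according to
 * the vertex buffer layout and uniform ABI.
 */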
void
agx_nir_vs_prolog(nir_builder *b, const void *key_)
{
   const struct agx_vs_prolog_key *key = key_;
   b->shader->info.stage = MESA_SHADER_VERTEX;
   b->shader->info.name = "VS prolog";

   /* First, construct a passthrough shader reading each attribute and exporting
    * the value. We also need to export vertex/instance ID in their usual regs.
    */
   unsigned i = 0;
   nir_def *vec = NULL;
   unsigned vec_idx = ~0;
   BITSET_FOREACH_SET(i, key->component_mask, AGX_MAX_ATTRIBS * 4) {
      unsigned a = i / 4;
      unsigned c = i % 4;

      if (vec_idx != a) {
         vec = nir_load_input(b, 4, 32, nir_imm_int(b, 0), .base = a);
         vec_idx = a;
      }

      /* ABI: attributes passed starting at r8 */
      nir_export_agx(b, nir_channel(b, vec, c), .base = 2 * (8 + i));
   }

   nir_export_agx(b, nir_load_vertex_id(b), .base = 5 * 2);
   nir_export_agx(b, nir_load_instance_id(b), .base = 6 * 2);

   /* Now lower the resulting program using the key */
   lower_vbo(b->shader, key->attribs, key->robustness);

   if (!key->hw) {
      agx_nir_lower_sw_vs(b->shader, key->sw_index_size_B);
   }

   /* Finally, lower uniforms according to our ABI */
   unsigned nr = DIV_ROUND_UP(BITSET_LAST_BIT(key->component_mask), 4);
   nir_shader_intrinsics_pass(b->shader, lower_non_monolithic_uniforms,
                              nir_metadata_control_flow, &nr);
   b->shader->info.io_lowered = true;
}

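/* Rewrite a VS attribute load to read the registers exported by the VS
 * prolog, recording which attribute components are read into the caller's
 * bitset.
 */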
static bool
lower_input_to_prolog(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   if (intr->intrinsic != nir_intrinsic_load_input)
      return false;

   unsigned idx = nir_src_as_uint(intr->src[0]) + nir_intrinsic_base(intr);
   unsigned comp = nir_intrinsic_component(intr);

   assert(intr->def.bit_size == 32 && "todo: push conversions up?");
   unsigned base = 4 * idx + comp;

   b->cursor = nir_before_instr(&intr->instr);
   nir_def *val = nir_load_exported_agx(
      b, intr->def.num_components, intr->def.bit_size, .base = 16 + 2 * base);

   BITSET_WORD *comps_read = data;
   nir_component_mask_t mask = nir_def_components_read(&intr->def);

   u_foreach_bit(c, mask) {
      BITSET_SET(comps_read, base + c);
   }

   nir_def_replace(&intr->def, val);
   return true;
}

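/* See lower_input_to_prolog; fills attrib_components_read with the attribute
 * components the shader actually reads.
 */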
bool
agx_nir_lower_vs_input_to_prolog(nir_shader *s,
                                 BITSET_WORD *attrib_components_read)
{
   return nir_shader_intrinsics_pass(s, lower_input_to_prolog,
                                     nir_metadata_control_flow,
                                     attrib_components_read);
}

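/* Lower load_active_samples_agx to the register used to pass the active
 * sample mask between shader parts (r0h, i.e. export base 1).
 */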
static bool
lower_active_samples_to_register(nir_builder *b, nir_intrinsic_instr *intr,
                                 void *data)
{
   if (intr->intrinsic != nir_intrinsic_load_active_samples_agx)
      return false;

   b->cursor = nir_instr_remove(&intr->instr);

   /* ABI: r0h contains the active sample mask */
   nir_def *id = nir_load_exported_agx(b, 1, 16, .base = 1);
   nir_def_rewrite_uses(&intr->def, id);
   return true;
}

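/* Resolve load_shader_part_tests_zs_agx to a constant: all-ones if this
 * shader part is responsible for running depth/stencil tests itself, zero
 * otherwise. Only relevant for shaders that can discard.
 */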
static bool
lower_tests_zs_intr(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   bool *value = data;
   if (intr->intrinsic != nir_intrinsic_load_shader_part_tests_zs_agx)
      return false;

   b->cursor = nir_instr_remove(&intr->instr);
   nir_def_rewrite_uses(&intr->def, nir_imm_intN_t(b, *value ? 0xFF : 0, 16));
   return true;
}

static bool
lower_tests_zs(nir_shader *s, bool value)
{
   if (!s->info.fs.uses_discard)
      return false;

   return nir_shader_intrinsics_pass(s, lower_tests_zs_intr,
                                     nir_metadata_control_flow, &value);
}

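/* Check whether a render target's blend state uses dual-source factors, which
 * determines how the second colour output is interpreted.
 */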
static inline bool
blend_uses_2src(struct agx_blend_rt_key rt)
{
   enum pipe_blendfactor factors[] = {
      rt.rgb_src_factor,
      rt.rgb_dst_factor,
      rt.alpha_src_factor,
      rt.alpha_dst_factor,
   };

   for (unsigned i = 0; i < ARRAY_SIZE(factors); ++i) {
      switch (factors[i]) {
      case PIPE_BLENDFACTOR_SRC1_COLOR:
      case PIPE_BLENDFACTOR_SRC1_ALPHA:
      case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
      case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
         return true;
      default:
         break;
      }
   }

   return false;
}

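/* Build a fragment shader epilog from its key. The epilog reads the colours
 * exported by the API fragment shader and performs blending, tilebuffer
 * stores, and (when this part is responsible for them) late depth/stencil
 * tests.
 */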
void
agx_nir_fs_epilog(nir_builder *b, const void *key_)
{
   const struct agx_fs_epilog_key *key = key_;
   b->shader->info.stage = MESA_SHADER_FRAGMENT;
   b->shader->info.name = "FS epilog";

   /* First, construct a passthrough shader reading each colour and outputting
    * the value.
    */
   u_foreach_bit(rt, key->link.rt_written) {
      bool dual_src = (rt == 1) && blend_uses_2src(key->blend.rt[0]);
      unsigned read_rt = (key->link.broadcast_rt0 && !dual_src) ? 0 : rt;
      unsigned size = (key->link.size_32 & BITFIELD_BIT(read_rt)) ? 32 : 16;

      nir_def *value =
         nir_load_exported_agx(b, 4, size, .base = 2 * (4 + (4 * read_rt)));

      if (key->link.rt0_w_1 && read_rt == 0) {
         value =
            nir_vector_insert_imm(b, value, nir_imm_floatN_t(b, 1.0, size), 3);
      }

      nir_store_output(
         b, value, nir_imm_int(b, 0),
         .io_semantics.location = FRAG_RESULT_DATA0 + (dual_src ? 0 : rt),
         .io_semantics.dual_source_blend_index = dual_src,
         .src_type = nir_type_float | size);
   }

   /* Grab registers early, this has to happen in the first block. */
   nir_def *sample_id = NULL, *write_samples = NULL;
   if (key->link.sample_shading) {
      sample_id = nir_load_exported_agx(b, 1, 16, .base = 1);
   }

   if (key->link.sample_mask_after_force_early) {
      write_samples = nir_load_exported_agx(b, 1, 16, .base = 7);
   }

   /* Now lower the resulting program using the key */
   struct agx_tilebuffer_layout tib = agx_build_tilebuffer_layout(
      key->rt_formats, ARRAY_SIZE(key->rt_formats), key->nr_samples, true);

   if (key->force_small_tile)
      tib.tile_size = (struct agx_tile_size){16, 16};

   bool force_translucent = false;
   nir_lower_blend_options opts = {
      .scalar_blend_const = true,
      .logicop_enable = key->blend.logicop_func != PIPE_LOGICOP_COPY,
      .logicop_func = key->blend.logicop_func,
   };

   static_assert(ARRAY_SIZE(opts.format) == 8, "max RTs out of sync");

   for (unsigned i = 0; i < 8; ++i) {
      opts.format[i] = key->rt_formats[i];
      opts.rt[i] = (nir_lower_blend_rt){
         .rgb.src_factor = key->blend.rt[i].rgb_src_factor,
         .rgb.dst_factor = key->blend.rt[i].rgb_dst_factor,
         .rgb.func = key->blend.rt[i].rgb_func,

         .alpha.src_factor = key->blend.rt[i].alpha_src_factor,
         .alpha.dst_factor = key->blend.rt[i].alpha_dst_factor,
         .alpha.func = key->blend.rt[i].alpha_func,

         .colormask = key->blend.rt[i].colormask,
      };
   }

   /* It's more efficient to use masked stores (with
    * agx_nir_lower_tilebuffer) than to emulate colour masking with
    * nir_lower_blend.
    */
   uint8_t colormasks[8] = {0};

   for (unsigned i = 0; i < 8; ++i) {
      if (key->rt_formats[i] == PIPE_FORMAT_NONE)
         continue;

      /* TODO: Flakes some dEQPs, seems to invoke UB. Revisit later.
       * dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.77
       * dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.98
       */
      if (0 /* agx_tilebuffer_supports_mask(&tib, i) */) {
         colormasks[i] = key->blend.rt[i].colormask;
         opts.rt[i].colormask = (uint8_t)BITFIELD_MASK(4);
      } else {
         colormasks[i] = (uint8_t)BITFIELD_MASK(4);
      }

      /* If not all bound RTs are fully written to, we need to force
       * translucent pass type. agx_nir_lower_tilebuffer will take
       * care of this for its own colormasks input.
       */
      unsigned comps = util_format_get_nr_components(key->rt_formats[i]);
      if ((opts.rt[i].colormask & BITFIELD_MASK(comps)) !=
          BITFIELD_MASK(comps)) {
         force_translucent = true;
      }
   }

   /* Alpha-to-coverage must be lowered before alpha-to-one */
   if (key->blend.alpha_to_coverage)
      NIR_PASS(_, b->shader, agx_nir_lower_alpha_to_coverage, tib.nr_samples);

   /* Depth/stencil writes must be deferred until after all discards,
    * particularly alpha-to-coverage.
    */
   if (key->link.write_z || key->link.write_s) {
      nir_store_zs_agx(
         b, nir_imm_intN_t(b, 0xFF, 16),
         nir_load_exported_agx(b, 1, 32, .base = 4),
         nir_load_exported_agx(b, 1, 16, .base = 6),
         .base = (key->link.write_z ? 1 : 0) | (key->link.write_s ? 2 : 0));

      if (key->link.write_z)
         b->shader->info.outputs_written |= BITFIELD64_BIT(FRAG_RESULT_DEPTH);

      if (key->link.write_s)
         b->shader->info.outputs_written |= BITFIELD64_BIT(FRAG_RESULT_STENCIL);
   }

   /* Alpha-to-one must be lowered before blending */
   if (key->blend.alpha_to_one)
      NIR_PASS(_, b->shader, agx_nir_lower_alpha_to_one);

   NIR_PASS(_, b->shader, nir_lower_blend, &opts);

   unsigned rt_spill = key->link.rt_spill_base;
   NIR_PASS(_, b->shader, agx_nir_lower_tilebuffer, &tib, colormasks, &rt_spill,
            write_samples, &force_translucent);
   NIR_PASS(_, b->shader, agx_nir_lower_texture);
   NIR_PASS(_, b->shader, agx_nir_lower_multisampled_image_store);

   /* If the API shader runs once per sample, then the epilog runs once per
    * sample as well, so we need to lower our code to run for a single sample.
    *
    * If the API shader runs once per pixel, then the epilog runs once per
    * pixel. So we run through the monolithic MSAA lowering, which wraps the
    * epilog in the sample loop if needed. This localizes sample shading
    * to the epilog, when sample shading is not used but blending is.
    */
   if (key->link.sample_shading) {
      NIR_PASS(_, b->shader, agx_nir_lower_to_per_sample);
      NIR_PASS(_, b->shader, agx_nir_lower_fs_active_samples_to_register);

      /* Ensure the sample ID is preserved in register. We do this late since it
       * has to go in the last block, and the above passes might add control
       * flow when lowering.
       */
      b->cursor = nir_after_impl(b->impl);
      nir_export_agx(b, sample_id, .base = 1);
   } else {
      NIR_PASS(_, b->shader, agx_nir_lower_monolithic_msaa, key->nr_samples);
   }

   /* Finally, lower uniforms according to our ABI */
   nir_shader_intrinsics_pass(b->shader, lower_non_monolithic_uniforms,
                              nir_metadata_control_flow, NULL);

   /* There is no shader part after the epilog, so we're always responsible for
    * running our own tests, unless the fragment shader forced early tests.
    */
   NIR_PASS(_, b->shader, lower_tests_zs, !key->link.already_ran_zs);

   b->shader->info.io_lowered = true;
   b->shader->info.fs.uses_fbfetch_output |= force_translucent;
   b->shader->info.fs.uses_sample_shading = key->link.sample_shading;
}

struct lower_epilog_ctx {
   struct agx_fs_epilog_link_info *info;
   nir_variable *masked_samples;
};

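/* Rewrite fragment outputs (colour, depth, stencil) into the register exports
 * consumed by the FS epilog, recording in the link info which render targets
 * are written and at what size. Discards under forced early tests are instead
 * accumulated into a sample mask that is exported for the epilog.
 */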
static bool
lower_output_to_epilog(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   struct lower_epilog_ctx *ctx = data;
   struct agx_fs_epilog_link_info *info = ctx->info;

   if (intr->intrinsic == nir_intrinsic_store_zs_agx) {
      assert(nir_src_as_uint(intr->src[0]) == 0xff && "msaa not yet lowered");
      b->cursor = nir_instr_remove(&intr->instr);

      unsigned base = nir_intrinsic_base(intr);
      info->write_z = !!(base & 1);
      info->write_s = !!(base & 2);

      /* ABI: r2 contains the written depth */
      if (info->write_z)
         nir_export_agx(b, intr->src[1].ssa, .base = 4);

      /* ABI: r3l contains the written stencil */
      if (info->write_s)
         nir_export_agx(b, intr->src[2].ssa, .base = 6);

      return true;
   }

   if (intr->intrinsic == nir_intrinsic_discard_agx &&
       b->shader->info.fs.early_fragment_tests) {

      if (!ctx->masked_samples) {
         b->cursor = nir_before_impl(nir_shader_get_entrypoint(b->shader));

         ctx->masked_samples =
            nir_local_variable_create(b->impl, glsl_uint16_t_type(), NULL);

         nir_store_var(b, ctx->masked_samples, nir_imm_intN_t(b, 0xFF, 16),
                       nir_component_mask(1));
      }

      b->cursor = nir_before_instr(&intr->instr);

      nir_def *mask = nir_load_var(b, ctx->masked_samples);
      nir_def *mask_2 =
         nir_ixor(b, intr->src[0].ssa, nir_imm_intN_t(b, 0xff, 16));

      mask = nir_iand(b, mask, mask_2);
      nir_store_var(b, ctx->masked_samples, mask, nir_component_mask(1));

      nir_instr_remove(&intr->instr);
      return true;
   }

   if (intr->intrinsic != nir_intrinsic_store_output)
      return false;

   nir_io_semantics sem = nir_intrinsic_io_semantics(intr);

   /* Fix up gl_FragColor */
   if (sem.location == FRAG_RESULT_COLOR) {
      sem.location = FRAG_RESULT_DATA0;
      info->broadcast_rt0 = true;
      info->rt_written = ~0;
   }

   /* We don't use the epilog for sample mask writes */
   if (sem.location < FRAG_RESULT_DATA0)
      return false;

   /* Determine the render target index. Dual source blending aliases a second
    * render target, so get that out of the way now.
    */
   unsigned rt = sem.location - FRAG_RESULT_DATA0;
   rt += nir_src_as_uint(intr->src[1]);

   if (sem.dual_source_blend_index) {
      assert(rt == 0);
      rt = 1;
   }

   info->rt_written |= BITFIELD_BIT(rt);

   b->cursor = nir_instr_remove(&intr->instr);
   nir_def *vec = intr->src[0].ssa;

   if (vec->bit_size == 32)
      info->size_32 |= BITFIELD_BIT(rt);
   else
      assert(vec->bit_size == 16);

   uint32_t one_f = (vec->bit_size == 32 ? fui(1.0) : _mesa_float_to_half(1.0));
   unsigned comp = nir_intrinsic_component(intr);

   u_foreach_bit(c, nir_intrinsic_write_mask(intr)) {
      nir_scalar s = nir_scalar_resolved(vec, c);
      if (rt == 0 && c == 3 && nir_scalar_is_const(s) &&
          nir_scalar_as_uint(s) == one_f) {

         info->rt0_w_1 = true;
      } else {
         unsigned stride = vec->bit_size / 16;

         nir_export_agx(b, nir_channel(b, vec, c),
                        .base = (2 * (4 + (4 * rt))) + (comp + c) * stride);
      }
   }

   return true;
}

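/* Entry point wrapping lower_output_to_epilog. If discards were accumulated
 * into a masked-samples variable, export it at the end of the shader and
 * clean up the temporary variable.
 */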
bool
agx_nir_lower_fs_output_to_epilog(nir_shader *s,
                                  struct agx_fs_epilog_link_info *out)
{
   struct lower_epilog_ctx ctx = {.info = out};

   nir_shader_intrinsics_pass(s, lower_output_to_epilog,
                              nir_metadata_control_flow, &ctx);

   if (ctx.masked_samples) {
      nir_builder b =
         nir_builder_at(nir_after_impl(nir_shader_get_entrypoint(s)));

      nir_export_agx(&b, nir_load_var(&b, ctx.masked_samples), .base = 7);
      out->sample_mask_after_force_early = true;

      bool progress;
      do {
         progress = false;
         NIR_PASS(progress, s, nir_lower_vars_to_ssa);
         NIR_PASS(progress, s, nir_opt_dce);
      } while (progress);
   }

   out->sample_shading = s->info.fs.uses_sample_shading;
   return true;
}

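/* See lower_active_samples_to_register. */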
bool
agx_nir_lower_fs_active_samples_to_register(nir_shader *s)
{
   return nir_shader_intrinsics_pass(s, lower_active_samples_to_register,
                                     nir_metadata_control_flow, NULL);
}

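/* Accumulate the PS_INVOCATIONS pipeline statistic by atomically adding the
 * number of covered samples for each non-helper invocation.
 */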
static bool
agx_nir_lower_stats_fs(nir_shader *s)
{
   assert(s->info.stage == MESA_SHADER_FRAGMENT);
   nir_builder b_ =
      nir_builder_at(nir_before_impl(nir_shader_get_entrypoint(s)));
   nir_builder *b = &b_;

   nir_push_if(b, nir_inot(b, nir_load_helper_invocation(b, 1)));
   nir_def *samples = nir_bit_count(b, nir_load_sample_mask_in(b));
   unsigned query = PIPE_STAT_QUERY_PS_INVOCATIONS;

   nir_def *addr = nir_load_stat_query_address_agx(b, .base = query);
   nir_global_atomic(b, 32, addr, samples, .atomic_op = nir_atomic_op_iadd);

   nir_pop_if(b, NULL);
   nir_metadata_preserve(b->impl, nir_metadata_control_flow);
   return true;
}

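/* Build a fragment shader prolog from its key, inserting code for emulated
 * features (API sample mask, statistics, cull distance, polygon stipple) that
 * must run before the API fragment shader.
 */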
void
agx_nir_fs_prolog(nir_builder *b, const void *key_)
{
   const struct agx_fs_prolog_key *key = key_;
   b->shader->info.stage = MESA_SHADER_FRAGMENT;
   b->shader->info.name = "FS prolog";

   /* First, insert code for any emulated features */
   if (key->api_sample_mask != 0xff) {
      /* Kill samples that are NOT covered by the mask */
      nir_discard_agx(b, nir_imm_intN_t(b, key->api_sample_mask ^ 0xff, 16));
      b->shader->info.fs.uses_discard = true;
   }

   if (key->statistics) {
      NIR_PASS(_, b->shader, agx_nir_lower_stats_fs);
   }

   if (key->cull_distance_size) {
      NIR_PASS(_, b->shader, agx_nir_lower_cull_distance_fs,
               key->cull_distance_size);
   }

   if (key->polygon_stipple) {
      NIR_PASS_V(b->shader, agx_nir_lower_poly_stipple);
   }

   /* Then, lower the prolog */
   NIR_PASS(_, b->shader, agx_nir_lower_discard_zs_emit);
   NIR_PASS(_, b->shader, agx_nir_lower_sample_mask);
   NIR_PASS(_, b->shader, nir_shader_intrinsics_pass,
            lower_non_monolithic_uniforms, nir_metadata_control_flow, NULL);
   NIR_PASS(_, b->shader, lower_tests_zs, key->run_zs_tests);

   b->shader->info.io_lowered = true;
}