1 /*
2 * Copyright 2023 Valve Corporation
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "compiler/nir/nir.h"
7 #include "compiler/nir/nir_builder.h"
8 #include "util/macros.h"
9 #include "agx_compile.h"
10 #include "agx_nir.h"
11 #include "glsl_types.h"
12 #include "nir_builder_opcodes.h"
13 #include "shader_enums.h"
14
/*
 * Lower cull distance to discard. From the spec:
 *
 *    If the cull distance for any enabled cull half-space is negative for all
 *    of the vertices of the primitive under consideration, the primitive is
 *    discarded.
 *
 * We don't have a direct way to read the cull distance at non-provoking
 * vertices in the fragment shader. Instead, we interpolate the quantity:
 *
 *    cull distance >= 0.0 ? 1.0 : 0.0
 *
 * Then, the discard condition is equivalent to:
 *
 *    "quantity is zero for all vertices of the primitive"
 *
 * which by linearity is equivalent to:
 *
 *    quantity is zero somewhere in the primitive and quantity has zero
 *    first-order screen space derivatives.
 *
 * which we can determine with ease in the fragment shader.
 */
38
39 static bool
lower_write(nir_builder * b,nir_intrinsic_instr * intr,UNUSED void * data)40 lower_write(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *data)
41 {
42 if (intr->intrinsic != nir_intrinsic_store_output)
43 return false;
44
45 nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
46 if (sem.location != VARYING_SLOT_CLIP_DIST0 &&
47 sem.location != VARYING_SLOT_CLIP_DIST1)
48 return false;
49
50 signed loc = sem.location + nir_src_as_uint(intr->src[1]);
51 unsigned total_component =
52 (loc - VARYING_SLOT_CLIP_DIST0) * 4 + nir_intrinsic_component(intr);
53
54 unsigned base = b->shader->info.clip_distance_array_size;
55 if (total_component < base)
56 return false;
57
58 unsigned component = total_component - base;
59 if (component >= b->shader->info.cull_distance_array_size)
60 return false;
61
62 assert(nir_src_num_components(intr->src[0]) == 1 && "must be scalarized");
63
64 b->cursor = nir_before_instr(&intr->instr);
65 nir_def *offs = nir_imm_int(b, component / 4);
66 nir_def *v = nir_b2f32(b, nir_fge_imm(b, intr->src[0].ssa, 0.0));
67
68 nir_store_output(b, v, offs, .component = component % 4,
69 .src_type = nir_type_float32,
70 .io_semantics.location = VARYING_SLOT_CULL_PRIMITIVE,
71 .io_semantics.num_slots = 2);
72 return true;
73 }
74
75 bool
agx_nir_lower_cull_distance_vs(nir_shader * s)76 agx_nir_lower_cull_distance_vs(nir_shader *s)
77 {
78 assert(s->info.stage == MESA_SHADER_VERTEX ||
79 s->info.stage == MESA_SHADER_TESS_EVAL);
80
81 nir_shader_intrinsics_pass(s, lower_write, nir_metadata_control_flow, NULL);
82
83 s->info.outputs_written |=
84 BITFIELD64_RANGE(VARYING_SLOT_CULL_PRIMITIVE,
85 DIV_ROUND_UP(s->info.cull_distance_array_size, 4));
86 return true;
87 }
88
89 bool
agx_nir_lower_cull_distance_fs(nir_shader * s,unsigned nr_distances)90 agx_nir_lower_cull_distance_fs(nir_shader *s, unsigned nr_distances)
91 {
92 assert(s->info.stage == MESA_SHADER_FRAGMENT);
93 assert(nr_distances > 0);
94
95 nir_builder b_ =
96 nir_builder_at(nir_before_impl(nir_shader_get_entrypoint(s)));
97 nir_builder *b = &b_;
98
99 /* Test each half-space */
100 nir_def *culled = nir_imm_false(b);
101
102 for (unsigned i = 0; i < nr_distances; ++i) {
103 /* Load the coefficient vector for this half-space. Imaginapple
104 * partial derivatives and the value somewhere.
105 */
106 nir_def *cf = nir_load_coefficients_agx(
107 b, nir_imm_int(b, 0), .component = i & 3,
108 .io_semantics.location = VARYING_SLOT_CULL_PRIMITIVE + (i / 4),
109 .io_semantics.num_slots = nr_distances / 4,
110 .interp_mode = INTERP_MODE_NOPERSPECTIVE);
111
112 /* If the coefficients are identically zero, then the quantity is
113 * zero across the primtive <==> cull distance is negative across the
114 * primitive <==> the primitive is culled.
115 */
116 culled = nir_ior(b, culled, nir_ball(b, nir_feq_imm(b, cf, 0)));
117 }
118
119 /* Emulate primitive culling by discarding fragments */
120 nir_demote_if(b, culled);
121
122 s->info.inputs_read |= BITFIELD64_RANGE(VARYING_SLOT_CULL_PRIMITIVE,
123 DIV_ROUND_UP(nr_distances, 4));
124
125 s->info.fs.uses_discard = true;
126 nir_metadata_preserve(b->impl, nir_metadata_control_flow);
127 return true;
128 }
129