xref: /aosp_15_r20/external/mesa3d/src/asahi/compiler/agx_nir_lower_cull_distance.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2023 Valve Corporation
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "compiler/nir/nir.h"
7 #include "compiler/nir/nir_builder.h"
8 #include "util/macros.h"
9 #include "agx_compile.h"
10 #include "agx_nir.h"
11 #include "glsl_types.h"
12 #include "nir_builder_opcodes.h"
13 #include "shader_enums.h"
14 
15 /*
16  * Lower cull distance to discard. From the spec:
17  *
18  *    If the cull distance for any enabled cull half-space is negative for all
19  *    of the vertices of the primitive under consideration, the primitive is
20  *    discarded.
21  *
22  * We don't have a direct way to read the cull distance at non-provoking
23  * vertices in the fragment shader. Instead, we interpolate the quantity:
24  *
25  *    cull distance >= 0.0 ? 1.0 : 0.0
26  *
27  * Then, the discard condition is equivalent to:
28  *
29  *    "quantity is zero for all vertices of the primitive"
30  *
31  * which by linearity is equivalent to:
32  *
33  *    quantity is zero somewhere in the primitive and quantity has zero
34  *    first-order screen space derivatives.
35  *
36  * which we can determine with ease in the fragment shader.
37  */
38 
39 static bool
lower_write(nir_builder * b,nir_intrinsic_instr * intr,UNUSED void * data)40 lower_write(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *data)
41 {
42    if (intr->intrinsic != nir_intrinsic_store_output)
43       return false;
44 
45    nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
46    if (sem.location != VARYING_SLOT_CLIP_DIST0 &&
47        sem.location != VARYING_SLOT_CLIP_DIST1)
48       return false;
49 
50    signed loc = sem.location + nir_src_as_uint(intr->src[1]);
51    unsigned total_component =
52       (loc - VARYING_SLOT_CLIP_DIST0) * 4 + nir_intrinsic_component(intr);
53 
54    unsigned base = b->shader->info.clip_distance_array_size;
55    if (total_component < base)
56       return false;
57 
58    unsigned component = total_component - base;
59    if (component >= b->shader->info.cull_distance_array_size)
60       return false;
61 
62    assert(nir_src_num_components(intr->src[0]) == 1 && "must be scalarized");
63 
64    b->cursor = nir_before_instr(&intr->instr);
65    nir_def *offs = nir_imm_int(b, component / 4);
66    nir_def *v = nir_b2f32(b, nir_fge_imm(b, intr->src[0].ssa, 0.0));
67 
68    nir_store_output(b, v, offs, .component = component % 4,
69                     .src_type = nir_type_float32,
70                     .io_semantics.location = VARYING_SLOT_CULL_PRIMITIVE,
71                     .io_semantics.num_slots = 2);
72    return true;
73 }
74 
75 bool
agx_nir_lower_cull_distance_vs(nir_shader * s)76 agx_nir_lower_cull_distance_vs(nir_shader *s)
77 {
78    assert(s->info.stage == MESA_SHADER_VERTEX ||
79           s->info.stage == MESA_SHADER_TESS_EVAL);
80 
81    nir_shader_intrinsics_pass(s, lower_write, nir_metadata_control_flow, NULL);
82 
83    s->info.outputs_written |=
84       BITFIELD64_RANGE(VARYING_SLOT_CULL_PRIMITIVE,
85                        DIV_ROUND_UP(s->info.cull_distance_array_size, 4));
86    return true;
87 }
88 
89 bool
agx_nir_lower_cull_distance_fs(nir_shader * s,unsigned nr_distances)90 agx_nir_lower_cull_distance_fs(nir_shader *s, unsigned nr_distances)
91 {
92    assert(s->info.stage == MESA_SHADER_FRAGMENT);
93    assert(nr_distances > 0);
94 
95    nir_builder b_ =
96       nir_builder_at(nir_before_impl(nir_shader_get_entrypoint(s)));
97    nir_builder *b = &b_;
98 
99    /* Test each half-space */
100    nir_def *culled = nir_imm_false(b);
101 
102    for (unsigned i = 0; i < nr_distances; ++i) {
103       /* Load the coefficient vector for this half-space. Imaginapple
104        * partial derivatives and the value somewhere.
105        */
106       nir_def *cf = nir_load_coefficients_agx(
107          b, nir_imm_int(b, 0), .component = i & 3,
108          .io_semantics.location = VARYING_SLOT_CULL_PRIMITIVE + (i / 4),
109          .io_semantics.num_slots = nr_distances / 4,
110          .interp_mode = INTERP_MODE_NOPERSPECTIVE);
111 
112       /* If the coefficients are identically zero, then the quantity is
113        * zero across the primtive <==> cull distance is negative across the
114        * primitive <==> the primitive is culled.
115        */
116       culled = nir_ior(b, culled, nir_ball(b, nir_feq_imm(b, cf, 0)));
117    }
118 
119    /* Emulate primitive culling by discarding fragments */
120    nir_demote_if(b, culled);
121 
122    s->info.inputs_read |= BITFIELD64_RANGE(VARYING_SLOT_CULL_PRIMITIVE,
123                                            DIV_ROUND_UP(nr_distances, 4));
124 
125    s->info.fs.uses_discard = true;
126    nir_metadata_preserve(b->impl, nir_metadata_control_flow);
127    return true;
128 }
129