/* xref: /aosp_15_r20/external/mesa3d/src/asahi/lib/shaders/geometry.h (revision 6104692788411f58d303aa86923a9ff6ecaded22) */
/*
 * Copyright 2023 Alyssa Rosenzweig
 * Copyright 2023 Valve Corporation
 * SPDX-License-Identifier: MIT
 */
6*61046927SAndroid Build Coastguard Worker 
7*61046927SAndroid Build Coastguard Worker #include "compiler/shader_enums.h"
8*61046927SAndroid Build Coastguard Worker #include "libagx.h"
9*61046927SAndroid Build Coastguard Worker 
10*61046927SAndroid Build Coastguard Worker #ifndef __OPENCL_VERSION__
11*61046927SAndroid Build Coastguard Worker #include "util/bitscan.h"
12*61046927SAndroid Build Coastguard Worker #define CONST(type_)         uint64_t
13*61046927SAndroid Build Coastguard Worker #define libagx_popcount(x)   util_bitcount64(x)
14*61046927SAndroid Build Coastguard Worker #define libagx_sub_sat(x, y) ((x >= y) ? (x - y) : 0)
15*61046927SAndroid Build Coastguard Worker #else
16*61046927SAndroid Build Coastguard Worker #define CONST(type_)         constant type_ *
17*61046927SAndroid Build Coastguard Worker #define libagx_popcount(x)   popcount(x)
18*61046927SAndroid Build Coastguard Worker #define libagx_sub_sat(x, y) sub_sat(x, y)
19*61046927SAndroid Build Coastguard Worker #endif
20*61046927SAndroid Build Coastguard Worker 
21*61046927SAndroid Build Coastguard Worker #ifndef LIBAGX_GEOMETRY_H
22*61046927SAndroid Build Coastguard Worker #define LIBAGX_GEOMETRY_H
23*61046927SAndroid Build Coastguard Worker 
/* Array bounds used by struct agx_geometry_params: MAX_SO_BUFFERS sizes the
 * per-buffer transform feedback arrays, MAX_VERTEX_STREAMS sizes the
 * per-stream counter arrays.
 */
#define MAX_SO_BUFFERS     4
#define MAX_VERTEX_STREAMS 4
26*61046927SAndroid Build Coastguard Worker 
27*61046927SAndroid Build Coastguard Worker /* Packed geometry state buffer */
28*61046927SAndroid Build Coastguard Worker struct agx_geometry_state {
29*61046927SAndroid Build Coastguard Worker    /* Heap to allocate from. */
30*61046927SAndroid Build Coastguard Worker    GLOBAL(uchar) heap;
31*61046927SAndroid Build Coastguard Worker    uint32_t heap_bottom, heap_size;
32*61046927SAndroid Build Coastguard Worker } PACKED;
33*61046927SAndroid Build Coastguard Worker AGX_STATIC_ASSERT(sizeof(struct agx_geometry_state) == 4 * 4);
34*61046927SAndroid Build Coastguard Worker 
35*61046927SAndroid Build Coastguard Worker struct agx_restart_unroll_params {
36*61046927SAndroid Build Coastguard Worker    /* Heap to allocate from across draws */
37*61046927SAndroid Build Coastguard Worker    GLOBAL(struct agx_geometry_state) heap;
38*61046927SAndroid Build Coastguard Worker 
39*61046927SAndroid Build Coastguard Worker    /* Input: index buffer if present. */
40*61046927SAndroid Build Coastguard Worker    uint64_t index_buffer;
41*61046927SAndroid Build Coastguard Worker 
42*61046927SAndroid Build Coastguard Worker    /* Input: draw count */
43*61046927SAndroid Build Coastguard Worker    CONST(uint) count;
44*61046927SAndroid Build Coastguard Worker 
45*61046927SAndroid Build Coastguard Worker    /* Input: indirect draw descriptor. Raw pointer since it's strided. */
46*61046927SAndroid Build Coastguard Worker    uint64_t draws;
47*61046927SAndroid Build Coastguard Worker 
48*61046927SAndroid Build Coastguard Worker    /* Output draw descriptors */
49*61046927SAndroid Build Coastguard Worker    GLOBAL(uint) out_draws;
50*61046927SAndroid Build Coastguard Worker 
51*61046927SAndroid Build Coastguard Worker    /* Pointer to zero */
52*61046927SAndroid Build Coastguard Worker    uint64_t zero_sink;
53*61046927SAndroid Build Coastguard Worker 
54*61046927SAndroid Build Coastguard Worker    /* Input: maximum draw count, count is clamped to this */
55*61046927SAndroid Build Coastguard Worker    uint32_t max_draws;
56*61046927SAndroid Build Coastguard Worker 
57*61046927SAndroid Build Coastguard Worker    /* Primitive restart index */
58*61046927SAndroid Build Coastguard Worker    uint32_t restart_index;
59*61046927SAndroid Build Coastguard Worker 
60*61046927SAndroid Build Coastguard Worker    /* Input index buffer size in elements */
61*61046927SAndroid Build Coastguard Worker    uint32_t index_buffer_size_el;
62*61046927SAndroid Build Coastguard Worker 
63*61046927SAndroid Build Coastguard Worker    /* Stride for the draw descriptor array */
64*61046927SAndroid Build Coastguard Worker    uint32_t draw_stride;
65*61046927SAndroid Build Coastguard Worker 
66*61046927SAndroid Build Coastguard Worker    /* Use first vertex as the provoking vertex for flat shading. We could stick
67*61046927SAndroid Build Coastguard Worker     * this in the key, but meh, you're already hosed for perf on the unroll
68*61046927SAndroid Build Coastguard Worker     * path.
69*61046927SAndroid Build Coastguard Worker     */
70*61046927SAndroid Build Coastguard Worker    uint32_t flatshade_first;
71*61046927SAndroid Build Coastguard Worker } PACKED;
72*61046927SAndroid Build Coastguard Worker AGX_STATIC_ASSERT(sizeof(struct agx_restart_unroll_params) == 17 * 4);
73*61046927SAndroid Build Coastguard Worker 
74*61046927SAndroid Build Coastguard Worker struct agx_gs_setup_indirect_params {
75*61046927SAndroid Build Coastguard Worker    /* Index buffer if present. */
76*61046927SAndroid Build Coastguard Worker    uint64_t index_buffer;
77*61046927SAndroid Build Coastguard Worker 
78*61046927SAndroid Build Coastguard Worker    /* Indirect draw descriptor. */
79*61046927SAndroid Build Coastguard Worker    CONST(uint) draw;
80*61046927SAndroid Build Coastguard Worker 
81*61046927SAndroid Build Coastguard Worker    /* Pointer to be written with allocated vertex buffer */
82*61046927SAndroid Build Coastguard Worker    GLOBAL(uintptr_t) vertex_buffer;
83*61046927SAndroid Build Coastguard Worker 
84*61046927SAndroid Build Coastguard Worker    /* Output input assembly state */
85*61046927SAndroid Build Coastguard Worker    GLOBAL(struct agx_ia_state) ia;
86*61046927SAndroid Build Coastguard Worker 
87*61046927SAndroid Build Coastguard Worker    /* Output geometry parameters */
88*61046927SAndroid Build Coastguard Worker    GLOBAL(struct agx_geometry_params) geom;
89*61046927SAndroid Build Coastguard Worker 
90*61046927SAndroid Build Coastguard Worker    /* Pointer to zero */
91*61046927SAndroid Build Coastguard Worker    uint64_t zero_sink;
92*61046927SAndroid Build Coastguard Worker 
93*61046927SAndroid Build Coastguard Worker    /* Vertex (TES) output mask for sizing the allocated buffer */
94*61046927SAndroid Build Coastguard Worker    uint64_t vs_outputs;
95*61046927SAndroid Build Coastguard Worker 
96*61046927SAndroid Build Coastguard Worker    /* The index size (1, 2, 4) or 0 if drawing without an index buffer. */
97*61046927SAndroid Build Coastguard Worker    uint32_t index_size_B;
98*61046927SAndroid Build Coastguard Worker 
99*61046927SAndroid Build Coastguard Worker    /* Size of the index buffer */
100*61046927SAndroid Build Coastguard Worker    uint32_t index_buffer_range_el;
101*61046927SAndroid Build Coastguard Worker } PACKED;
102*61046927SAndroid Build Coastguard Worker AGX_STATIC_ASSERT(sizeof(struct agx_gs_setup_indirect_params) == 16 * 4);
103*61046927SAndroid Build Coastguard Worker 
104*61046927SAndroid Build Coastguard Worker struct agx_ia_state {
105*61046927SAndroid Build Coastguard Worker    /* Index buffer if present. */
106*61046927SAndroid Build Coastguard Worker    uint64_t index_buffer;
107*61046927SAndroid Build Coastguard Worker 
108*61046927SAndroid Build Coastguard Worker    /* Size of the bound index buffer for bounds checking */
109*61046927SAndroid Build Coastguard Worker    uint32_t index_buffer_range_el;
110*61046927SAndroid Build Coastguard Worker 
111*61046927SAndroid Build Coastguard Worker    /* Number of vertices per instance. Written by CPU for direct draw, indirect
112*61046927SAndroid Build Coastguard Worker     * setup kernel for indirect. This is used for VS->GS and VS->TCS indexing.
113*61046927SAndroid Build Coastguard Worker     */
114*61046927SAndroid Build Coastguard Worker    uint32_t verts_per_instance;
115*61046927SAndroid Build Coastguard Worker } PACKED;
116*61046927SAndroid Build Coastguard Worker AGX_STATIC_ASSERT(sizeof(struct agx_ia_state) == 4 * 4);
117*61046927SAndroid Build Coastguard Worker 
118*61046927SAndroid Build Coastguard Worker static inline uint64_t
libagx_index_buffer(uint64_t index_buffer,uint size_el,uint offset_el,uint elsize_B,uint64_t zero_sink)119*61046927SAndroid Build Coastguard Worker libagx_index_buffer(uint64_t index_buffer, uint size_el, uint offset_el,
120*61046927SAndroid Build Coastguard Worker                     uint elsize_B, uint64_t zero_sink)
121*61046927SAndroid Build Coastguard Worker {
122*61046927SAndroid Build Coastguard Worker    if (offset_el < size_el)
123*61046927SAndroid Build Coastguard Worker       return index_buffer + (offset_el * elsize_B);
124*61046927SAndroid Build Coastguard Worker    else
125*61046927SAndroid Build Coastguard Worker       return zero_sink;
126*61046927SAndroid Build Coastguard Worker }
127*61046927SAndroid Build Coastguard Worker 
128*61046927SAndroid Build Coastguard Worker static inline uint
libagx_index_buffer_range_el(uint size_el,uint offset_el)129*61046927SAndroid Build Coastguard Worker libagx_index_buffer_range_el(uint size_el, uint offset_el)
130*61046927SAndroid Build Coastguard Worker {
131*61046927SAndroid Build Coastguard Worker    return libagx_sub_sat(size_el, offset_el);
132*61046927SAndroid Build Coastguard Worker }
133*61046927SAndroid Build Coastguard Worker 
134*61046927SAndroid Build Coastguard Worker struct agx_geometry_params {
135*61046927SAndroid Build Coastguard Worker    /* Persistent (cross-draw) geometry state */
136*61046927SAndroid Build Coastguard Worker    GLOBAL(struct agx_geometry_state) state;
137*61046927SAndroid Build Coastguard Worker 
138*61046927SAndroid Build Coastguard Worker    /* Address of associated indirect draw buffer */
139*61046927SAndroid Build Coastguard Worker    GLOBAL(uint) indirect_desc;
140*61046927SAndroid Build Coastguard Worker 
141*61046927SAndroid Build Coastguard Worker    /* Address of count buffer. For an indirect draw, this will be written by the
142*61046927SAndroid Build Coastguard Worker     * indirect setup kernel.
143*61046927SAndroid Build Coastguard Worker     */
144*61046927SAndroid Build Coastguard Worker    GLOBAL(uint) count_buffer;
145*61046927SAndroid Build Coastguard Worker 
146*61046927SAndroid Build Coastguard Worker    /* Address of the primitives generated counters */
147*61046927SAndroid Build Coastguard Worker    GLOBAL(uint) prims_generated_counter[MAX_VERTEX_STREAMS];
148*61046927SAndroid Build Coastguard Worker    GLOBAL(uint) xfb_prims_generated_counter[MAX_VERTEX_STREAMS];
149*61046927SAndroid Build Coastguard Worker    GLOBAL(uint) xfb_overflow[MAX_VERTEX_STREAMS];
150*61046927SAndroid Build Coastguard Worker    GLOBAL(uint) xfb_any_overflow;
151*61046927SAndroid Build Coastguard Worker 
152*61046927SAndroid Build Coastguard Worker    /* Pointers to transform feedback buffer offsets in bytes */
153*61046927SAndroid Build Coastguard Worker    GLOBAL(uint) xfb_offs_ptrs[MAX_SO_BUFFERS];
154*61046927SAndroid Build Coastguard Worker 
155*61046927SAndroid Build Coastguard Worker    /* Output index buffer, allocated by pre-GS. */
156*61046927SAndroid Build Coastguard Worker    GLOBAL(uint) output_index_buffer;
157*61046927SAndroid Build Coastguard Worker 
158*61046927SAndroid Build Coastguard Worker    /* Address of transform feedback buffer in general, supplied by the CPU. */
159*61046927SAndroid Build Coastguard Worker    GLOBAL(uchar) xfb_base_original[MAX_SO_BUFFERS];
160*61046927SAndroid Build Coastguard Worker 
161*61046927SAndroid Build Coastguard Worker    /* Address of transform feedback for the current primitive. Written by pre-GS
162*61046927SAndroid Build Coastguard Worker     * program.
163*61046927SAndroid Build Coastguard Worker     */
164*61046927SAndroid Build Coastguard Worker    GLOBAL(uchar) xfb_base[MAX_SO_BUFFERS];
165*61046927SAndroid Build Coastguard Worker 
166*61046927SAndroid Build Coastguard Worker    /* Address and present mask for the input to the geometry shader. These will
167*61046927SAndroid Build Coastguard Worker     * reflect the vertex shader for VS->GS or instead the tessellation
168*61046927SAndroid Build Coastguard Worker     * evaluation shader for TES->GS.
169*61046927SAndroid Build Coastguard Worker     */
170*61046927SAndroid Build Coastguard Worker    uint64_t input_buffer;
171*61046927SAndroid Build Coastguard Worker    uint64_t input_mask;
172*61046927SAndroid Build Coastguard Worker 
173*61046927SAndroid Build Coastguard Worker    /* Location-indexed mask of flat outputs, used for lowering GL edge flags. */
174*61046927SAndroid Build Coastguard Worker    uint64_t flat_outputs;
175*61046927SAndroid Build Coastguard Worker 
176*61046927SAndroid Build Coastguard Worker    uint32_t xfb_size[MAX_SO_BUFFERS];
177*61046927SAndroid Build Coastguard Worker 
178*61046927SAndroid Build Coastguard Worker    /* Number of primitives emitted by transform feedback per stream. Written by
179*61046927SAndroid Build Coastguard Worker     * the pre-GS program.
180*61046927SAndroid Build Coastguard Worker     */
181*61046927SAndroid Build Coastguard Worker    uint32_t xfb_prims[MAX_VERTEX_STREAMS];
182*61046927SAndroid Build Coastguard Worker 
183*61046927SAndroid Build Coastguard Worker    /* Within an indirect GS draw, the grids used to dispatch the VS/GS written
184*61046927SAndroid Build Coastguard Worker     * out by the GS indirect setup kernel or the CPU for a direct draw.
185*61046927SAndroid Build Coastguard Worker     */
186*61046927SAndroid Build Coastguard Worker    uint32_t vs_grid[3];
187*61046927SAndroid Build Coastguard Worker    uint32_t gs_grid[3];
188*61046927SAndroid Build Coastguard Worker 
189*61046927SAndroid Build Coastguard Worker    /* Number of input primitives across all instances, calculated by the CPU for
190*61046927SAndroid Build Coastguard Worker     * a direct draw or the GS indirect setup kernel for an indirect draw.
191*61046927SAndroid Build Coastguard Worker     */
192*61046927SAndroid Build Coastguard Worker    uint32_t input_primitives;
193*61046927SAndroid Build Coastguard Worker 
194*61046927SAndroid Build Coastguard Worker    /* Number of input primitives per instance, rounded up to a power-of-two and
195*61046927SAndroid Build Coastguard Worker     * with the base-2 log taken. This is used to partition the output vertex IDs
196*61046927SAndroid Build Coastguard Worker     * efficiently.
197*61046927SAndroid Build Coastguard Worker     */
198*61046927SAndroid Build Coastguard Worker    uint32_t primitives_log2;
199*61046927SAndroid Build Coastguard Worker 
200*61046927SAndroid Build Coastguard Worker    /* Number of bytes output by the GS count shader per input primitive (may be
201*61046927SAndroid Build Coastguard Worker     * 0), written by CPU and consumed by indirect draw setup shader for
202*61046927SAndroid Build Coastguard Worker     * allocating counts.
203*61046927SAndroid Build Coastguard Worker     */
204*61046927SAndroid Build Coastguard Worker    uint32_t count_buffer_stride;
205*61046927SAndroid Build Coastguard Worker 
206*61046927SAndroid Build Coastguard Worker    /* Dynamic input topology. Must be compatible with the geometry shader's
207*61046927SAndroid Build Coastguard Worker     * layout() declared input class.
208*61046927SAndroid Build Coastguard Worker     */
209*61046927SAndroid Build Coastguard Worker    uint32_t input_topology;
210*61046927SAndroid Build Coastguard Worker } PACKED;
211*61046927SAndroid Build Coastguard Worker AGX_STATIC_ASSERT(sizeof(struct agx_geometry_params) == 82 * 4);
212*61046927SAndroid Build Coastguard Worker 
213*61046927SAndroid Build Coastguard Worker /* TCS shared memory layout:
214*61046927SAndroid Build Coastguard Worker  *
215*61046927SAndroid Build Coastguard Worker  *    vec4 vs_outputs[VERTICES_IN_INPUT_PATCH][TOTAL_VERTEX_OUTPUTS];
216*61046927SAndroid Build Coastguard Worker  *
217*61046927SAndroid Build Coastguard Worker  * TODO: compact.
218*61046927SAndroid Build Coastguard Worker  */
219*61046927SAndroid Build Coastguard Worker static inline uint
libagx_tcs_in_offs(uint vtx,gl_varying_slot location,uint64_t crosslane_vs_out_mask)220*61046927SAndroid Build Coastguard Worker libagx_tcs_in_offs(uint vtx, gl_varying_slot location,
221*61046927SAndroid Build Coastguard Worker                    uint64_t crosslane_vs_out_mask)
222*61046927SAndroid Build Coastguard Worker {
223*61046927SAndroid Build Coastguard Worker    uint base = vtx * libagx_popcount(crosslane_vs_out_mask);
224*61046927SAndroid Build Coastguard Worker    uint offs = libagx_popcount(crosslane_vs_out_mask &
225*61046927SAndroid Build Coastguard Worker                                (((uint64_t)(1) << location) - 1));
226*61046927SAndroid Build Coastguard Worker 
227*61046927SAndroid Build Coastguard Worker    return (base + offs) * 16;
228*61046927SAndroid Build Coastguard Worker }
229*61046927SAndroid Build Coastguard Worker 
230*61046927SAndroid Build Coastguard Worker static inline uint
libagx_tcs_in_size(uint32_t vertices_in_patch,uint64_t crosslane_vs_out_mask)231*61046927SAndroid Build Coastguard Worker libagx_tcs_in_size(uint32_t vertices_in_patch, uint64_t crosslane_vs_out_mask)
232*61046927SAndroid Build Coastguard Worker {
233*61046927SAndroid Build Coastguard Worker    return vertices_in_patch * libagx_popcount(crosslane_vs_out_mask) * 16;
234*61046927SAndroid Build Coastguard Worker }
235*61046927SAndroid Build Coastguard Worker 
236*61046927SAndroid Build Coastguard Worker /*
237*61046927SAndroid Build Coastguard Worker  * TCS out buffer layout, per-patch:
238*61046927SAndroid Build Coastguard Worker  *
239*61046927SAndroid Build Coastguard Worker  *    float tess_level_outer[4];
240*61046927SAndroid Build Coastguard Worker  *    float tess_level_inner[2];
241*61046927SAndroid Build Coastguard Worker  *    vec4 patch_out[MAX_PATCH_OUTPUTS];
242*61046927SAndroid Build Coastguard Worker  *    vec4 vtx_out[OUT_PATCH_SIZE][TOTAL_VERTEX_OUTPUTS];
243*61046927SAndroid Build Coastguard Worker  *
244*61046927SAndroid Build Coastguard Worker  * Vertex out are compacted based on the mask of written out. Patch
245*61046927SAndroid Build Coastguard Worker  * out are used as-is.
246*61046927SAndroid Build Coastguard Worker  *
247*61046927SAndroid Build Coastguard Worker  * Bounding boxes are ignored.
248*61046927SAndroid Build Coastguard Worker  */
249*61046927SAndroid Build Coastguard Worker static inline uint
libagx_tcs_out_offs(uint vtx_id,gl_varying_slot location,uint nr_patch_out,uint out_patch_size,uint64_t vtx_out_mask)250*61046927SAndroid Build Coastguard Worker libagx_tcs_out_offs(uint vtx_id, gl_varying_slot location, uint nr_patch_out,
251*61046927SAndroid Build Coastguard Worker                     uint out_patch_size, uint64_t vtx_out_mask)
252*61046927SAndroid Build Coastguard Worker {
253*61046927SAndroid Build Coastguard Worker    uint off = 0;
254*61046927SAndroid Build Coastguard Worker    if (location == VARYING_SLOT_TESS_LEVEL_OUTER)
255*61046927SAndroid Build Coastguard Worker       return off;
256*61046927SAndroid Build Coastguard Worker 
257*61046927SAndroid Build Coastguard Worker    off += 4 * sizeof(float);
258*61046927SAndroid Build Coastguard Worker    if (location == VARYING_SLOT_TESS_LEVEL_INNER)
259*61046927SAndroid Build Coastguard Worker       return off;
260*61046927SAndroid Build Coastguard Worker 
261*61046927SAndroid Build Coastguard Worker    off += 2 * sizeof(float);
262*61046927SAndroid Build Coastguard Worker    if (location >= VARYING_SLOT_PATCH0)
263*61046927SAndroid Build Coastguard Worker       return off + (16 * (location - VARYING_SLOT_PATCH0));
264*61046927SAndroid Build Coastguard Worker 
265*61046927SAndroid Build Coastguard Worker    /* Anything else is a per-vtx output */
266*61046927SAndroid Build Coastguard Worker    off += 16 * nr_patch_out;
267*61046927SAndroid Build Coastguard Worker    off += 16 * vtx_id * libagx_popcount(vtx_out_mask);
268*61046927SAndroid Build Coastguard Worker 
269*61046927SAndroid Build Coastguard Worker    uint idx = libagx_popcount(vtx_out_mask & (((uint64_t)(1) << location) - 1));
270*61046927SAndroid Build Coastguard Worker    return off + (16 * idx);
271*61046927SAndroid Build Coastguard Worker }
272*61046927SAndroid Build Coastguard Worker 
273*61046927SAndroid Build Coastguard Worker static inline uint
libagx_tcs_out_stride(uint nr_patch_out,uint out_patch_size,uint64_t vtx_out_mask)274*61046927SAndroid Build Coastguard Worker libagx_tcs_out_stride(uint nr_patch_out, uint out_patch_size,
275*61046927SAndroid Build Coastguard Worker                       uint64_t vtx_out_mask)
276*61046927SAndroid Build Coastguard Worker {
277*61046927SAndroid Build Coastguard Worker    return libagx_tcs_out_offs(out_patch_size, VARYING_SLOT_VAR0, nr_patch_out,
278*61046927SAndroid Build Coastguard Worker                               out_patch_size, vtx_out_mask);
279*61046927SAndroid Build Coastguard Worker }
280*61046927SAndroid Build Coastguard Worker 
/* In a tess eval shader, stride for hw vertex ID */
#define LIBAGX_TES_PATCH_ID_STRIDE 8192
283*61046927SAndroid Build Coastguard Worker 
284*61046927SAndroid Build Coastguard Worker #endif
285