xref: /aosp_15_r20/external/mesa3d/src/asahi/lib/shaders/geometry.h (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2023 Alyssa Rosenzweig
3  * Copyright 2023 Valve Corporation
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "compiler/shader_enums.h"
8 #include "libagx.h"
9 
10 #ifndef __OPENCL_VERSION__
11 #include "util/bitscan.h"
12 #define CONST(type_)         uint64_t
13 #define libagx_popcount(x)   util_bitcount64(x)
/* Host stand-in for the OpenCL sub_sat() used on the device path: yields
 * x - y, or 0 when y is larger than x. Intended for unsigned operands.
 *
 * Every use of an argument is parenthesized so that expression arguments
 * (e.g. "a, b + c") keep their grouping after expansion. Each argument is
 * still evaluated twice, so do not pass expressions with side effects.
 */
#define libagx_sub_sat(x, y) (((x) >= (y)) ? ((x) - (y)) : 0)
15 #else
16 #define CONST(type_)         constant type_ *
17 #define libagx_popcount(x)   popcount(x)
18 #define libagx_sub_sat(x, y) sub_sat(x, y)
19 #endif
20 
21 #ifndef LIBAGX_GEOMETRY_H
22 #define LIBAGX_GEOMETRY_H
23 
24 #define MAX_SO_BUFFERS     4
25 #define MAX_VERTEX_STREAMS 4
26 
27 /* Packed geometry state buffer */
28 struct agx_geometry_state {
29    /* Heap to allocate from. */
30    GLOBAL(uchar) heap;
31    uint32_t heap_bottom, heap_size;
32 } PACKED;
33 AGX_STATIC_ASSERT(sizeof(struct agx_geometry_state) == 4 * 4);
34 
35 struct agx_restart_unroll_params {
36    /* Heap to allocate from across draws */
37    GLOBAL(struct agx_geometry_state) heap;
38 
39    /* Input: index buffer if present. */
40    uint64_t index_buffer;
41 
42    /* Input: draw count */
43    CONST(uint) count;
44 
45    /* Input: indirect draw descriptor. Raw pointer since it's strided. */
46    uint64_t draws;
47 
48    /* Output draw descriptors */
49    GLOBAL(uint) out_draws;
50 
51    /* Pointer to zero */
52    uint64_t zero_sink;
53 
54    /* Input: maximum draw count, count is clamped to this */
55    uint32_t max_draws;
56 
57    /* Primitive restart index */
58    uint32_t restart_index;
59 
60    /* Input index buffer size in elements */
61    uint32_t index_buffer_size_el;
62 
63    /* Stride for the draw descriptor array */
64    uint32_t draw_stride;
65 
66    /* Use first vertex as the provoking vertex for flat shading. We could stick
67     * this in the key, but meh, you're already hosed for perf on the unroll
68     * path.
69     */
70    uint32_t flatshade_first;
71 } PACKED;
72 AGX_STATIC_ASSERT(sizeof(struct agx_restart_unroll_params) == 17 * 4);
73 
74 struct agx_gs_setup_indirect_params {
75    /* Index buffer if present. */
76    uint64_t index_buffer;
77 
78    /* Indirect draw descriptor. */
79    CONST(uint) draw;
80 
81    /* Pointer to be written with allocated vertex buffer */
82    GLOBAL(uintptr_t) vertex_buffer;
83 
84    /* Output input assembly state */
85    GLOBAL(struct agx_ia_state) ia;
86 
87    /* Output geometry parameters */
88    GLOBAL(struct agx_geometry_params) geom;
89 
90    /* Pointer to zero */
91    uint64_t zero_sink;
92 
93    /* Vertex (TES) output mask for sizing the allocated buffer */
94    uint64_t vs_outputs;
95 
96    /* The index size (1, 2, 4) or 0 if drawing without an index buffer. */
97    uint32_t index_size_B;
98 
99    /* Size of the index buffer */
100    uint32_t index_buffer_range_el;
101 } PACKED;
102 AGX_STATIC_ASSERT(sizeof(struct agx_gs_setup_indirect_params) == 16 * 4);
103 
104 struct agx_ia_state {
105    /* Index buffer if present. */
106    uint64_t index_buffer;
107 
108    /* Size of the bound index buffer for bounds checking */
109    uint32_t index_buffer_range_el;
110 
111    /* Number of vertices per instance. Written by CPU for direct draw, indirect
112     * setup kernel for indirect. This is used for VS->GS and VS->TCS indexing.
113     */
114    uint32_t verts_per_instance;
115 } PACKED;
116 AGX_STATIC_ASSERT(sizeof(struct agx_ia_state) == 4 * 4);
117 
118 static inline uint64_t
libagx_index_buffer(uint64_t index_buffer,uint size_el,uint offset_el,uint elsize_B,uint64_t zero_sink)119 libagx_index_buffer(uint64_t index_buffer, uint size_el, uint offset_el,
120                     uint elsize_B, uint64_t zero_sink)
121 {
122    if (offset_el < size_el)
123       return index_buffer + (offset_el * elsize_B);
124    else
125       return zero_sink;
126 }
127 
128 static inline uint
libagx_index_buffer_range_el(uint size_el,uint offset_el)129 libagx_index_buffer_range_el(uint size_el, uint offset_el)
130 {
131    return libagx_sub_sat(size_el, offset_el);
132 }
133 
134 struct agx_geometry_params {
135    /* Persistent (cross-draw) geometry state */
136    GLOBAL(struct agx_geometry_state) state;
137 
138    /* Address of associated indirect draw buffer */
139    GLOBAL(uint) indirect_desc;
140 
141    /* Address of count buffer. For an indirect draw, this will be written by the
142     * indirect setup kernel.
143     */
144    GLOBAL(uint) count_buffer;
145 
146    /* Address of the primitives generated counters */
147    GLOBAL(uint) prims_generated_counter[MAX_VERTEX_STREAMS];
148    GLOBAL(uint) xfb_prims_generated_counter[MAX_VERTEX_STREAMS];
149    GLOBAL(uint) xfb_overflow[MAX_VERTEX_STREAMS];
150    GLOBAL(uint) xfb_any_overflow;
151 
152    /* Pointers to transform feedback buffer offsets in bytes */
153    GLOBAL(uint) xfb_offs_ptrs[MAX_SO_BUFFERS];
154 
155    /* Output index buffer, allocated by pre-GS. */
156    GLOBAL(uint) output_index_buffer;
157 
158    /* Address of transform feedback buffer in general, supplied by the CPU. */
159    GLOBAL(uchar) xfb_base_original[MAX_SO_BUFFERS];
160 
161    /* Address of transform feedback for the current primitive. Written by pre-GS
162     * program.
163     */
164    GLOBAL(uchar) xfb_base[MAX_SO_BUFFERS];
165 
166    /* Address and present mask for the input to the geometry shader. These will
167     * reflect the vertex shader for VS->GS or instead the tessellation
168     * evaluation shader for TES->GS.
169     */
170    uint64_t input_buffer;
171    uint64_t input_mask;
172 
173    /* Location-indexed mask of flat outputs, used for lowering GL edge flags. */
174    uint64_t flat_outputs;
175 
176    uint32_t xfb_size[MAX_SO_BUFFERS];
177 
178    /* Number of primitives emitted by transform feedback per stream. Written by
179     * the pre-GS program.
180     */
181    uint32_t xfb_prims[MAX_VERTEX_STREAMS];
182 
183    /* Within an indirect GS draw, the grids used to dispatch the VS/GS written
184     * out by the GS indirect setup kernel or the CPU for a direct draw.
185     */
186    uint32_t vs_grid[3];
187    uint32_t gs_grid[3];
188 
189    /* Number of input primitives across all instances, calculated by the CPU for
190     * a direct draw or the GS indirect setup kernel for an indirect draw.
191     */
192    uint32_t input_primitives;
193 
194    /* Number of input primitives per instance, rounded up to a power-of-two and
195     * with the base-2 log taken. This is used to partition the output vertex IDs
196     * efficiently.
197     */
198    uint32_t primitives_log2;
199 
200    /* Number of bytes output by the GS count shader per input primitive (may be
201     * 0), written by CPU and consumed by indirect draw setup shader for
202     * allocating counts.
203     */
204    uint32_t count_buffer_stride;
205 
206    /* Dynamic input topology. Must be compatible with the geometry shader's
207     * layout() declared input class.
208     */
209    uint32_t input_topology;
210 } PACKED;
211 AGX_STATIC_ASSERT(sizeof(struct agx_geometry_params) == 82 * 4);
212 
213 /* TCS shared memory layout:
214  *
215  *    vec4 vs_outputs[VERTICES_IN_INPUT_PATCH][TOTAL_VERTEX_OUTPUTS];
216  *
217  * TODO: compact.
218  */
219 static inline uint
libagx_tcs_in_offs(uint vtx,gl_varying_slot location,uint64_t crosslane_vs_out_mask)220 libagx_tcs_in_offs(uint vtx, gl_varying_slot location,
221                    uint64_t crosslane_vs_out_mask)
222 {
223    uint base = vtx * libagx_popcount(crosslane_vs_out_mask);
224    uint offs = libagx_popcount(crosslane_vs_out_mask &
225                                (((uint64_t)(1) << location) - 1));
226 
227    return (base + offs) * 16;
228 }
229 
230 static inline uint
libagx_tcs_in_size(uint32_t vertices_in_patch,uint64_t crosslane_vs_out_mask)231 libagx_tcs_in_size(uint32_t vertices_in_patch, uint64_t crosslane_vs_out_mask)
232 {
233    return vertices_in_patch * libagx_popcount(crosslane_vs_out_mask) * 16;
234 }
235 
236 /*
237  * TCS out buffer layout, per-patch:
238  *
239  *    float tess_level_outer[4];
240  *    float tess_level_inner[2];
241  *    vec4 patch_out[MAX_PATCH_OUTPUTS];
242  *    vec4 vtx_out[OUT_PATCH_SIZE][TOTAL_VERTEX_OUTPUTS];
243  *
244  * Vertex out are compacted based on the mask of written out. Patch
245  * out are used as-is.
246  *
247  * Bounding boxes are ignored.
248  */
249 static inline uint
libagx_tcs_out_offs(uint vtx_id,gl_varying_slot location,uint nr_patch_out,uint out_patch_size,uint64_t vtx_out_mask)250 libagx_tcs_out_offs(uint vtx_id, gl_varying_slot location, uint nr_patch_out,
251                     uint out_patch_size, uint64_t vtx_out_mask)
252 {
253    uint off = 0;
254    if (location == VARYING_SLOT_TESS_LEVEL_OUTER)
255       return off;
256 
257    off += 4 * sizeof(float);
258    if (location == VARYING_SLOT_TESS_LEVEL_INNER)
259       return off;
260 
261    off += 2 * sizeof(float);
262    if (location >= VARYING_SLOT_PATCH0)
263       return off + (16 * (location - VARYING_SLOT_PATCH0));
264 
265    /* Anything else is a per-vtx output */
266    off += 16 * nr_patch_out;
267    off += 16 * vtx_id * libagx_popcount(vtx_out_mask);
268 
269    uint idx = libagx_popcount(vtx_out_mask & (((uint64_t)(1) << location) - 1));
270    return off + (16 * idx);
271 }
272 
273 static inline uint
libagx_tcs_out_stride(uint nr_patch_out,uint out_patch_size,uint64_t vtx_out_mask)274 libagx_tcs_out_stride(uint nr_patch_out, uint out_patch_size,
275                       uint64_t vtx_out_mask)
276 {
277    return libagx_tcs_out_offs(out_patch_size, VARYING_SLOT_VAR0, nr_patch_out,
278                               out_patch_size, vtx_out_mask);
279 }
280 
281 /* In a tess eval shader, stride for hw vertex ID */
282 #define LIBAGX_TES_PATCH_ID_STRIDE 8192
283 
284 #endif
285