/*
 * Copyright 2023 Alyssa Rosenzweig
 * Copyright 2023 Valve Corporation
 * SPDX-License-Identifier: MIT
 */
6*61046927SAndroid Build Coastguard Worker
7*61046927SAndroid Build Coastguard Worker #include "compiler/shader_enums.h"
8*61046927SAndroid Build Coastguard Worker #include "libagx.h"
9*61046927SAndroid Build Coastguard Worker
10*61046927SAndroid Build Coastguard Worker #ifndef __OPENCL_VERSION__
11*61046927SAndroid Build Coastguard Worker #include "util/bitscan.h"
12*61046927SAndroid Build Coastguard Worker #define CONST(type_) uint64_t
13*61046927SAndroid Build Coastguard Worker #define libagx_popcount(x) util_bitcount64(x)
14*61046927SAndroid Build Coastguard Worker #define libagx_sub_sat(x, y) ((x >= y) ? (x - y) : 0)
15*61046927SAndroid Build Coastguard Worker #else
16*61046927SAndroid Build Coastguard Worker #define CONST(type_) constant type_ *
17*61046927SAndroid Build Coastguard Worker #define libagx_popcount(x) popcount(x)
18*61046927SAndroid Build Coastguard Worker #define libagx_sub_sat(x, y) sub_sat(x, y)
19*61046927SAndroid Build Coastguard Worker #endif
20*61046927SAndroid Build Coastguard Worker
21*61046927SAndroid Build Coastguard Worker #ifndef LIBAGX_GEOMETRY_H
22*61046927SAndroid Build Coastguard Worker #define LIBAGX_GEOMETRY_H
23*61046927SAndroid Build Coastguard Worker
/* Array bounds for the per-buffer (transform feedback) and per-stream fields
 * of struct agx_geometry_params.
 */
#define MAX_SO_BUFFERS     4
#define MAX_VERTEX_STREAMS 4
26*61046927SAndroid Build Coastguard Worker
27*61046927SAndroid Build Coastguard Worker /* Packed geometry state buffer */
28*61046927SAndroid Build Coastguard Worker struct agx_geometry_state {
29*61046927SAndroid Build Coastguard Worker /* Heap to allocate from. */
30*61046927SAndroid Build Coastguard Worker GLOBAL(uchar) heap;
31*61046927SAndroid Build Coastguard Worker uint32_t heap_bottom, heap_size;
32*61046927SAndroid Build Coastguard Worker } PACKED;
33*61046927SAndroid Build Coastguard Worker AGX_STATIC_ASSERT(sizeof(struct agx_geometry_state) == 4 * 4);
34*61046927SAndroid Build Coastguard Worker
35*61046927SAndroid Build Coastguard Worker struct agx_restart_unroll_params {
36*61046927SAndroid Build Coastguard Worker /* Heap to allocate from across draws */
37*61046927SAndroid Build Coastguard Worker GLOBAL(struct agx_geometry_state) heap;
38*61046927SAndroid Build Coastguard Worker
39*61046927SAndroid Build Coastguard Worker /* Input: index buffer if present. */
40*61046927SAndroid Build Coastguard Worker uint64_t index_buffer;
41*61046927SAndroid Build Coastguard Worker
42*61046927SAndroid Build Coastguard Worker /* Input: draw count */
43*61046927SAndroid Build Coastguard Worker CONST(uint) count;
44*61046927SAndroid Build Coastguard Worker
45*61046927SAndroid Build Coastguard Worker /* Input: indirect draw descriptor. Raw pointer since it's strided. */
46*61046927SAndroid Build Coastguard Worker uint64_t draws;
47*61046927SAndroid Build Coastguard Worker
48*61046927SAndroid Build Coastguard Worker /* Output draw descriptors */
49*61046927SAndroid Build Coastguard Worker GLOBAL(uint) out_draws;
50*61046927SAndroid Build Coastguard Worker
51*61046927SAndroid Build Coastguard Worker /* Pointer to zero */
52*61046927SAndroid Build Coastguard Worker uint64_t zero_sink;
53*61046927SAndroid Build Coastguard Worker
54*61046927SAndroid Build Coastguard Worker /* Input: maximum draw count, count is clamped to this */
55*61046927SAndroid Build Coastguard Worker uint32_t max_draws;
56*61046927SAndroid Build Coastguard Worker
57*61046927SAndroid Build Coastguard Worker /* Primitive restart index */
58*61046927SAndroid Build Coastguard Worker uint32_t restart_index;
59*61046927SAndroid Build Coastguard Worker
60*61046927SAndroid Build Coastguard Worker /* Input index buffer size in elements */
61*61046927SAndroid Build Coastguard Worker uint32_t index_buffer_size_el;
62*61046927SAndroid Build Coastguard Worker
63*61046927SAndroid Build Coastguard Worker /* Stride for the draw descriptor array */
64*61046927SAndroid Build Coastguard Worker uint32_t draw_stride;
65*61046927SAndroid Build Coastguard Worker
66*61046927SAndroid Build Coastguard Worker /* Use first vertex as the provoking vertex for flat shading. We could stick
67*61046927SAndroid Build Coastguard Worker * this in the key, but meh, you're already hosed for perf on the unroll
68*61046927SAndroid Build Coastguard Worker * path.
69*61046927SAndroid Build Coastguard Worker */
70*61046927SAndroid Build Coastguard Worker uint32_t flatshade_first;
71*61046927SAndroid Build Coastguard Worker } PACKED;
72*61046927SAndroid Build Coastguard Worker AGX_STATIC_ASSERT(sizeof(struct agx_restart_unroll_params) == 17 * 4);
73*61046927SAndroid Build Coastguard Worker
74*61046927SAndroid Build Coastguard Worker struct agx_gs_setup_indirect_params {
75*61046927SAndroid Build Coastguard Worker /* Index buffer if present. */
76*61046927SAndroid Build Coastguard Worker uint64_t index_buffer;
77*61046927SAndroid Build Coastguard Worker
78*61046927SAndroid Build Coastguard Worker /* Indirect draw descriptor. */
79*61046927SAndroid Build Coastguard Worker CONST(uint) draw;
80*61046927SAndroid Build Coastguard Worker
81*61046927SAndroid Build Coastguard Worker /* Pointer to be written with allocated vertex buffer */
82*61046927SAndroid Build Coastguard Worker GLOBAL(uintptr_t) vertex_buffer;
83*61046927SAndroid Build Coastguard Worker
84*61046927SAndroid Build Coastguard Worker /* Output input assembly state */
85*61046927SAndroid Build Coastguard Worker GLOBAL(struct agx_ia_state) ia;
86*61046927SAndroid Build Coastguard Worker
87*61046927SAndroid Build Coastguard Worker /* Output geometry parameters */
88*61046927SAndroid Build Coastguard Worker GLOBAL(struct agx_geometry_params) geom;
89*61046927SAndroid Build Coastguard Worker
90*61046927SAndroid Build Coastguard Worker /* Pointer to zero */
91*61046927SAndroid Build Coastguard Worker uint64_t zero_sink;
92*61046927SAndroid Build Coastguard Worker
93*61046927SAndroid Build Coastguard Worker /* Vertex (TES) output mask for sizing the allocated buffer */
94*61046927SAndroid Build Coastguard Worker uint64_t vs_outputs;
95*61046927SAndroid Build Coastguard Worker
96*61046927SAndroid Build Coastguard Worker /* The index size (1, 2, 4) or 0 if drawing without an index buffer. */
97*61046927SAndroid Build Coastguard Worker uint32_t index_size_B;
98*61046927SAndroid Build Coastguard Worker
99*61046927SAndroid Build Coastguard Worker /* Size of the index buffer */
100*61046927SAndroid Build Coastguard Worker uint32_t index_buffer_range_el;
101*61046927SAndroid Build Coastguard Worker } PACKED;
102*61046927SAndroid Build Coastguard Worker AGX_STATIC_ASSERT(sizeof(struct agx_gs_setup_indirect_params) == 16 * 4);
103*61046927SAndroid Build Coastguard Worker
104*61046927SAndroid Build Coastguard Worker struct agx_ia_state {
105*61046927SAndroid Build Coastguard Worker /* Index buffer if present. */
106*61046927SAndroid Build Coastguard Worker uint64_t index_buffer;
107*61046927SAndroid Build Coastguard Worker
108*61046927SAndroid Build Coastguard Worker /* Size of the bound index buffer for bounds checking */
109*61046927SAndroid Build Coastguard Worker uint32_t index_buffer_range_el;
110*61046927SAndroid Build Coastguard Worker
111*61046927SAndroid Build Coastguard Worker /* Number of vertices per instance. Written by CPU for direct draw, indirect
112*61046927SAndroid Build Coastguard Worker * setup kernel for indirect. This is used for VS->GS and VS->TCS indexing.
113*61046927SAndroid Build Coastguard Worker */
114*61046927SAndroid Build Coastguard Worker uint32_t verts_per_instance;
115*61046927SAndroid Build Coastguard Worker } PACKED;
116*61046927SAndroid Build Coastguard Worker AGX_STATIC_ASSERT(sizeof(struct agx_ia_state) == 4 * 4);
117*61046927SAndroid Build Coastguard Worker
/*
 * Compute the address of one element of an index buffer, with bounds checking.
 *
 *    index_buffer  Base address of the index buffer.
 *    size_el       Number of elements in the index buffer.
 *    offset_el     Element to address.
 *    elsize_B      Size of one element in bytes.
 *    zero_sink     Address to return for an out-of-bounds element.
 *
 * Returns index_buffer + offset_el * elsize_B when offset_el < size_el, and
 * zero_sink otherwise.
 */
static inline uint64_t
libagx_index_buffer(uint64_t index_buffer, uint size_el, uint offset_el,
                    uint elsize_B, uint64_t zero_sink)
{
   if (offset_el >= size_el)
      return zero_sink;

   /* Widen before multiplying: the product of two 32-bit values may not fit
    * in 32 bits, and a wrapped byte offset would address the wrong element.
    */
   return index_buffer + ((uint64_t)offset_el * elsize_B);
}
127*61046927SAndroid Build Coastguard Worker
128*61046927SAndroid Build Coastguard Worker static inline uint
libagx_index_buffer_range_el(uint size_el,uint offset_el)129*61046927SAndroid Build Coastguard Worker libagx_index_buffer_range_el(uint size_el, uint offset_el)
130*61046927SAndroid Build Coastguard Worker {
131*61046927SAndroid Build Coastguard Worker return libagx_sub_sat(size_el, offset_el);
132*61046927SAndroid Build Coastguard Worker }
133*61046927SAndroid Build Coastguard Worker
134*61046927SAndroid Build Coastguard Worker struct agx_geometry_params {
135*61046927SAndroid Build Coastguard Worker /* Persistent (cross-draw) geometry state */
136*61046927SAndroid Build Coastguard Worker GLOBAL(struct agx_geometry_state) state;
137*61046927SAndroid Build Coastguard Worker
138*61046927SAndroid Build Coastguard Worker /* Address of associated indirect draw buffer */
139*61046927SAndroid Build Coastguard Worker GLOBAL(uint) indirect_desc;
140*61046927SAndroid Build Coastguard Worker
141*61046927SAndroid Build Coastguard Worker /* Address of count buffer. For an indirect draw, this will be written by the
142*61046927SAndroid Build Coastguard Worker * indirect setup kernel.
143*61046927SAndroid Build Coastguard Worker */
144*61046927SAndroid Build Coastguard Worker GLOBAL(uint) count_buffer;
145*61046927SAndroid Build Coastguard Worker
146*61046927SAndroid Build Coastguard Worker /* Address of the primitives generated counters */
147*61046927SAndroid Build Coastguard Worker GLOBAL(uint) prims_generated_counter[MAX_VERTEX_STREAMS];
148*61046927SAndroid Build Coastguard Worker GLOBAL(uint) xfb_prims_generated_counter[MAX_VERTEX_STREAMS];
149*61046927SAndroid Build Coastguard Worker GLOBAL(uint) xfb_overflow[MAX_VERTEX_STREAMS];
150*61046927SAndroid Build Coastguard Worker GLOBAL(uint) xfb_any_overflow;
151*61046927SAndroid Build Coastguard Worker
152*61046927SAndroid Build Coastguard Worker /* Pointers to transform feedback buffer offsets in bytes */
153*61046927SAndroid Build Coastguard Worker GLOBAL(uint) xfb_offs_ptrs[MAX_SO_BUFFERS];
154*61046927SAndroid Build Coastguard Worker
155*61046927SAndroid Build Coastguard Worker /* Output index buffer, allocated by pre-GS. */
156*61046927SAndroid Build Coastguard Worker GLOBAL(uint) output_index_buffer;
157*61046927SAndroid Build Coastguard Worker
158*61046927SAndroid Build Coastguard Worker /* Address of transform feedback buffer in general, supplied by the CPU. */
159*61046927SAndroid Build Coastguard Worker GLOBAL(uchar) xfb_base_original[MAX_SO_BUFFERS];
160*61046927SAndroid Build Coastguard Worker
161*61046927SAndroid Build Coastguard Worker /* Address of transform feedback for the current primitive. Written by pre-GS
162*61046927SAndroid Build Coastguard Worker * program.
163*61046927SAndroid Build Coastguard Worker */
164*61046927SAndroid Build Coastguard Worker GLOBAL(uchar) xfb_base[MAX_SO_BUFFERS];
165*61046927SAndroid Build Coastguard Worker
166*61046927SAndroid Build Coastguard Worker /* Address and present mask for the input to the geometry shader. These will
167*61046927SAndroid Build Coastguard Worker * reflect the vertex shader for VS->GS or instead the tessellation
168*61046927SAndroid Build Coastguard Worker * evaluation shader for TES->GS.
169*61046927SAndroid Build Coastguard Worker */
170*61046927SAndroid Build Coastguard Worker uint64_t input_buffer;
171*61046927SAndroid Build Coastguard Worker uint64_t input_mask;
172*61046927SAndroid Build Coastguard Worker
173*61046927SAndroid Build Coastguard Worker /* Location-indexed mask of flat outputs, used for lowering GL edge flags. */
174*61046927SAndroid Build Coastguard Worker uint64_t flat_outputs;
175*61046927SAndroid Build Coastguard Worker
176*61046927SAndroid Build Coastguard Worker uint32_t xfb_size[MAX_SO_BUFFERS];
177*61046927SAndroid Build Coastguard Worker
178*61046927SAndroid Build Coastguard Worker /* Number of primitives emitted by transform feedback per stream. Written by
179*61046927SAndroid Build Coastguard Worker * the pre-GS program.
180*61046927SAndroid Build Coastguard Worker */
181*61046927SAndroid Build Coastguard Worker uint32_t xfb_prims[MAX_VERTEX_STREAMS];
182*61046927SAndroid Build Coastguard Worker
183*61046927SAndroid Build Coastguard Worker /* Within an indirect GS draw, the grids used to dispatch the VS/GS written
184*61046927SAndroid Build Coastguard Worker * out by the GS indirect setup kernel or the CPU for a direct draw.
185*61046927SAndroid Build Coastguard Worker */
186*61046927SAndroid Build Coastguard Worker uint32_t vs_grid[3];
187*61046927SAndroid Build Coastguard Worker uint32_t gs_grid[3];
188*61046927SAndroid Build Coastguard Worker
189*61046927SAndroid Build Coastguard Worker /* Number of input primitives across all instances, calculated by the CPU for
190*61046927SAndroid Build Coastguard Worker * a direct draw or the GS indirect setup kernel for an indirect draw.
191*61046927SAndroid Build Coastguard Worker */
192*61046927SAndroid Build Coastguard Worker uint32_t input_primitives;
193*61046927SAndroid Build Coastguard Worker
194*61046927SAndroid Build Coastguard Worker /* Number of input primitives per instance, rounded up to a power-of-two and
195*61046927SAndroid Build Coastguard Worker * with the base-2 log taken. This is used to partition the output vertex IDs
196*61046927SAndroid Build Coastguard Worker * efficiently.
197*61046927SAndroid Build Coastguard Worker */
198*61046927SAndroid Build Coastguard Worker uint32_t primitives_log2;
199*61046927SAndroid Build Coastguard Worker
200*61046927SAndroid Build Coastguard Worker /* Number of bytes output by the GS count shader per input primitive (may be
201*61046927SAndroid Build Coastguard Worker * 0), written by CPU and consumed by indirect draw setup shader for
202*61046927SAndroid Build Coastguard Worker * allocating counts.
203*61046927SAndroid Build Coastguard Worker */
204*61046927SAndroid Build Coastguard Worker uint32_t count_buffer_stride;
205*61046927SAndroid Build Coastguard Worker
206*61046927SAndroid Build Coastguard Worker /* Dynamic input topology. Must be compatible with the geometry shader's
207*61046927SAndroid Build Coastguard Worker * layout() declared input class.
208*61046927SAndroid Build Coastguard Worker */
209*61046927SAndroid Build Coastguard Worker uint32_t input_topology;
210*61046927SAndroid Build Coastguard Worker } PACKED;
211*61046927SAndroid Build Coastguard Worker AGX_STATIC_ASSERT(sizeof(struct agx_geometry_params) == 82 * 4);
212*61046927SAndroid Build Coastguard Worker
213*61046927SAndroid Build Coastguard Worker /* TCS shared memory layout:
214*61046927SAndroid Build Coastguard Worker *
215*61046927SAndroid Build Coastguard Worker * vec4 vs_outputs[VERTICES_IN_INPUT_PATCH][TOTAL_VERTEX_OUTPUTS];
216*61046927SAndroid Build Coastguard Worker *
217*61046927SAndroid Build Coastguard Worker * TODO: compact.
218*61046927SAndroid Build Coastguard Worker */
219*61046927SAndroid Build Coastguard Worker static inline uint
libagx_tcs_in_offs(uint vtx,gl_varying_slot location,uint64_t crosslane_vs_out_mask)220*61046927SAndroid Build Coastguard Worker libagx_tcs_in_offs(uint vtx, gl_varying_slot location,
221*61046927SAndroid Build Coastguard Worker uint64_t crosslane_vs_out_mask)
222*61046927SAndroid Build Coastguard Worker {
223*61046927SAndroid Build Coastguard Worker uint base = vtx * libagx_popcount(crosslane_vs_out_mask);
224*61046927SAndroid Build Coastguard Worker uint offs = libagx_popcount(crosslane_vs_out_mask &
225*61046927SAndroid Build Coastguard Worker (((uint64_t)(1) << location) - 1));
226*61046927SAndroid Build Coastguard Worker
227*61046927SAndroid Build Coastguard Worker return (base + offs) * 16;
228*61046927SAndroid Build Coastguard Worker }
229*61046927SAndroid Build Coastguard Worker
230*61046927SAndroid Build Coastguard Worker static inline uint
libagx_tcs_in_size(uint32_t vertices_in_patch,uint64_t crosslane_vs_out_mask)231*61046927SAndroid Build Coastguard Worker libagx_tcs_in_size(uint32_t vertices_in_patch, uint64_t crosslane_vs_out_mask)
232*61046927SAndroid Build Coastguard Worker {
233*61046927SAndroid Build Coastguard Worker return vertices_in_patch * libagx_popcount(crosslane_vs_out_mask) * 16;
234*61046927SAndroid Build Coastguard Worker }
235*61046927SAndroid Build Coastguard Worker
236*61046927SAndroid Build Coastguard Worker /*
237*61046927SAndroid Build Coastguard Worker * TCS out buffer layout, per-patch:
238*61046927SAndroid Build Coastguard Worker *
239*61046927SAndroid Build Coastguard Worker * float tess_level_outer[4];
240*61046927SAndroid Build Coastguard Worker * float tess_level_inner[2];
241*61046927SAndroid Build Coastguard Worker * vec4 patch_out[MAX_PATCH_OUTPUTS];
242*61046927SAndroid Build Coastguard Worker * vec4 vtx_out[OUT_PATCH_SIZE][TOTAL_VERTEX_OUTPUTS];
243*61046927SAndroid Build Coastguard Worker *
244*61046927SAndroid Build Coastguard Worker * Vertex out are compacted based on the mask of written out. Patch
245*61046927SAndroid Build Coastguard Worker * out are used as-is.
246*61046927SAndroid Build Coastguard Worker *
247*61046927SAndroid Build Coastguard Worker * Bounding boxes are ignored.
248*61046927SAndroid Build Coastguard Worker */
249*61046927SAndroid Build Coastguard Worker static inline uint
libagx_tcs_out_offs(uint vtx_id,gl_varying_slot location,uint nr_patch_out,uint out_patch_size,uint64_t vtx_out_mask)250*61046927SAndroid Build Coastguard Worker libagx_tcs_out_offs(uint vtx_id, gl_varying_slot location, uint nr_patch_out,
251*61046927SAndroid Build Coastguard Worker uint out_patch_size, uint64_t vtx_out_mask)
252*61046927SAndroid Build Coastguard Worker {
253*61046927SAndroid Build Coastguard Worker uint off = 0;
254*61046927SAndroid Build Coastguard Worker if (location == VARYING_SLOT_TESS_LEVEL_OUTER)
255*61046927SAndroid Build Coastguard Worker return off;
256*61046927SAndroid Build Coastguard Worker
257*61046927SAndroid Build Coastguard Worker off += 4 * sizeof(float);
258*61046927SAndroid Build Coastguard Worker if (location == VARYING_SLOT_TESS_LEVEL_INNER)
259*61046927SAndroid Build Coastguard Worker return off;
260*61046927SAndroid Build Coastguard Worker
261*61046927SAndroid Build Coastguard Worker off += 2 * sizeof(float);
262*61046927SAndroid Build Coastguard Worker if (location >= VARYING_SLOT_PATCH0)
263*61046927SAndroid Build Coastguard Worker return off + (16 * (location - VARYING_SLOT_PATCH0));
264*61046927SAndroid Build Coastguard Worker
265*61046927SAndroid Build Coastguard Worker /* Anything else is a per-vtx output */
266*61046927SAndroid Build Coastguard Worker off += 16 * nr_patch_out;
267*61046927SAndroid Build Coastguard Worker off += 16 * vtx_id * libagx_popcount(vtx_out_mask);
268*61046927SAndroid Build Coastguard Worker
269*61046927SAndroid Build Coastguard Worker uint idx = libagx_popcount(vtx_out_mask & (((uint64_t)(1) << location) - 1));
270*61046927SAndroid Build Coastguard Worker return off + (16 * idx);
271*61046927SAndroid Build Coastguard Worker }
272*61046927SAndroid Build Coastguard Worker
273*61046927SAndroid Build Coastguard Worker static inline uint
libagx_tcs_out_stride(uint nr_patch_out,uint out_patch_size,uint64_t vtx_out_mask)274*61046927SAndroid Build Coastguard Worker libagx_tcs_out_stride(uint nr_patch_out, uint out_patch_size,
275*61046927SAndroid Build Coastguard Worker uint64_t vtx_out_mask)
276*61046927SAndroid Build Coastguard Worker {
277*61046927SAndroid Build Coastguard Worker return libagx_tcs_out_offs(out_patch_size, VARYING_SLOT_VAR0, nr_patch_out,
278*61046927SAndroid Build Coastguard Worker out_patch_size, vtx_out_mask);
279*61046927SAndroid Build Coastguard Worker }
280*61046927SAndroid Build Coastguard Worker
/* Stride for the hardware vertex ID in a tessellation evaluation shader. */
#define LIBAGX_TES_PATCH_ID_STRIDE 8192
283*61046927SAndroid Build Coastguard Worker
284*61046927SAndroid Build Coastguard Worker #endif
285