/*
 * Copyright 2024 Valve Corporation
 * SPDX-License-Identifier: MIT
 */

#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "util/bitscan.h"
#include "util/macros.h"
#include "agx_compile.h"
#include "agx_helpers.h"
#include "agx_pack.h"
#include "agx_uvs.h"
#include "nir_builder_opcodes.h"
#include "nir_intrinsics.h"
#include "nir_intrinsics_indices.h"
#include "shader_enums.h"

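/* State shared across the store_output lowering: the gathered layer/viewport
 * writes (so their packed system value can be emitted once at the end) and
 * the unlinked UVS layout being lowered against.
 */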
struct ctx {
   nir_def *layer, *viewport;
   nir_cursor after_layer_viewport;
   struct agx_unlinked_uvs_layout *layout;
};

static enum uvs_group
group_for_varying(gl_varying_slot loc)
{
   switch (loc) {
   case VARYING_SLOT_POS:
      return UVS_POSITION;
   case VARYING_SLOT_PSIZ:
      return UVS_PSIZ;
   default:
      return UVS_VARYINGS;
   }
}

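/* Lower store_output intrinsics to hardware UVS stores according to the
 * unlinked layout, gathering layer/viewport writes along the way so their
 * system value can be emitted separately.
 */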
static bool
lower(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   struct ctx *ctx = data;
   if (intr->intrinsic != nir_intrinsic_store_output)
      return false;

   b->cursor = nir_instr_remove(&intr->instr);

   nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
   unsigned component = nir_intrinsic_component(intr);

   nir_def *value = intr->src[0].ssa;
   nir_def *offset = intr->src[1].ssa;

   /* If there is only 1 user varying, it is at the base of the varying
    * section. This saves us an indirection on simple separate shaders.
    */
   bool single_vary = util_is_power_of_two_nonzero64(ctx->layout->written);
   enum uvs_group group = group_for_varying(sem.location);

   nir_def *base;
   if ((group == UVS_VARYINGS) && !single_vary)
      base = nir_load_uvs_index_agx(b, .io_semantics = sem);
   else
      base = nir_imm_intN_t(b, ctx->layout->group_offs[group], 16);

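   /* The dynamic array offset steps in whole vec4 slots (indirect stores
    * claim all four components of each slot), so the final index is the
    * base plus the component plus 4x the offset.
    */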
   nir_def *index = nir_iadd(b, nir_iadd_imm(b, base, component),
                             nir_imul_imm(b, nir_u2u16(b, offset), 4));

   if (sem.location != VARYING_SLOT_LAYER)
      nir_store_uvs_agx(b, value, index);

   /* Insert clip distance sysval writes, and gather layer/viewport writes so
    * we can accumulate their system value. These are still lowered like
    * normal to write them for the varying FS input.
    */
   if (sem.location == VARYING_SLOT_LAYER) {
      assert(ctx->layer == NULL && "only written once");
      ctx->layer = value;
      ctx->after_layer_viewport = nir_after_instr(index->parent_instr);
   } else if (sem.location == VARYING_SLOT_VIEWPORT) {
      assert(ctx->viewport == NULL && "only written once");
      ctx->viewport = value;
      ctx->after_layer_viewport = nir_after_instr(index->parent_instr);
   } else if (sem.location == VARYING_SLOT_CLIP_DIST0 ||
              sem.location == VARYING_SLOT_CLIP_DIST1) {

      unsigned clip_base = ctx->layout->group_offs[UVS_CLIP_DIST];
      unsigned c = 4 * (sem.location - VARYING_SLOT_CLIP_DIST0) + component;

      if (c < b->shader->info.clip_distance_array_size) {
         nir_def *index = nir_iadd_imm(
            b, nir_imul_imm(b, nir_u2u16(b, offset), 4), clip_base + c);

         nir_store_uvs_agx(b, value, index);
      }
   }

   /* Combined clip/cull used */
   assert(sem.location != VARYING_SLOT_CULL_DIST0);
   assert(sem.location != VARYING_SLOT_CULL_DIST1);

   return true;
}

static void
write_layer_viewport_sysval(struct ctx *ctx)
{
   nir_builder b = nir_builder_at(ctx->after_layer_viewport);

   nir_def *zero = nir_imm_intN_t(&b, 0, 16);
   nir_def *layer = ctx->layer ? nir_u2u16(&b, ctx->layer) : zero;
   nir_def *viewport = ctx->viewport ? nir_u2u16(&b, ctx->viewport) : zero;

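   /* Layer lands in the low 16 bits and viewport in the high 16 bits of a
    * single packed 32-bit word.
    */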
   nir_store_uvs_agx(
      &b, nir_pack_32_2x16_split(&b, layer, viewport),
      nir_imm_int(&b, ctx->layout->group_offs[UVS_LAYER_VIEWPORT]));
}

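/* Gather the maximum component count written to each output slot. This runs
 * before lowering so the layout can be sized; it only inspects stores and
 * never reports progress.
 */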
static bool
gather_components(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   struct agx_unlinked_uvs_layout *layout = data;
   if (intr->intrinsic != nir_intrinsic_store_output)
      return false;

   nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
   unsigned component = nir_intrinsic_component(intr);

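   /* A constant offset identifies the exact slot written. An indirect store
    * may touch any of its slots, so conservatively claim all four components
    * of each.
    */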
   if (nir_src_is_const(intr->src[1])) {
      unsigned loc = sem.location + nir_src_as_uint(intr->src[1]);
      layout->components[loc] = MAX2(layout->components[loc], component + 1);
   } else {
      for (unsigned i = 0; i < sem.num_slots; ++i) {
         layout->components[sem.location + i] = 4;
      }
   }

   return false;
}

bool
agx_nir_lower_uvs(nir_shader *s, struct agx_unlinked_uvs_layout *layout)
{
   bool progress = false;

   /* Scalarize up front so we can ignore vectors later */
   NIR_PASS(progress, s, nir_lower_io_to_scalar, nir_var_shader_out, NULL,
            NULL);

   /* Determine the unlinked UVS layout */
   NIR_PASS(progress, s, nir_shader_intrinsics_pass, gather_components,
            nir_metadata_control_flow, layout);

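   /* Size the fixed-function groups: position is always a full vec4, point
    * size and layer/viewport each take a single word when written, and clip
    * distances are sized by the shader's clip distance array.
    */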
   unsigned sizes[UVS_NUM_GROUP] = {
      [UVS_POSITION] = 4,
      [UVS_PSIZ] = !!(s->info.outputs_written & VARYING_BIT_PSIZ),
      [UVS_LAYER_VIEWPORT] = !!(s->info.outputs_written &
                                (VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT)),
      [UVS_CLIP_DIST] = s->info.clip_distance_array_size,
   };

   for (unsigned i = 0; i < ARRAY_SIZE(layout->components); ++i) {
      if (i != VARYING_SLOT_POS && i != VARYING_SLOT_PSIZ &&
          i != VARYING_SLOT_LAYER && layout->components[i]) {

         layout->written |= BITFIELD64_BIT(i);
         sizes[UVS_VARYINGS] += layout->components[i];
      }
   }

   unsigned offs = 0;
   for (enum uvs_group g = 0; g < UVS_NUM_GROUP; ++g) {
      layout->group_offs[g] = offs;
      offs += sizes[g];
   }

   layout->size = offs;
   layout->user_size = sizes[UVS_VARYINGS];

   /* Now lower in terms of the unlinked layout */
   struct ctx ctx = {.layout = layout};
   NIR_PASS(progress, s, nir_shader_intrinsics_pass, lower,
            nir_metadata_control_flow, &ctx);

   if (ctx.layer || ctx.viewport) {
      write_layer_viewport_sysval(&ctx);
   }

   /* Finally, pack what we can. It's much cheaper to do this at compile-time
    * than draw-time.
    */
   agx_pack(&layout->osel, OUTPUT_SELECT, cfg) {
      cfg.point_size = sizes[UVS_PSIZ];
      cfg.viewport_target = sizes[UVS_LAYER_VIEWPORT];
      cfg.render_target = cfg.viewport_target;

      cfg.clip_distance_plane_0 = sizes[UVS_CLIP_DIST] > 0;
      cfg.clip_distance_plane_1 = sizes[UVS_CLIP_DIST] > 1;
      cfg.clip_distance_plane_2 = sizes[UVS_CLIP_DIST] > 2;
      cfg.clip_distance_plane_3 = sizes[UVS_CLIP_DIST] > 3;
      cfg.clip_distance_plane_4 = sizes[UVS_CLIP_DIST] > 4;
      cfg.clip_distance_plane_5 = sizes[UVS_CLIP_DIST] > 5;
      cfg.clip_distance_plane_6 = sizes[UVS_CLIP_DIST] > 6;
      cfg.clip_distance_plane_7 = sizes[UVS_CLIP_DIST] > 7;
   }

   agx_pack(&layout->vdm, VDM_STATE_VERTEX_OUTPUTS, cfg) {
      cfg.output_count_1 = offs;
      cfg.output_count_2 = offs;
   }

   return progress;
}

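/* Assign final UVS indices to each written user varying, grouping slots by
 * interpolation qualifier so the counts can be packed for linking.
 */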
void
agx_assign_uvs(struct agx_varyings_vs *varyings,
               struct agx_unlinked_uvs_layout *layout, uint64_t flat_mask,
               uint64_t linear_mask)
{
   *varyings = (struct agx_varyings_vs){0};

   /* These are always flat-shaded from the FS perspective */
   flat_mask |= VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT;

   /* The internal cull distance slots are always linearly-interpolated */
   linear_mask |= BITFIELD64_RANGE(VARYING_SLOT_CULL_PRIMITIVE, 2);

   assert(!(flat_mask & linear_mask));

   /* TODO: Link FP16 varyings */
   unsigned num_32_smooth = 0, num_32_flat = 0, num_32_linear = 0;
   struct {
      uint32_t *num;
      uint64_t mask;
   } parts[] = {
      {&num_32_smooth, ~flat_mask & ~linear_mask},
      {&num_32_flat, flat_mask},
      {&num_32_linear, linear_mask},
   };

   unsigned base = layout->group_offs[UVS_VARYINGS];

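   /* Walk the interpolation classes in order (smooth, flat, linear), handing
    * out consecutive slot indices and accumulating the per-class counts.
    */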
   for (unsigned p = 0; p < ARRAY_SIZE(parts); ++p) {
      u_foreach_bit64(loc, parts[p].mask & layout->written) {
         assert(loc < ARRAY_SIZE(varyings->slots));
         varyings->slots[loc] = base;

         base += layout->components[loc];
         (*parts[p].num) += layout->components[loc];
      }
   }

   agx_pack(&varyings->counts_32, VARYING_COUNTS, cfg) {
      cfg.smooth = num_32_smooth;
      cfg.flat = num_32_flat;
      cfg.linear = num_32_linear;
   }

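   /* No FP16 varyings are linked yet (see the TODO above), so the 16-bit
    * counts are all zero.
    */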
   agx_pack(&varyings->counts_16, VARYING_COUNTS, cfg) {
      cfg.smooth = 0;
      cfg.flat = 0;
      cfg.linear = 0;
   }
}

static inline enum agx_shade_model
translate_flat_shade_model(unsigned provoking_vertex)
{
   static_assert(AGX_SHADE_MODEL_FLAT_VERTEX_0 == 0, "hw");
   static_assert(AGX_SHADE_MODEL_FLAT_VERTEX_2 == 2, "hw");

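   /* Provoking vertices 0 and 2 map directly onto the hardware enum values
    * (checked above); vertex 1 needs an explicit translation.
    */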
   assert(provoking_vertex <= 2);

   if (provoking_vertex == 1)
      return AGX_SHADE_MODEL_FLAT_VERTEX_1;
   else
      return (enum agx_shade_model)provoking_vertex;
}

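/* Pack the coefficient (CF) binding header and bindings that link vertex
 * shader outputs to fragment shader inputs, including the fixed-function
 * sources (point coordinate, primitive ID, fragcoord Z).
 * generate_primitive_id is set when the hardware must synthesize a primitive
 * ID that the vertex shader did not write.
 */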
void
agx_link_varyings_vs_fs(void *out, struct agx_varyings_vs *vs,
                        unsigned nr_user_indices, struct agx_varyings_fs *fs,
                        unsigned provoking_vertex, uint8_t sprite_coord_enable,
                        bool *generate_primitive_id)
{
   assert(fs->nr_bindings > 0);

   *generate_primitive_id = false;

   struct agx_cf_binding_header_packed *header = out;
   struct agx_cf_binding_packed *bindings = (void *)(header + 1);

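   /* Slot 0 holds the W coordinate (see the VARYING_SLOT_POS handling below)
    * and slot 1 holds Z when the fragment shader reads it, so user varyings
    * start after these.
    */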
   unsigned user_base = 1 + (fs->reads_z ? 1 : 0);
   unsigned nr_slots = user_base + nr_user_indices;

   agx_pack(header, CF_BINDING_HEADER, cfg) {
      cfg.number_of_32_bit_slots = nr_slots;
      cfg.number_of_coefficient_registers = fs->nr_cf;
   }

   for (unsigned i = 0; i < fs->nr_bindings; ++i) {
      struct agx_cf_binding b = fs->bindings[i];

      agx_pack(bindings + i, CF_BINDING, cfg) {
         cfg.base_coefficient_register = b.cf_base;
         cfg.components = b.count;

         if (b.smooth) {
            cfg.shade_model = b.perspective ? AGX_SHADE_MODEL_PERSPECTIVE
                                            : AGX_SHADE_MODEL_LINEAR;
         } else {
            cfg.shade_model = translate_flat_shade_model(provoking_vertex);
         }

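         /* Point sprites: replace gl_PointCoord and any sprite-coord-enabled
          * texture coordinate with the hardware point coordinate source.
          */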
         if (b.slot == VARYING_SLOT_PNTC ||
             (b.slot >= VARYING_SLOT_TEX0 && b.slot <= VARYING_SLOT_TEX7 &&
              (sprite_coord_enable &
               BITFIELD_BIT(b.slot - VARYING_SLOT_TEX0)))) {

            assert(b.offset == 0);
            cfg.source = AGX_COEFFICIENT_SOURCE_POINT_COORD;
         } else if (b.slot == VARYING_SLOT_PRIMITIVE_ID &&
                    !vs->slots[VARYING_SLOT_PRIMITIVE_ID]) {
            cfg.source = AGX_COEFFICIENT_SOURCE_PRIMITIVE_ID;
            *generate_primitive_id = true;
         } else if (b.slot == VARYING_SLOT_POS) {
            assert(b.offset >= 2 && "gl_Position.xy are not varyings");
            assert(fs->reads_z || b.offset != 2);

            if (b.offset == 2) {
               cfg.source = AGX_COEFFICIENT_SOURCE_FRAGCOORD_Z;
               cfg.base_slot = 1;
            } else {
               assert(!b.perspective && "W must not be perspective divided");
            }
         } else {
            unsigned vs_index = vs->slots[b.slot];
            assert(b.offset < 4);

            /* Varyings not written by the vertex shader are undefined, but
             * we must not crash.
             */
            if (vs_index) {
               assert(vs_index >= 4 &&
                      "gl_Position should have been the first 4 slots");

               cfg.base_slot = user_base + (vs_index - 4) + b.offset;
            }
         }

         assert(cfg.base_coefficient_register + cfg.components <= fs->nr_cf &&
                "overflowed coefficient registers");
      }
   }
}