/*
 * Copyright 2024 Valve Corporation
 * SPDX-License-Identifier: MIT
 */

#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "util/bitscan.h"
#include "util/macros.h"
#include "agx_compile.h"
#include "agx_helpers.h"
#include "agx_pack.h"
#include "agx_uvs.h"
#include "nir_builder_opcodes.h"
#include "nir_intrinsics.h"
#include "nir_intrinsics_indices.h"
#include "shader_enums.h"

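/* Lowering state: the layer/viewport values gathered from output stores, a
 * cursor at which to emit their combined sysval word, and the unlinked UVS
 * layout being populated.
 */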
struct ctx {
   nir_def *layer, *viewport;
   nir_cursor after_layer_viewport;
   struct agx_unlinked_uvs_layout *layout;
};

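/* Map a varying slot to its UVS group. Position and point size have dedicated
 * groups; every other output lands in the user varyings group.
 */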
static enum uvs_group
group_for_varying(gl_varying_slot loc)
{
   switch (loc) {
   case VARYING_SLOT_POS:
      return UVS_POSITION;
   case VARYING_SLOT_PSIZ:
      return UVS_PSIZ;
   default:
      return UVS_VARYINGS;
   }
}

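/* Lower a store_output intrinsic to store_uvs_agx, translating the I/O
 * semantics into an index within the unlinked UVS layout. Layer and viewport
 * values are also gathered so their combined sysval word can be emitted
 * afterwards, and clip distances get extra sysval copies in the clip distance
 * group.
 */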
static bool
lower(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   struct ctx *ctx = data;
   if (intr->intrinsic != nir_intrinsic_store_output)
      return false;

   b->cursor = nir_instr_remove(&intr->instr);

   nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
   unsigned component = nir_intrinsic_component(intr);

   nir_def *value = intr->src[0].ssa;
   nir_def *offset = intr->src[1].ssa;

   /* If there is only 1 user varying, it is at the base of the varying section.
    * This saves us an indirection on simple separate shaders.
    */
   bool single_vary = util_is_power_of_two_nonzero64(ctx->layout->written);
   enum uvs_group group = group_for_varying(sem.location);

   nir_def *base;
   if ((group == UVS_VARYINGS) && !single_vary)
      base = nir_load_uvs_index_agx(b, .io_semantics = sem);
   else
      base = nir_imm_intN_t(b, ctx->layout->group_offs[group], 16);

   nir_def *index = nir_iadd(b, nir_iadd_imm(b, base, component),
                             nir_imul_imm(b, nir_u2u16(b, offset), 4));

   if (sem.location != VARYING_SLOT_LAYER)
      nir_store_uvs_agx(b, value, index);

   /* Insert clip distance sysval writes, and gather layer/viewport writes so we
    * can accumulate their system value. These are still lowered like normal to
    * write them for the varying FS input.
    */
   if (sem.location == VARYING_SLOT_LAYER) {
      assert(ctx->layer == NULL && "only written once");
      ctx->layer = value;
      ctx->after_layer_viewport = nir_after_instr(index->parent_instr);
   } else if (sem.location == VARYING_SLOT_VIEWPORT) {
      assert(ctx->viewport == NULL && "only written once");
      ctx->viewport = value;
      ctx->after_layer_viewport = nir_after_instr(index->parent_instr);
   } else if (sem.location == VARYING_SLOT_CLIP_DIST0 ||
              sem.location == VARYING_SLOT_CLIP_DIST1) {

      unsigned clip_base = ctx->layout->group_offs[UVS_CLIP_DIST];
      unsigned c = 4 * (sem.location - VARYING_SLOT_CLIP_DIST0) + component;

      if (c < b->shader->info.clip_distance_array_size) {
         nir_def *index = nir_iadd_imm(
            b, nir_imul_imm(b, nir_u2u16(b, offset), 4), clip_base + c);

         nir_store_uvs_agx(b, value, index);
      }
   }

   /* Combined clip/cull distance arrays are assumed, so separate cull
    * distance slots must not appear here.
    */
   assert(sem.location != VARYING_SLOT_CULL_DIST0);
   assert(sem.location != VARYING_SLOT_CULL_DIST1);

   return true;
}

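/* Emit the combined layer/viewport sysval word: layer in the low 16 bits and
 * viewport in the high 16 bits, with zero substituted for whichever half the
 * shader does not write.
 */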
static void
write_layer_viewport_sysval(struct ctx *ctx)
{
   nir_builder b = nir_builder_at(ctx->after_layer_viewport);

   nir_def *zero = nir_imm_intN_t(&b, 0, 16);
   nir_def *layer = ctx->layer ? nir_u2u16(&b, ctx->layer) : zero;
   nir_def *viewport = ctx->viewport ? nir_u2u16(&b, ctx->viewport) : zero;

   nir_store_uvs_agx(
      &b, nir_pack_32_2x16_split(&b, layer, viewport),
      nir_imm_int(&b, ctx->layout->group_offs[UVS_LAYER_VIEWPORT]));
}

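/* Record how many components each output location writes so the user
 * varyings section can be sized. Indirect stores are handled conservatively
 * by marking every slot they might touch as a full vec4.
 */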
static bool
gather_components(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   struct agx_unlinked_uvs_layout *layout = data;
   if (intr->intrinsic != nir_intrinsic_store_output)
      return false;

   nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
   unsigned component = nir_intrinsic_component(intr);

   if (nir_src_is_const(intr->src[1])) {
      unsigned loc = sem.location + nir_src_as_uint(intr->src[1]);
      layout->components[loc] = MAX2(layout->components[loc], component + 1);
   } else {
      for (unsigned i = 0; i < sem.num_slots; ++i) {
         layout->components[sem.location + i] = 4;
      }
   }

   return false;
}

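/* Compute the shader's unlinked UVS layout and lower its output stores
 * against it. Group offsets are assigned consecutively in enum order, and
 * the hardware words that depend only on this layout are packed up front.
 */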
bool
agx_nir_lower_uvs(nir_shader *s, struct agx_unlinked_uvs_layout *layout)
{
   bool progress = false;

   /* Scalarize up front so we can ignore vectors later */
   NIR_PASS(progress, s, nir_lower_io_to_scalar, nir_var_shader_out, NULL,
            NULL);

   /* Determine the unlinked UVS layout */
   NIR_PASS(progress, s, nir_shader_intrinsics_pass, gather_components,
            nir_metadata_control_flow, layout);

   unsigned sizes[UVS_NUM_GROUP] = {
      [UVS_POSITION] = 4,
      [UVS_PSIZ] = !!(s->info.outputs_written & VARYING_BIT_PSIZ),
      [UVS_LAYER_VIEWPORT] = !!(s->info.outputs_written &
                                (VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT)),
      [UVS_CLIP_DIST] = s->info.clip_distance_array_size,
   };

   for (unsigned i = 0; i < ARRAY_SIZE(layout->components); ++i) {
      if (i != VARYING_SLOT_POS && i != VARYING_SLOT_PSIZ &&
          i != VARYING_SLOT_LAYER && layout->components[i]) {

         layout->written |= BITFIELD64_BIT(i);
         sizes[UVS_VARYINGS] += layout->components[i];
      }
   }

   unsigned offs = 0;
   for (enum uvs_group g = 0; g < UVS_NUM_GROUP; ++g) {
      layout->group_offs[g] = offs;
      offs += sizes[g];
   }

   layout->size = offs;
   layout->user_size = sizes[UVS_VARYINGS];

   /* Now lower in terms of the unlinked layout */
   struct ctx ctx = {.layout = layout};
   NIR_PASS(progress, s, nir_shader_intrinsics_pass, lower,
            nir_metadata_control_flow, &ctx);

   if (ctx.layer || ctx.viewport) {
      write_layer_viewport_sysval(&ctx);
   }

   /* Finally, pack what we can. It's much cheaper to do this at compile-time
    * than draw-time.
    */
   agx_pack(&layout->osel, OUTPUT_SELECT, cfg) {
      cfg.point_size = sizes[UVS_PSIZ];
      cfg.viewport_target = sizes[UVS_LAYER_VIEWPORT];
      cfg.render_target = cfg.viewport_target;

      cfg.clip_distance_plane_0 = sizes[UVS_CLIP_DIST] > 0;
      cfg.clip_distance_plane_1 = sizes[UVS_CLIP_DIST] > 1;
      cfg.clip_distance_plane_2 = sizes[UVS_CLIP_DIST] > 2;
      cfg.clip_distance_plane_3 = sizes[UVS_CLIP_DIST] > 3;
      cfg.clip_distance_plane_4 = sizes[UVS_CLIP_DIST] > 4;
      cfg.clip_distance_plane_5 = sizes[UVS_CLIP_DIST] > 5;
      cfg.clip_distance_plane_6 = sizes[UVS_CLIP_DIST] > 6;
      cfg.clip_distance_plane_7 = sizes[UVS_CLIP_DIST] > 7;
   }

   agx_pack(&layout->vdm, VDM_STATE_VERTEX_OUTPUTS, cfg) {
      cfg.output_count_1 = offs;
      cfg.output_count_2 = offs;
   }

   return progress;
}

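/* Assign final indices in the user varyings section, grouped by interpolation
 * mode (smooth, then flat, then linear) so each class is contiguous for the
 * packed VARYING_COUNTS words. Intended for link time, once the fragment
 * shader's flat/linear masks are known.
 */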
void
agx_assign_uvs(struct agx_varyings_vs *varyings,
               struct agx_unlinked_uvs_layout *layout, uint64_t flat_mask,
               uint64_t linear_mask)
{
   *varyings = (struct agx_varyings_vs){0};

   /* These are always flat-shaded from the FS perspective */
   flat_mask |= VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT;

   /* The internal cull distance slots are always linearly interpolated */
   linear_mask |= BITFIELD64_RANGE(VARYING_SLOT_CULL_PRIMITIVE, 2);

   assert(!(flat_mask & linear_mask));

   /* TODO: Link FP16 varyings */
   unsigned num_32_smooth = 0, num_32_flat = 0, num_32_linear = 0;
   struct {
      uint32_t *num;
      uint64_t mask;
   } parts[] = {
      {&num_32_smooth, ~flat_mask & ~linear_mask},
      {&num_32_flat, flat_mask},
      {&num_32_linear, linear_mask},
   };

   unsigned base = layout->group_offs[UVS_VARYINGS];

   for (unsigned p = 0; p < ARRAY_SIZE(parts); ++p) {
      u_foreach_bit64(loc, parts[p].mask & layout->written) {
         assert(loc < ARRAY_SIZE(varyings->slots));
         varyings->slots[loc] = base;

         base += layout->components[loc];
         (*parts[p].num) += layout->components[loc];
      }
   }

   agx_pack(&varyings->counts_32, VARYING_COUNTS, cfg) {
      cfg.smooth = num_32_smooth;
      cfg.flat = num_32_flat;
      cfg.linear = num_32_linear;
   }

   agx_pack(&varyings->counts_16, VARYING_COUNTS, cfg) {
      cfg.smooth = 0;
      cfg.flat = 0;
      cfg.linear = 0;
   }
}

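/* For flat shading, the hardware shade model encodes a provoking vertex of 0
 * or 2 directly (per the static_asserts below); provoking vertex 1 has its
 * own enum value.
 */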
static inline enum agx_shade_model
translate_flat_shade_model(unsigned provoking_vertex)
{
   static_assert(AGX_SHADE_MODEL_FLAT_VERTEX_0 == 0, "hw");
   static_assert(AGX_SHADE_MODEL_FLAT_VERTEX_2 == 2, "hw");

   assert(provoking_vertex <= 2);

   if (provoking_vertex == 1)
      return AGX_SHADE_MODEL_FLAT_VERTEX_1;
   else
      return (enum agx_shade_model)provoking_vertex;
}

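/* Pack the coefficient register bindings that link vertex shader outputs to
 * fragment shader inputs, including the special sources: point sprite
 * coordinates, fragcoord Z/W, and a hardware-generated primitive ID when the
 * vertex shader does not write one. This runs at draw time, once the
 * provoking vertex and sprite coordinate state are known.
 */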
void
agx_link_varyings_vs_fs(void *out, struct agx_varyings_vs *vs,
                        unsigned nr_user_indices, struct agx_varyings_fs *fs,
                        unsigned provoking_vertex, uint8_t sprite_coord_enable,
                        bool *generate_primitive_id)
{
   assert(fs->nr_bindings > 0);

   *generate_primitive_id = false;

   struct agx_cf_binding_header_packed *header = out;
   struct agx_cf_binding_packed *bindings = (void *)(header + 1);

   unsigned user_base = 1 + (fs->reads_z ? 1 : 0);
   unsigned nr_slots = user_base + nr_user_indices;

   agx_pack(header, CF_BINDING_HEADER, cfg) {
      cfg.number_of_32_bit_slots = nr_slots;
      cfg.number_of_coefficient_registers = fs->nr_cf;
   }

   for (unsigned i = 0; i < fs->nr_bindings; ++i) {
      struct agx_cf_binding b = fs->bindings[i];

      agx_pack(bindings + i, CF_BINDING, cfg) {
         cfg.base_coefficient_register = b.cf_base;
         cfg.components = b.count;

         if (b.smooth) {
            cfg.shade_model = b.perspective ? AGX_SHADE_MODEL_PERSPECTIVE
                                            : AGX_SHADE_MODEL_LINEAR;
         } else {
            cfg.shade_model = translate_flat_shade_model(provoking_vertex);
         }

         if (b.slot == VARYING_SLOT_PNTC ||
             (b.slot >= VARYING_SLOT_TEX0 && b.slot <= VARYING_SLOT_TEX7 &&
              (sprite_coord_enable &
               BITFIELD_BIT(b.slot - VARYING_SLOT_TEX0)))) {

            assert(b.offset == 0);
            cfg.source = AGX_COEFFICIENT_SOURCE_POINT_COORD;
         } else if (b.slot == VARYING_SLOT_PRIMITIVE_ID &&
                    !vs->slots[VARYING_SLOT_PRIMITIVE_ID]) {
            cfg.source = AGX_COEFFICIENT_SOURCE_PRIMITIVE_ID;
            *generate_primitive_id = true;
         } else if (b.slot == VARYING_SLOT_POS) {
            assert(b.offset >= 2 && "gl_Position.xy are not varyings");
            assert(fs->reads_z || b.offset != 2);

            if (b.offset == 2) {
               cfg.source = AGX_COEFFICIENT_SOURCE_FRAGCOORD_Z;
               cfg.base_slot = 1;
            } else {
               assert(!b.perspective && "W must not be perspective divided");
            }
         } else {
            unsigned vs_index = vs->slots[b.slot];
            assert(b.offset < 4);

            /* Varyings not written by the vertex shader are undefined, but we
             * must not crash */
            if (vs_index) {
               assert(vs_index >= 4 &&
                      "gl_Position should have been the first 4 slots");

               cfg.base_slot = user_base + (vs_index - 4) + b.offset;
            }
         }

         assert(cfg.base_coefficient_register + cfg.components <= fs->nr_cf &&
                "overflowed coefficient registers");
      }
   }
}