/*
 * Copyright © 2015 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "compiler/v3d_compiler.h"
#include "compiler/nir/nir_builder.h"

/**
 * Walks the NIR generated by TGSI-to-NIR or GLSL-to-NIR to lower its I/O
 * intrinsics into something amenable to the V3D architecture.
 *
 * Most of the work is turning the VS's store_output intrinsics from working
 * on a base representing the gallium-level vec4 driver_location to an offset
 * within the VPM, and emitting the header that's read by the fixed function
 * hardware between the VS and FS.
 *
 * We also adjust the offsets on uniform loads to be in bytes, since that's
 * what we need for indirect addressing with general TMU access.
 */
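
/* As an illustrative sketch (not an actual shader dump), a VS store of a
 * vec4 varying such as:
 *
 *    store_output(src, 0) (base=1, wrmask=xyzw)   <- gallium vec4 slot 1
 *
 * becomes one scalar store per component consumed by the FS, e.g.:
 *
 *    store_output(src.x, 0) (base=varyings_vpm_offset + N)
 *    store_output(src.y, 0) (base=varyings_vpm_offset + N + 1)
 *    ...
 *
 * where N is the component's index in the shader key's used_outputs list,
 * followed at the end of the shader by stores for the fixed-function header
 * (clip/window coordinates, Zs, 1/Wc, point size).
 */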

struct v3d_nir_lower_io_state {
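        /* Scalar VPM slot offsets for each fixed-function output (-1 when
         * that output is not emitted) and for the start of the varyings; a
         * sketch of the layouts is in v3d_nir_setup_vpm_layout_vs/gs.
         */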
        int pos_vpm_offset;
        int vp_vpm_offset;
        int zs_vpm_offset;
        int rcp_wc_vpm_offset;
        int psiz_vpm_offset;
        int varyings_vpm_offset;

        /* Geometry shader state */
        struct {
                /* VPM offset for the current vertex data output */
                nir_variable *output_offset_var;
                /* VPM offset for the current vertex header */
                nir_variable *header_offset_var;
                /* VPM header for the current vertex */
                nir_variable *header_var;

                /* Size of the complete VPM output header */
                uint32_t output_header_size;
                /* Size of the output data for a single vertex */
                uint32_t output_vertex_data_size;
        } gs;

        BITSET_WORD varyings_stored[BITSET_WORDS(V3D_MAX_ANY_STAGE_INPUTS)];

        nir_def *pos[4];
};

static void
v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
                            struct v3d_nir_lower_io_state *state);

static void
v3d_nir_store_output(nir_builder *b, int base, nir_def *offset,
                     nir_def *chan)
{
        if (offset) {
                /* When generating the VIR instruction, the base and the offset
                 * are just going to get added together with an ADD instruction
                 * so we might as well do the add here at the NIR level instead
                 * and let the constant folding do its magic.
                 */
                offset = nir_iadd_imm(b, offset, base);
                base = 0;
        } else {
                offset = nir_imm_int(b, 0);
        }

        nir_store_output(b, chan, offset, .base = base, .write_mask = 0x1,
                         .component = 0,
                         .src_type = nir_type_uint | chan->bit_size);
}

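/* Looks up the scalar slot for a (location, component) pair among the
 * outputs consumed by the next stage, returning its index in the shader
 * key's used_outputs list, or -1 if the next stage doesn't read it.
 */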
static int
v3d_varying_slot_vpm_offset(struct v3d_compile *c, unsigned location,
                            unsigned component)
{
        uint32_t num_used_outputs = 0;
        struct v3d_varying_slot *used_outputs = NULL;
        switch (c->s->info.stage) {
        case MESA_SHADER_VERTEX:
                num_used_outputs = c->vs_key->num_used_outputs;
                used_outputs = c->vs_key->used_outputs;
                break;
        case MESA_SHADER_GEOMETRY:
                num_used_outputs = c->gs_key->num_used_outputs;
                used_outputs = c->gs_key->used_outputs;
                break;
        default:
                unreachable("Unsupported shader stage");
        }

        for (int i = 0; i < num_used_outputs; i++) {
                struct v3d_varying_slot slot = used_outputs[i];

                if (v3d_slot_get_slot(slot) == location &&
                    v3d_slot_get_component(slot) == component) {
                        return i;
                }
        }

        return -1;
}

/* Lowers a store_output (gallium driver location) to a series of
 * store_outputs with a driver_location equal to the offset in the VPM.
 *
 * For geometry shaders we need to emit multiple vertices so the VPM offsets
 * need to be computed in the shader code based on the current vertex index.
 */
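/* For example (illustrative), in a GS the final scalar VPM address of a
 * varying component works out to
 *
 *    output_offset_var + varyings_vpm_offset + vpm_offset
 *
 * where output_offset_var is the running per-vertex offset maintained by
 * the EmitVertex() lowering below.
 */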
static void
v3d_nir_lower_vpm_output(struct v3d_compile *c, nir_builder *b,
                         nir_intrinsic_instr *intr,
                         struct v3d_nir_lower_io_state *state)
{
        b->cursor = nir_before_instr(&intr->instr);

        /* If this is a geometry shader we need to emit our outputs
         * to the current vertex offset in the VPM.
         */
        nir_def *offset_reg =
                c->s->info.stage == MESA_SHADER_GEOMETRY ?
                        nir_load_var(b, state->gs.output_offset_var) : NULL;

        int start_comp = nir_intrinsic_component(intr);
        unsigned location = nir_intrinsic_io_semantics(intr).location;
        nir_def *src = intr->src[0].ssa;
        /* Save off the components of the position for the setup of VPM inputs
         * read by fixed function HW.
         */
        if (location == VARYING_SLOT_POS) {
                for (int i = 0; i < intr->num_components; i++) {
                        state->pos[start_comp + i] = nir_channel(b, src, i);
                }
        }

        /* Just write psiz to its position in the FF header right now. */
        if (location == VARYING_SLOT_PSIZ &&
            state->psiz_vpm_offset != -1) {
                v3d_nir_store_output(b, state->psiz_vpm_offset, offset_reg, src);
        }

        if (location == VARYING_SLOT_LAYER) {
                assert(c->s->info.stage == MESA_SHADER_GEOMETRY);
                nir_def *header = nir_load_var(b, state->gs.header_var);
                header = nir_iand_imm(b, header, 0xff00ffff);

                /* From the GLES 3.2 spec:
                 *
                 *    "When fragments are written to a layered framebuffer, the
                 *     fragment’s layer number selects an image from the array
                 *     of images at each attachment (...). If the fragment’s
                 *     layer number is negative, or greater than or equal to
                 *     the minimum number of layers of any attachment, the
                 *     effects of the fragment on the framebuffer contents are
                 *     undefined."
                 *
                 * This suggests we can just ignore that situation. However,
                 * for V3D an out-of-bounds layer index means that the binner
                 * might do out-of-bounds write accesses to the tile state. The
                 * simulator has an assert to catch this, so we play safe here
                 * and make sure that doesn't happen by setting gl_Layer
                 * to 0 in that case (we always allocate tile state for at
                 * least one layer).
                 */
                nir_def *fb_layers = nir_load_fb_layers_v3d(b, 32);
                nir_def *cond = nir_ige(b, src, fb_layers);
                nir_def *layer_id =
                        nir_bcsel(b, cond,
                                  nir_imm_int(b, 0),
                                  nir_ishl_imm(b, src, 16));
                header = nir_ior(b, header, layer_id);
                nir_store_var(b, state->gs.header_var, header, 0x1);
        }

        /* Scalarize outputs if it hasn't happened already, since we want to
         * schedule each VPM write individually. We can skip any output
         * components not read by the FS.
         */
        for (int i = 0; i < intr->num_components; i++) {
                int vpm_offset =
                        v3d_varying_slot_vpm_offset(c, location, start_comp + i);

                if (!(nir_intrinsic_write_mask(intr) & (1 << i)))
                        continue;

                if (vpm_offset == -1)
                        continue;

                if (nir_src_is_const(intr->src[1]))
                        vpm_offset += nir_src_as_uint(intr->src[1]) * 4;

                /* If this fires it means the shader has too many outputs */
                assert(BITSET_BITWORD(vpm_offset) < ARRAY_SIZE(state->varyings_stored));
                BITSET_SET(state->varyings_stored, vpm_offset);

                v3d_nir_store_output(b, state->varyings_vpm_offset + vpm_offset,
                                     offset_reg, nir_channel(b, src, i));
        }

        nir_instr_remove(&intr->instr);
}

static inline void
reset_gs_header(nir_builder *b, struct v3d_nir_lower_io_state *state)
{
        const uint8_t NEW_PRIMITIVE_OFFSET = 0;
        const uint8_t VERTEX_DATA_LENGTH_OFFSET = 8;
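
        /* Illustrative layout of the per-vertex header word built below
         * (bit positions follow the offsets above; the layer ID bits are
         * filled in later by the VARYING_SLOT_LAYER lowering):
         *
         *    bit  0:      New Primitive flag
         *    bits 8..15:  vertex data length, in scalar VPM slots
         *    bits 16..23: layer ID
         *
         * e.g. with output_vertex_data_size = 13 the reset header is
         * (1 << 0) | (13 << 8) = 0x00000d01.
         */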

        uint32_t vertex_data_size = state->gs.output_vertex_data_size;
        assert((vertex_data_size & 0xffffff00) == 0);

        uint32_t header;
        header = 1 << NEW_PRIMITIVE_OFFSET;
        header |= vertex_data_size << VERTEX_DATA_LENGTH_OFFSET;
        nir_store_var(b, state->gs.header_var, nir_imm_int(b, header), 0x1);
}

static void
v3d_nir_lower_emit_vertex(struct v3d_compile *c, nir_builder *b,
                          nir_intrinsic_instr *instr,
                          struct v3d_nir_lower_io_state *state)
{
        b->cursor = nir_before_instr(&instr->instr);

        nir_def *header = nir_load_var(b, state->gs.header_var);
        nir_def *header_offset = nir_load_var(b, state->gs.header_offset_var);
        nir_def *output_offset = nir_load_var(b, state->gs.output_offset_var);

        /* Emit fixed function outputs */
        v3d_nir_emit_ff_vpm_outputs(c, b, state);

        /* Emit vertex header */
        v3d_nir_store_output(b, 0, header_offset, header);

        /* Update VPM offset for next vertex output data and header */
        output_offset =
                nir_iadd_imm(b, output_offset,
                             state->gs.output_vertex_data_size);

        header_offset = nir_iadd_imm(b, header_offset, 1);

        /* Reset the New Primitive bit */
        header = nir_iand_imm(b, header, 0xfffffffe);

        nir_store_var(b, state->gs.output_offset_var, output_offset, 0x1);
        nir_store_var(b, state->gs.header_offset_var, header_offset, 0x1);
        nir_store_var(b, state->gs.header_var, header, 0x1);

        nir_instr_remove(&instr->instr);
}

static void
v3d_nir_lower_end_primitive(struct v3d_compile *c, nir_builder *b,
                            nir_intrinsic_instr *instr,
                            struct v3d_nir_lower_io_state *state)
{
        assert(state->gs.header_var);
        b->cursor = nir_before_instr(&instr->instr);
        reset_gs_header(b, state);

        nir_instr_remove(&instr->instr);
}

/* Some vertex attribute formats may require applying a swizzle, but the
 * hardware doesn't provide a means to do that, so we need to apply the
 * swizzle in the vertex shader.
 *
 * This is required at least in Vulkan to support the mandatory vertex
 * attribute format VK_FORMAT_B8G8R8A8_UNORM.
 */
static void
v3d_nir_lower_vertex_input(struct v3d_compile *c, nir_builder *b,
                           nir_intrinsic_instr *instr)
{
        assert(c->s->info.stage == MESA_SHADER_VERTEX);

        if (!c->vs_key->va_swap_rb_mask)
                return;

        const uint32_t location = nir_intrinsic_io_semantics(instr).location;

        if (!(c->vs_key->va_swap_rb_mask & (1 << location)))
                return;

        assert(instr->num_components == 1);
        const uint32_t comp = nir_intrinsic_component(instr);
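        /* Swap the R and B components: (comp + 2) % 4 maps 0 -> 2 and
         * 2 -> 0, leaving G (1) and A (3) untouched.
         */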
        if (comp == 0 || comp == 2)
                nir_intrinsic_set_component(instr, (comp + 2) % 4);
}

static void
v3d_nir_lower_load_kernel_input(nir_builder *b, nir_intrinsic_instr *instr)
{
        b->cursor = nir_before_instr(&instr->instr);
        nir_def *old = &instr->def;

        nir_def *load =
                nir_load_uniform(b, old->num_components,
                                 old->bit_size, instr->src->ssa,
                                 .base = nir_intrinsic_base(instr),
                                 .range = nir_intrinsic_range(instr),
                                 .dest_type = nir_type_uint | old->bit_size);

        nir_def_rewrite_uses(old, load);
        nir_instr_remove(&instr->instr);
}

static void
v3d_nir_lower_io_instr(struct v3d_compile *c, nir_builder *b,
                       struct nir_instr *instr,
                       struct v3d_nir_lower_io_state *state)
{
        if (instr->type != nir_instr_type_intrinsic)
                return;
        nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

        switch (intr->intrinsic) {
        case nir_intrinsic_load_input:
                if (c->s->info.stage == MESA_SHADER_VERTEX)
                        v3d_nir_lower_vertex_input(c, b, intr);
                break;

        case nir_intrinsic_load_kernel_input:
                v3d_nir_lower_load_kernel_input(b, intr);
                break;

        case nir_intrinsic_store_output:
                v3d_nir_lower_vpm_output(c, b, intr, state);
                break;

        case nir_intrinsic_emit_vertex:
                v3d_nir_lower_emit_vertex(c, b, intr, state);
                break;

        case nir_intrinsic_end_primitive:
                v3d_nir_lower_end_primitive(c, b, intr, state);
                break;

        default:
                break;
        }
}

/* Remap the output var's .driver_location. This is purely for
 * nir_print_shader() so that store_output can map back to a variable name.
 */
static void
v3d_nir_lower_io_update_output_var_base(struct v3d_compile *c,
                                        struct v3d_nir_lower_io_state *state)
{
        nir_foreach_shader_out_variable_safe(var, c->s) {
                if (var->data.location == VARYING_SLOT_POS &&
                    state->pos_vpm_offset != -1) {
                        var->data.driver_location = state->pos_vpm_offset;
                        continue;
                }

                if (var->data.location == VARYING_SLOT_PSIZ &&
                    state->psiz_vpm_offset != -1) {
                        var->data.driver_location = state->psiz_vpm_offset;
                        continue;
                }

                int vpm_offset =
                        v3d_varying_slot_vpm_offset(c,
                                                    var->data.location,
                                                    var->data.location_frac);
                if (vpm_offset != -1) {
                        var->data.driver_location =
                                state->varyings_vpm_offset + vpm_offset;
                } else {
                        /* If we couldn't find a mapping for the var, delete
                         * it so that its old .driver_location doesn't confuse
                         * nir_print_shader().
                         */
                        exec_node_remove(&var->node);
                }
        }
}

static void
v3d_nir_setup_vpm_layout_vs(struct v3d_compile *c,
                            struct v3d_nir_lower_io_state *state)
{
        uint32_t vpm_offset = 0;

        state->pos_vpm_offset = -1;
        state->vp_vpm_offset = -1;
        state->zs_vpm_offset = -1;
        state->rcp_wc_vpm_offset = -1;
        state->psiz_vpm_offset = -1;

        bool needs_ff_outputs = c->vs_key->base.is_last_geometry_stage;
        if (needs_ff_outputs) {
                if (c->vs_key->is_coord) {
                        state->pos_vpm_offset = vpm_offset;
                        vpm_offset += 4;
                }

                state->vp_vpm_offset = vpm_offset;
                vpm_offset += 2;

                if (!c->vs_key->is_coord) {
                        state->zs_vpm_offset = vpm_offset++;
                        state->rcp_wc_vpm_offset = vpm_offset++;
                }

                if (c->vs_key->per_vertex_point_size)
                        state->psiz_vpm_offset = vpm_offset++;
        }

        state->varyings_vpm_offset = vpm_offset;

        c->vpm_output_size = MAX2(1, vpm_offset + c->vs_key->num_used_outputs);
}
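
/* As an illustration (assuming a render (i.e. non-coord) VS with per-vertex
 * point size), the layout above works out to:
 *
 *    0..1: Xs, Ys (screen-space position)
 *    2:    Zs
 *    3:    1/Wc
 *    4:    point size
 *    5..:  the FS-read varyings, in used_outputs order
 */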

static void
v3d_nir_setup_vpm_layout_gs(struct v3d_compile *c,
                            struct v3d_nir_lower_io_state *state)
{
        /* 1 header slot for number of output vertices */
        uint32_t vpm_offset = 1;

        /* 1 header slot per output vertex */
        const uint32_t num_vertices = c->s->info.gs.vertices_out;
        vpm_offset += num_vertices;

        state->gs.output_header_size = vpm_offset;

        /* Vertex data: here we only compute offsets into a generic vertex's
         * data elements. When it is time to actually write a particular
         * vertex to the VPM, we will add that vertex's offset within the VPM
         * output to these offsets.
         *
         * If geometry shaders are present, they are always the last shader
         * stage before rasterization, so we always emit fixed function
         * outputs.
         */
        vpm_offset = 0;
        if (c->gs_key->is_coord) {
                state->pos_vpm_offset = vpm_offset;
                vpm_offset += 4;
        } else {
                state->pos_vpm_offset = -1;
        }

        state->vp_vpm_offset = vpm_offset;
        vpm_offset += 2;

        if (!c->gs_key->is_coord) {
                state->zs_vpm_offset = vpm_offset++;
                state->rcp_wc_vpm_offset = vpm_offset++;
        } else {
                state->zs_vpm_offset = -1;
                state->rcp_wc_vpm_offset = -1;
        }

        /* Mesa enables OES_geometry_shader_point_size automatically with
         * OES_geometry_shader so we always need to handle point size
         * writes if present.
         */
        if (c->gs_key->per_vertex_point_size)
                state->psiz_vpm_offset = vpm_offset++;

        state->varyings_vpm_offset = vpm_offset;

        state->gs.output_vertex_data_size =
                state->varyings_vpm_offset + c->gs_key->num_used_outputs;

        c->vpm_output_size =
                state->gs.output_header_size +
                state->gs.output_vertex_data_size * num_vertices;
}
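
/* A worked example (illustrative): a render GS with vertices_out = 3,
 * per-vertex point size, and 6 used output slots gives
 * output_header_size = 1 + 3 = 4 and output_vertex_data_size =
 * 2 (Xs/Ys) + 1 (Zs) + 1 (1/Wc) + 1 (psiz) + 6 = 11, so
 * vpm_output_size = 4 + 11 * 3 = 37 scalar VPM slots.
 */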

static void
v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
                            struct v3d_nir_lower_io_state *state)
{
        /* If this is a geometry shader we need to emit our fixed function
         * outputs to the current vertex offset in the VPM.
         */
        nir_def *offset_reg =
                c->s->info.stage == MESA_SHADER_GEOMETRY ?
                        nir_load_var(b, state->gs.output_offset_var) : NULL;

        for (int i = 0; i < 4; i++) {
                if (!state->pos[i])
                        state->pos[i] = nir_undef(b, 1, 32);
        }

        nir_def *rcp_wc = nir_frcp(b, state->pos[3]);

        if (state->pos_vpm_offset != -1) {
                for (int i = 0; i < 4; i++) {
                        v3d_nir_store_output(b, state->pos_vpm_offset + i,
                                             offset_reg, state->pos[i]);
                }
        }

        if (state->vp_vpm_offset != -1) {
                for (int i = 0; i < 2; i++) {
                        nir_def *pos;
                        nir_def *scale;
                        pos = state->pos[i];
                        if (i == 0)
                                scale = nir_load_viewport_x_scale(b);
                        else
                                scale = nir_load_viewport_y_scale(b);
                        pos = nir_fmul(b, pos, scale);
                        pos = nir_fmul(b, pos, rcp_wc);
                        /* Pre-V3D 4.3 hardware has a quirk where it expects XY
                         * coordinates in .8 fixed-point format, but then it
                         * will internally round it to .6 fixed-point,
                         * introducing a double rounding. The double rounding
                         * can cause very slight differences in triangle
                         * rasterization coverage that can actually be noticed
                         * by some CTS tests.
                         *
                         * The correct fix for this as recommended by Broadcom
                         * is to convert to .8 fixed-point with ffloor().
                         */
                        if (c->devinfo->ver == 42)
                                pos = nir_f2i32(b, nir_ffloor(b, pos));
                        else
                                pos = nir_f2i32(b, nir_fround_even(b, pos));

                        v3d_nir_store_output(b, state->vp_vpm_offset + i,
                                             offset_reg, pos);
                }
        }

        if (state->zs_vpm_offset != -1) {
                nir_def *z = state->pos[2];
                z = nir_fmul(b, z, nir_load_viewport_z_scale(b));
                z = nir_fmul(b, z, rcp_wc);
                z = nir_fadd(b, z, nir_load_viewport_z_offset(b));
                v3d_nir_store_output(b, state->zs_vpm_offset, offset_reg, z);
        }

        if (state->rcp_wc_vpm_offset != -1) {
                v3d_nir_store_output(b, state->rcp_wc_vpm_offset,
                                     offset_reg, rcp_wc);
        }

        /* Store 0 to varyings requested by the FS but not stored by the
         * previous stage. This should be undefined behavior, but
         * glsl-routing seems to rely on it.
         */
        uint32_t num_used_outputs;
        switch (c->s->info.stage) {
        case MESA_SHADER_VERTEX:
                num_used_outputs = c->vs_key->num_used_outputs;
                break;
        case MESA_SHADER_GEOMETRY:
                num_used_outputs = c->gs_key->num_used_outputs;
                break;
        default:
                unreachable("Unsupported shader stage");
        }

        for (int i = 0; i < num_used_outputs; i++) {
                if (!BITSET_TEST(state->varyings_stored, i)) {
                        v3d_nir_store_output(b, state->varyings_vpm_offset + i,
                                             offset_reg, nir_imm_int(b, 0));
                }
        }
}

static void
emit_gs_prolog(struct v3d_compile *c, nir_builder *b,
               nir_function_impl *impl,
               struct v3d_nir_lower_io_state *state)
{
        nir_block *first = nir_start_block(impl);
        b->cursor = nir_before_block(first);

        const struct glsl_type *uint_type = glsl_uint_type();
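
        /* Initial state (see the layout sketch above): per-vertex data starts
         * right after the output header block, and per-vertex headers start
         * at slot 1, since slot 0 holds the total vertex count written at the
         * end of the shader.
         */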

        assert(!state->gs.output_offset_var);
        state->gs.output_offset_var =
                nir_local_variable_create(impl, uint_type, "output_offset");
        nir_store_var(b, state->gs.output_offset_var,
                      nir_imm_int(b, state->gs.output_header_size), 0x1);

        assert(!state->gs.header_offset_var);
        state->gs.header_offset_var =
                nir_local_variable_create(impl, uint_type, "header_offset");
        nir_store_var(b, state->gs.header_offset_var, nir_imm_int(b, 1), 0x1);

        assert(!state->gs.header_var);
        state->gs.header_var =
                nir_local_variable_create(impl, uint_type, "header");
        reset_gs_header(b, state);
}

static void
emit_gs_vpm_output_header_prolog(struct v3d_compile *c, nir_builder *b,
                                 struct v3d_nir_lower_io_state *state)
{
        const uint8_t VERTEX_COUNT_OFFSET = 16;
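
        /* Illustrative: with output_header_size = 4 (1 count slot plus 3
         * per-vertex header slots) and 3 vertices actually emitted, the word
         * stored below is (3 << 16) | 4 = 0x00030004.
         */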

        /* Our GS header has 1 generic header slot (at VPM offset 0) and then
         * one slot per output vertex after it. This means we don't need to
         * have a variable just to keep track of the number of vertices we
         * emitted and instead we can just compute it here from the header
         * offset variable by removing the one generic header slot that always
         * goes at the beginning of our header.
         */
        nir_def *header_offset =
                nir_load_var(b, state->gs.header_offset_var);
        nir_def *vertex_count =
                nir_iadd_imm(b, header_offset, -1);
        nir_def *header =
                nir_ior_imm(b,
                            nir_ishl_imm(b, vertex_count,
                                         VERTEX_COUNT_OFFSET),
                            state->gs.output_header_size);

        v3d_nir_store_output(b, 0, NULL, header);
}

bool
v3d_nir_lower_io(nir_shader *s, struct v3d_compile *c)
{
        if (s->info.stage != MESA_SHADER_VERTEX &&
            s->info.stage != MESA_SHADER_GEOMETRY &&
            s->info.stage != MESA_SHADER_COMPUTE) {
                return false;
        }

        struct v3d_nir_lower_io_state state = { 0 };

        /* Set up the layout of the VPM outputs. */
        if (s->info.stage == MESA_SHADER_VERTEX)
                v3d_nir_setup_vpm_layout_vs(c, &state);
        else if (s->info.stage == MESA_SHADER_GEOMETRY)
                v3d_nir_setup_vpm_layout_gs(c, &state);

        nir_foreach_function_impl(impl, s) {
                nir_builder b = nir_builder_create(impl);

                if (c->s->info.stage == MESA_SHADER_GEOMETRY)
                        emit_gs_prolog(c, &b, impl, &state);

                nir_foreach_block(block, impl) {
                        nir_foreach_instr_safe(instr, block)
                                v3d_nir_lower_io_instr(c, &b, instr, &state);
                }

                nir_block *last = nir_impl_last_block(impl);
                b.cursor = nir_after_block(last);
                if (s->info.stage == MESA_SHADER_VERTEX) {
                        v3d_nir_emit_ff_vpm_outputs(c, &b, &state);
                } else if (s->info.stage == MESA_SHADER_GEOMETRY) {
                        emit_gs_vpm_output_header_prolog(c, &b, &state);
                }

                nir_metadata_preserve(impl, nir_metadata_control_flow);
        }

        if (s->info.stage != MESA_SHADER_COMPUTE)
                v3d_nir_lower_io_update_output_var_base(c, &state);

        /* It is really unlikely that we don't make progress here, and fully
         * filtering the cases where we don't would make the code more
         * complex, but we are still interested in getting this lowering to
         * go through NIR_PASS.
         */
        return true;
}