1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "util/hash_table.h"
25 #include "util/set.h"
26 #include "nir.h"
27 #include "nir_builder.h"
28
/* This file contains various little helpers for doing simple linking in
 * NIR. Eventually, we'll probably want a full-blown varying packing
 * implementation in here. Right now it removes unused varyings, compacts
 * the ones that remain, and performs a few other simple link-time
 * optimizations.
 */
33
34 /**
35 * Returns the bits in the inputs_read, or outputs_written
36 * bitfield corresponding to this variable.
37 */
static uint64_t
get_variable_io_mask(nir_variable *var, gl_shader_stage stage)
40 {
41 if (var->data.location < 0)
42 return 0;
43
44 unsigned location = var->data.patch ? var->data.location - VARYING_SLOT_PATCH0 : var->data.location;
45
46 assert(var->data.mode == nir_var_shader_in ||
47 var->data.mode == nir_var_shader_out);
48 assert(var->data.location >= 0);
49 assert(location < 64);
50
51 const struct glsl_type *type = var->type;
52 if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
53 assert(glsl_type_is_array(type));
54 type = glsl_get_array_element(type);
55 }
56
57 unsigned slots = glsl_count_attribute_slots(type, false);
58 return BITFIELD64_MASK(slots) << location;
59 }
60
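/* The tessellation-level and bounding-box varyings are patch variables, but
 * they are built-ins rather than generic patch slots, so the callers below
 * skip them when building the generic patch bitmasks.
 */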
static bool
is_non_generic_patch_var(nir_variable *var)
63 {
64 return var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
65 var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER ||
66 var->data.location == VARYING_SLOT_BOUNDING_BOX0 ||
67 var->data.location == VARYING_SLOT_BOUNDING_BOX1;
68 }
69
static uint8_t
get_num_components(nir_variable *var)
72 {
73 if (glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
74 return 4;
75
76 return glsl_get_vector_elements(glsl_without_array(var->type));
77 }
78
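/* Collect reads of shader outputs (e.g. a TCS reading back its own outputs)
 * so that they count as uses when deciding which outputs can be removed.
 */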
static void
add_output_reads(nir_shader *shader, uint64_t *read, uint64_t *patches_read)
81 {
82 nir_foreach_function_impl(impl, shader) {
83 nir_foreach_block(block, impl) {
84 nir_foreach_instr(instr, block) {
85 if (instr->type != nir_instr_type_intrinsic)
86 continue;
87
88 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
89 if (intrin->intrinsic != nir_intrinsic_load_deref)
90 continue;
91
92 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
93 if (!nir_deref_mode_is(deref, nir_var_shader_out))
94 continue;
95
96 nir_variable *var = nir_deref_instr_get_variable(deref);
97 for (unsigned i = 0; i < get_num_components(var); i++) {
98 if (var->data.patch) {
99 if (is_non_generic_patch_var(var))
100 continue;
101
102 patches_read[var->data.location_frac + i] |=
103 get_variable_io_mask(var, shader->info.stage);
104 } else {
105 read[var->data.location_frac + i] |=
106 get_variable_io_mask(var, shader->info.stage);
107 }
108 }
109 }
110 }
111 }
112 }
113
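/* Callback for nir_shader_intrinsics_pass() that deletes access to variables
 * marked as removed (location == NUM_TOTAL_VARYING_SLOTS): loads and
 * interpolation intrinsics are replaced with undefs, stores and copies are
 * simply removed.
 */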
static bool
remove_unused_io_access(nir_builder *b, nir_intrinsic_instr *intrin, void *cb_data)
116 {
117 nir_variable_mode mode = *(nir_variable_mode *)cb_data;
118
119 unsigned srcn = 0;
120 switch (intrin->intrinsic) {
121 case nir_intrinsic_load_deref:
122 case nir_intrinsic_store_deref:
123 case nir_intrinsic_interp_deref_at_centroid:
124 case nir_intrinsic_interp_deref_at_sample:
125 case nir_intrinsic_interp_deref_at_offset:
126 case nir_intrinsic_interp_deref_at_vertex:
127 break;
128 case nir_intrinsic_copy_deref:
129 srcn = mode == nir_var_shader_in ? 1 : 0;
130 break;
131 default:
132 return false;
133 }
134
135 nir_variable *var = nir_intrinsic_get_var(intrin, srcn);
136 if (!var || var->data.mode != mode || var->data.location != NUM_TOTAL_VARYING_SLOTS)
137 return false;
138
139 if (intrin->intrinsic != nir_intrinsic_store_deref &&
140 intrin->intrinsic != nir_intrinsic_copy_deref) {
141 b->cursor = nir_before_instr(&intrin->instr);
142 nir_def *undef = nir_undef(b, intrin->num_components, intrin->def.bit_size);
143 nir_def_rewrite_uses(&intrin->def, undef);
144 }
145
146 nir_instr_remove(&intrin->instr);
147 nir_deref_instr_remove_if_unused(nir_src_as_deref(intrin->src[srcn]));
148
149 return true;
150 }
151
/**
 * Helper for removing unused shader I/O variables: unused variables are
 * removed from the shader and any remaining access to them is deleted.
 *
 * Example usage is:
 *
 *   progress = nir_remove_unused_io_vars(producer, nir_var_shader_out,
 *                                        read, patches_read) ||
 *              progress;
 */
bool
nir_remove_unused_io_vars(nir_shader *shader,
                          nir_variable_mode mode,
                          uint64_t *used_by_other_stage,
                          uint64_t *used_by_other_stage_patches)
167 {
168 bool progress = false;
169 uint64_t *used;
170
171 assert(mode == nir_var_shader_in || mode == nir_var_shader_out);
172
173 uint64_t read[4] = { 0 };
174 uint64_t patches_read[4] = { 0 };
175 if (mode == nir_var_shader_out)
176 add_output_reads(shader, read, patches_read);
177
178 nir_foreach_variable_with_modes_safe(var, shader, mode) {
179 if (var->data.patch)
180 used = used_by_other_stage_patches;
181 else
182 used = used_by_other_stage;
183
184 if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0 &&
185 !(shader->info.stage == MESA_SHADER_MESH && var->data.location == VARYING_SLOT_PRIMITIVE_ID))
186 continue;
187
188 if (var->data.always_active_io)
189 continue;
190
191 if (var->data.explicit_xfb_buffer)
192 continue;
193
194 uint64_t other_stage = 0;
195 uint64_t this_stage = 0;
196 for (unsigned i = 0; i < get_num_components(var); i++) {
197 other_stage |= used[var->data.location_frac + i];
198 this_stage |= (var->data.patch ? patches_read : read)[var->data.location_frac + i];
199 }
200
201 uint64_t var_mask = get_variable_io_mask(var, shader->info.stage);
202 if (!((other_stage | this_stage) & var_mask)) {
203 /* Mark the variable as removed by setting the location to an invalid value. */
204 var->data.location = NUM_TOTAL_VARYING_SLOTS;
205 exec_node_remove(&var->node);
206 progress = true;
207 }
208 }
209
210 if (progress) {
211 nir_shader_intrinsics_pass(shader, &remove_unused_io_access, nir_metadata_control_flow, &mode);
212 } else {
213 nir_shader_preserve_all_metadata(shader);
214 }
215
216 return progress;
217 }
218
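/**
 * Remove producer outputs that the consumer never reads and consumer inputs
 * that the producer never writes.
 *
 * Example usage is:
 *
 *    progress = nir_remove_unused_varyings(producer, consumer) || progress;
 */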
bool
nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer)
221 {
222 assert(producer->info.stage != MESA_SHADER_FRAGMENT);
223 assert(consumer->info.stage != MESA_SHADER_VERTEX);
224
225 uint64_t read[4] = { 0 }, written[4] = { 0 };
226 uint64_t patches_read[4] = { 0 }, patches_written[4] = { 0 };
227
228 nir_foreach_shader_out_variable(var, producer) {
229 for (unsigned i = 0; i < get_num_components(var); i++) {
230 if (var->data.patch) {
231 if (is_non_generic_patch_var(var))
232 continue;
233
234 patches_written[var->data.location_frac + i] |=
235 get_variable_io_mask(var, producer->info.stage);
236 } else {
237 written[var->data.location_frac + i] |=
238 get_variable_io_mask(var, producer->info.stage);
239 }
240 }
241 }
242
243 nir_foreach_shader_in_variable(var, consumer) {
244 for (unsigned i = 0; i < get_num_components(var); i++) {
245 if (var->data.patch) {
246 if (is_non_generic_patch_var(var))
247 continue;
248
249 patches_read[var->data.location_frac + i] |=
250 get_variable_io_mask(var, consumer->info.stage);
251 } else {
252 read[var->data.location_frac + i] |=
253 get_variable_io_mask(var, consumer->info.stage);
254 }
255 }
256 }
257
258 bool progress = false;
259 progress = nir_remove_unused_io_vars(producer, nir_var_shader_out, read,
260 patches_read);
261
262 progress = nir_remove_unused_io_vars(consumer, nir_var_shader_in, written,
263 patches_written) ||
264 progress;
265
266 return progress;
267 }
268
static uint8_t
get_interp_type(nir_variable *var, const struct glsl_type *type,
                bool default_to_smooth_interp)
272 {
273 if (var->data.per_primitive)
274 return INTERP_MODE_NONE;
275 if (glsl_type_is_integer(type))
276 return INTERP_MODE_FLAT;
277 else if (var->data.interpolation != INTERP_MODE_NONE)
278 return var->data.interpolation;
279 else if (default_to_smooth_interp)
280 return INTERP_MODE_SMOOTH;
281 else
282 return INTERP_MODE_NONE;
283 }
284
285 #define INTERPOLATE_LOC_SAMPLE 0
286 #define INTERPOLATE_LOC_CENTROID 1
287 #define INTERPOLATE_LOC_CENTER 2
288
static uint8_t
get_interp_loc(nir_variable *var)
291 {
292 if (var->data.sample)
293 return INTERPOLATE_LOC_SAMPLE;
294 else if (var->data.centroid)
295 return INTERPOLATE_LOC_CENTROID;
296 else
297 return INTERPOLATE_LOC_CENTER;
298 }
299
static bool
is_packing_supported_for_type(const struct glsl_type *type)
302 {
   /* We ignore complex types such as arrays, matrices, structs and bit sizes
    * other than 32 bits. All other vector types should have been split into
    * scalar variables by the lower_io_to_scalar pass. The only exception
    * should be OpenGL xfb varyings.
    * TODO: add support for more complex types?
    */
309 return glsl_type_is_scalar(type) && glsl_type_is_32bit(type);
310 }
311
312 struct assigned_comps {
313 uint8_t comps;
314 uint8_t interp_type;
315 uint8_t interp_loc;
316 bool is_32bit;
317 bool is_mediump;
318 bool is_per_primitive;
319 };
320
/* Packing arrays and dual-slot varyings is difficult, so to avoid complex
 * algorithms this function just assigns them their existing locations for now.
 * TODO: allow better packing of complex types.
 */
static void
get_unmoveable_components_masks(nir_shader *shader,
                                nir_variable_mode mode,
                                struct assigned_comps *comps,
                                gl_shader_stage stage,
                                bool default_to_smooth_interp)
331 {
332 nir_foreach_variable_with_modes_safe(var, shader, mode) {
333 assert(var->data.location >= 0);
334
335 /* Only remap things that aren't built-ins. */
336 if (var->data.location >= VARYING_SLOT_VAR0 &&
337 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
338
339 const struct glsl_type *type = var->type;
340 if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
341 assert(glsl_type_is_array(type));
342 type = glsl_get_array_element(type);
343 }
344
345 /* If we can pack this varying then don't mark the components as
346 * used.
347 */
348 if (is_packing_supported_for_type(type) &&
349 !var->data.always_active_io)
350 continue;
351
352 unsigned location = var->data.location - VARYING_SLOT_VAR0;
353
354 unsigned elements =
355 glsl_type_is_vector_or_scalar(glsl_without_array(type)) ? glsl_get_vector_elements(glsl_without_array(type)) : 4;
356
357 bool dual_slot = glsl_type_is_dual_slot(glsl_without_array(type));
358 unsigned slots = glsl_count_attribute_slots(type, false);
359 unsigned dmul = glsl_type_is_64bit(glsl_without_array(type)) ? 2 : 1;
360 unsigned comps_slot2 = 0;
361 for (unsigned i = 0; i < slots; i++) {
362 if (dual_slot) {
363 if (i & 1) {
364 comps[location + i].comps |= ((1 << comps_slot2) - 1);
365 } else {
366 unsigned num_comps = 4 - var->data.location_frac;
367 comps_slot2 = (elements * dmul) - num_comps;
368
369 /* Assume ARB_enhanced_layouts packing rules for doubles */
370 assert(var->data.location_frac == 0 ||
371 var->data.location_frac == 2);
372 assert(comps_slot2 <= 4);
373
374 comps[location + i].comps |=
375 ((1 << num_comps) - 1) << var->data.location_frac;
376 }
377 } else {
378 comps[location + i].comps |=
379 ((1 << (elements * dmul)) - 1) << var->data.location_frac;
380 }
381
382 comps[location + i].interp_type =
383 get_interp_type(var, type, default_to_smooth_interp);
384 comps[location + i].interp_loc = get_interp_loc(var);
385 comps[location + i].is_32bit =
386 glsl_type_is_32bit(glsl_without_array(type));
387 comps[location + i].is_mediump =
388 var->data.precision == GLSL_PRECISION_MEDIUM ||
389 var->data.precision == GLSL_PRECISION_LOW;
390 comps[location + i].is_per_primitive = var->data.per_primitive;
391 }
392 }
393 }
394 }
395
396 struct varying_loc {
397 uint8_t component;
398 uint32_t location;
399 };
400
static void
mark_all_used_slots(nir_variable *var, uint64_t *slots_used,
                    uint64_t slots_used_mask, unsigned num_slots)
404 {
405 unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
406
407 slots_used[var->data.patch ? 1 : 0] |= slots_used_mask &
408 BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
409 }
410
static void
mark_used_slot(nir_variable *var, uint64_t *slots_used, unsigned offset)
413 {
414 unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
415
416 slots_used[var->data.patch ? 1 : 0] |=
417 BITFIELD64_BIT(var->data.location - loc_offset + offset);
418 }
419
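/* Rewrite variable locations/components according to the remap table and
 * rebuild the shader_info slot bitmasks for the new assignment.
 */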
static void
remap_slots_and_components(nir_shader *shader, nir_variable_mode mode,
                           struct varying_loc (*remap)[4],
                           uint64_t *slots_used, uint64_t *out_slots_read,
                           uint32_t *p_slots_used, uint32_t *p_out_slots_read)
425 {
426 const gl_shader_stage stage = shader->info.stage;
427 uint64_t out_slots_read_tmp[2] = { 0 };
428 uint64_t slots_used_tmp[2] = { 0 };
429
430 /* We don't touch builtins so just copy the bitmask */
431 slots_used_tmp[0] = *slots_used & BITFIELD64_RANGE(0, VARYING_SLOT_VAR0);
432
433 nir_foreach_variable_with_modes(var, shader, mode) {
434 assert(var->data.location >= 0);
435
436 /* Only remap things that aren't built-ins */
437 if (var->data.location >= VARYING_SLOT_VAR0 &&
438 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
439
440 const struct glsl_type *type = var->type;
441 if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
442 assert(glsl_type_is_array(type));
443 type = glsl_get_array_element(type);
444 }
445
446 unsigned num_slots = glsl_count_attribute_slots(type, false);
447 bool used_across_stages = false;
448 bool outputs_read = false;
449
450 unsigned location = var->data.location - VARYING_SLOT_VAR0;
451 struct varying_loc *new_loc = &remap[location][var->data.location_frac];
452
453 unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
454 uint64_t used = var->data.patch ? *p_slots_used : *slots_used;
455 uint64_t outs_used =
456 var->data.patch ? *p_out_slots_read : *out_slots_read;
457 uint64_t slots =
458 BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
459
460 if (slots & used)
461 used_across_stages = true;
462
463 if (slots & outs_used)
464 outputs_read = true;
465
466 if (new_loc->location) {
467 var->data.location = new_loc->location;
468 var->data.location_frac = new_loc->component;
469 }
470
471 if (var->data.always_active_io) {
            /* We can't apply link-time optimisations (specifically array
             * splitting) to these, so we need to copy the existing mask;
             * otherwise we will mess up the mask for things like partially
             * marked arrays.
             */
477 if (used_across_stages)
478 mark_all_used_slots(var, slots_used_tmp, used, num_slots);
479
480 if (outputs_read) {
481 mark_all_used_slots(var, out_slots_read_tmp, outs_used,
482 num_slots);
483 }
484 } else {
485 for (unsigned i = 0; i < num_slots; i++) {
486 if (used_across_stages)
487 mark_used_slot(var, slots_used_tmp, i);
488
489 if (outputs_read)
490 mark_used_slot(var, out_slots_read_tmp, i);
491 }
492 }
493 }
494 }
495
496 *slots_used = slots_used_tmp[0];
497 *out_slots_read = out_slots_read_tmp[0];
498 *p_slots_used = slots_used_tmp[1];
499 *p_out_slots_read = out_slots_read_tmp[1];
500 }
501
502 struct varying_component {
503 nir_variable *var;
504 uint8_t interp_type;
505 uint8_t interp_loc;
506 bool is_32bit;
507 bool is_patch;
508 bool is_per_primitive;
509 bool is_mediump;
510 bool is_intra_stage_only;
511 bool initialised;
512 };
513
static int
cmp_varying_component(const void *comp1_v, const void *comp2_v)
516 {
517 struct varying_component *comp1 = (struct varying_component *)comp1_v;
518 struct varying_component *comp2 = (struct varying_component *)comp2_v;
519
   /* We want patches to be ordered at the end of the array */
521 if (comp1->is_patch != comp2->is_patch)
522 return comp1->is_patch ? 1 : -1;
523
524 /* Sort per-primitive outputs after per-vertex ones to allow
525 * better compaction when they are mixed in the shader's source.
526 */
527 if (comp1->is_per_primitive != comp2->is_per_primitive)
528 return comp1->is_per_primitive ? 1 : -1;
529
   /* We want to try to group together TCS outputs that are only read by other
    * TCS invocations and not consumed by the following stage.
    */
533 if (comp1->is_intra_stage_only != comp2->is_intra_stage_only)
534 return comp1->is_intra_stage_only ? 1 : -1;
535
536 /* Group mediump varyings together. */
537 if (comp1->is_mediump != comp2->is_mediump)
538 return comp1->is_mediump ? 1 : -1;
539
540 /* We can only pack varyings with matching interpolation types so group
541 * them together.
542 */
543 if (comp1->interp_type != comp2->interp_type)
544 return comp1->interp_type - comp2->interp_type;
545
546 /* Interpolation loc must match also. */
547 if (comp1->interp_loc != comp2->interp_loc)
548 return comp1->interp_loc - comp2->interp_loc;
549
550 /* If everything else matches just use the original location to sort */
551 const struct nir_variable_data *const data1 = &comp1->var->data;
552 const struct nir_variable_data *const data2 = &comp2->var->data;
553 if (data1->location != data2->location)
554 return data1->location - data2->location;
555 return (int)data1->location_frac - (int)data2->location_frac;
556 }
557
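/* Build the array of packable varying components by walking the producer's
 * outputs and then the consumer's input loads (plus, for TCS producers, the
 * producer's own output reads).
 */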
static void
gather_varying_component_info(nir_shader *producer, nir_shader *consumer,
                              struct varying_component **varying_comp_info,
                              unsigned *varying_comp_info_size,
                              bool default_to_smooth_interp)
563 {
564 unsigned store_varying_info_idx[MAX_VARYINGS_INCL_PATCH][4] = { { 0 } };
565 unsigned num_of_comps_to_pack = 0;
566
   /* Count the number of varyings that can be packed and create a mapping
    * of those varyings to the array we will pass to qsort.
    */
570 nir_foreach_shader_out_variable(var, producer) {
571
572 /* Only remap things that aren't builtins. */
573 if (var->data.location >= VARYING_SLOT_VAR0 &&
574 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
575
576 /* We can't repack xfb varyings. */
577 if (var->data.always_active_io)
578 continue;
579
580 const struct glsl_type *type = var->type;
581 if (nir_is_arrayed_io(var, producer->info.stage) || var->data.per_view) {
582 assert(glsl_type_is_array(type));
583 type = glsl_get_array_element(type);
584 }
585
586 if (!is_packing_supported_for_type(type))
587 continue;
588
589 unsigned loc = var->data.location - VARYING_SLOT_VAR0;
590 store_varying_info_idx[loc][var->data.location_frac] =
591 ++num_of_comps_to_pack;
592 }
593 }
594
595 *varying_comp_info_size = num_of_comps_to_pack;
596 *varying_comp_info = rzalloc_array(NULL, struct varying_component,
597 num_of_comps_to_pack);
598
599 nir_function_impl *impl = nir_shader_get_entrypoint(consumer);
600
601 /* Walk over the shader and populate the varying component info array */
602 nir_foreach_block(block, impl) {
603 nir_foreach_instr(instr, block) {
604 if (instr->type != nir_instr_type_intrinsic)
605 continue;
606
607 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
608 if (intr->intrinsic != nir_intrinsic_load_deref &&
609 intr->intrinsic != nir_intrinsic_interp_deref_at_centroid &&
610 intr->intrinsic != nir_intrinsic_interp_deref_at_sample &&
611 intr->intrinsic != nir_intrinsic_interp_deref_at_offset &&
612 intr->intrinsic != nir_intrinsic_interp_deref_at_vertex)
613 continue;
614
615 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
616 if (!nir_deref_mode_is(deref, nir_var_shader_in))
617 continue;
618
619 /* We only remap things that aren't builtins. */
620 nir_variable *in_var = nir_deref_instr_get_variable(deref);
621 if (in_var->data.location < VARYING_SLOT_VAR0)
622 continue;
623
         /* Do not remap per-vertex shader inputs because they are 3-element
          * arrays and remapping them isn't supported.
          */
627 if (in_var->data.per_vertex)
628 continue;
629
630 unsigned location = in_var->data.location - VARYING_SLOT_VAR0;
631 if (location >= MAX_VARYINGS_INCL_PATCH)
632 continue;
633
634 unsigned var_info_idx =
635 store_varying_info_idx[location][in_var->data.location_frac];
636 if (!var_info_idx)
637 continue;
638
639 struct varying_component *vc_info =
640 &(*varying_comp_info)[var_info_idx - 1];
641
642 if (!vc_info->initialised) {
643 const struct glsl_type *type = in_var->type;
644 if (nir_is_arrayed_io(in_var, consumer->info.stage) ||
645 in_var->data.per_view) {
646 assert(glsl_type_is_array(type));
647 type = glsl_get_array_element(type);
648 }
649
650 vc_info->var = in_var;
651 vc_info->interp_type =
652 get_interp_type(in_var, type, default_to_smooth_interp);
653 vc_info->interp_loc = get_interp_loc(in_var);
654 vc_info->is_32bit = glsl_type_is_32bit(type);
655 vc_info->is_patch = in_var->data.patch;
656 vc_info->is_per_primitive = in_var->data.per_primitive;
657 vc_info->is_mediump = !producer->options->linker_ignore_precision &&
658 (in_var->data.precision == GLSL_PRECISION_MEDIUM ||
659 in_var->data.precision == GLSL_PRECISION_LOW);
660 vc_info->is_intra_stage_only = false;
661 vc_info->initialised = true;
662 }
663 }
664 }
665
666 /* Walk over the shader and populate the varying component info array
667 * for varyings which are read by other TCS instances but are not consumed
668 * by the TES.
669 */
670 if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
671 impl = nir_shader_get_entrypoint(producer);
672
673 nir_foreach_block(block, impl) {
674 nir_foreach_instr(instr, block) {
675 if (instr->type != nir_instr_type_intrinsic)
676 continue;
677
678 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
679 if (intr->intrinsic != nir_intrinsic_load_deref)
680 continue;
681
682 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
683 if (!nir_deref_mode_is(deref, nir_var_shader_out))
684 continue;
685
686 /* We only remap things that aren't builtins. */
687 nir_variable *out_var = nir_deref_instr_get_variable(deref);
688 if (out_var->data.location < VARYING_SLOT_VAR0)
689 continue;
690
691 unsigned location = out_var->data.location - VARYING_SLOT_VAR0;
692 if (location >= MAX_VARYINGS_INCL_PATCH)
693 continue;
694
695 unsigned var_info_idx =
696 store_varying_info_idx[location][out_var->data.location_frac];
697 if (!var_info_idx) {
698 /* Something went wrong, the shader interfaces didn't match, so
699 * abandon packing. This can happen for example when the
700 * inputs are scalars but the outputs are struct members.
701 */
702 *varying_comp_info_size = 0;
703 break;
704 }
705
706 struct varying_component *vc_info =
707 &(*varying_comp_info)[var_info_idx - 1];
708
709 if (!vc_info->initialised) {
710 const struct glsl_type *type = out_var->type;
711 if (nir_is_arrayed_io(out_var, producer->info.stage)) {
712 assert(glsl_type_is_array(type));
713 type = glsl_get_array_element(type);
714 }
715
716 vc_info->var = out_var;
717 vc_info->interp_type =
718 get_interp_type(out_var, type, default_to_smooth_interp);
719 vc_info->interp_loc = get_interp_loc(out_var);
720 vc_info->is_32bit = glsl_type_is_32bit(type);
721 vc_info->is_patch = out_var->data.patch;
722 vc_info->is_per_primitive = out_var->data.per_primitive;
723 vc_info->is_mediump = !producer->options->linker_ignore_precision &&
724 (out_var->data.precision == GLSL_PRECISION_MEDIUM ||
725 out_var->data.precision == GLSL_PRECISION_LOW);
726 vc_info->is_intra_stage_only = true;
727 vc_info->initialised = true;
728 }
729 }
730 }
731 }
732
733 for (unsigned i = 0; i < *varying_comp_info_size; i++) {
734 struct varying_component *vc_info = &(*varying_comp_info)[i];
735 if (!vc_info->initialised) {
736 /* Something went wrong, the shader interfaces didn't match, so
737 * abandon packing. This can happen for example when the outputs are
738 * scalars but the inputs are struct members.
739 */
740 *varying_comp_info_size = 0;
741 break;
742 }
743 }
744 }
745
static bool
allow_pack_interp_type(nir_io_options options, int type)
748 {
749 switch (type) {
750 case INTERP_MODE_NONE:
751 case INTERP_MODE_SMOOTH:
752 case INTERP_MODE_NOPERSPECTIVE:
753 return options & nir_io_has_flexible_input_interpolation_except_flat;
754 default:
755 return false;
756 }
757 }
758
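/* Scan forward from *cursor for a slot whose already-assigned components are
 * compatible with "info" (per-primitive, precision, interpolation, bit size)
 * and record the new location/component in the remap table.
 */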
static void
assign_remap_locations(struct varying_loc (*remap)[4],
                       struct assigned_comps *assigned_comps,
                       struct varying_component *info,
                       unsigned *cursor, unsigned *comp,
                       unsigned max_location,
                       nir_io_options options)
766 {
767 unsigned tmp_cursor = *cursor;
768 unsigned tmp_comp = *comp;
769
770 for (; tmp_cursor < max_location; tmp_cursor++) {
771
772 if (assigned_comps[tmp_cursor].comps) {
773 /* Don't pack per-primitive and per-vertex varyings together. */
774 if (assigned_comps[tmp_cursor].is_per_primitive != info->is_per_primitive) {
775 tmp_comp = 0;
776 continue;
777 }
778
779 /* We can only pack varyings with matching precision. */
780 if (assigned_comps[tmp_cursor].is_mediump != info->is_mediump) {
781 tmp_comp = 0;
782 continue;
783 }
784
         /* Unless the driver supports flexible input interpolation, we can
          * only pack varyings with matching interpolation types.
          */
788 if (assigned_comps[tmp_cursor].interp_type != info->interp_type &&
789 (!allow_pack_interp_type(options, assigned_comps[tmp_cursor].interp_type) ||
790 !allow_pack_interp_type(options, info->interp_type))) {
791 tmp_comp = 0;
792 continue;
793 }
794
         /* Unless the driver supports flexible input interpolation, we can
          * only pack varyings with matching interpolation locations.
          */
798 if (assigned_comps[tmp_cursor].interp_loc != info->interp_loc &&
799 !(options & nir_io_has_flexible_input_interpolation_except_flat)) {
800 tmp_comp = 0;
801 continue;
802 }
803
804 /* We can only pack varyings with matching types, and the current
805 * algorithm only supports packing 32-bit.
806 */
807 if (!assigned_comps[tmp_cursor].is_32bit) {
808 tmp_comp = 0;
809 continue;
810 }
811
812 while (tmp_comp < 4 &&
813 (assigned_comps[tmp_cursor].comps & (1 << tmp_comp))) {
814 tmp_comp++;
815 }
816 }
817
818 if (tmp_comp == 4) {
819 tmp_comp = 0;
820 continue;
821 }
822
823 unsigned location = info->var->data.location - VARYING_SLOT_VAR0;
824
825 /* Once we have assigned a location mark it as used */
826 assigned_comps[tmp_cursor].comps |= (1 << tmp_comp);
827 assigned_comps[tmp_cursor].interp_type = info->interp_type;
828 assigned_comps[tmp_cursor].interp_loc = info->interp_loc;
829 assigned_comps[tmp_cursor].is_32bit = info->is_32bit;
830 assigned_comps[tmp_cursor].is_mediump = info->is_mediump;
831 assigned_comps[tmp_cursor].is_per_primitive = info->is_per_primitive;
832
833 /* Assign remap location */
834 remap[location][info->var->data.location_frac].component = tmp_comp++;
835 remap[location][info->var->data.location_frac].location =
836 tmp_cursor + VARYING_SLOT_VAR0;
837
838 break;
839 }
840
841 *cursor = tmp_cursor;
842 *comp = tmp_comp;
843 }
844
845 /* If there are empty components in the slot compact the remaining components
846 * as close to component 0 as possible. This will make it easier to fill the
847 * empty components with components from a different slot in a following pass.
848 */
static void
compact_components(nir_shader *producer, nir_shader *consumer,
                   struct assigned_comps *assigned_comps,
                   bool default_to_smooth_interp)
853 {
854 struct varying_loc remap[MAX_VARYINGS_INCL_PATCH][4] = { { { 0 }, { 0 } } };
855 struct varying_component *varying_comp_info;
856 unsigned varying_comp_info_size;
857
858 /* Gather varying component info */
859 gather_varying_component_info(producer, consumer, &varying_comp_info,
860 &varying_comp_info_size,
861 default_to_smooth_interp);
862
863 /* Sort varying components. */
864 qsort(varying_comp_info, varying_comp_info_size,
865 sizeof(struct varying_component), cmp_varying_component);
866
867 unsigned cursor = 0;
868 unsigned comp = 0;
869
870 /* Set the remap array based on the sorted components */
871 for (unsigned i = 0; i < varying_comp_info_size; i++) {
872 struct varying_component *info = &varying_comp_info[i];
873
874 assert(info->is_patch || cursor < MAX_VARYING);
875 if (info->is_patch) {
876 /* The list should be sorted with all non-patch inputs first followed
877 * by patch inputs. When we hit our first patch input, we need to
878 * reset the cursor to MAX_VARYING so we put them in the right slot.
879 */
880 if (cursor < MAX_VARYING) {
881 cursor = MAX_VARYING;
882 comp = 0;
883 }
884
885 assign_remap_locations(remap, assigned_comps, info,
886 &cursor, &comp, MAX_VARYINGS_INCL_PATCH,
887 consumer->options->io_options);
888 } else {
889 assign_remap_locations(remap, assigned_comps, info,
890 &cursor, &comp, MAX_VARYING,
891 consumer->options->io_options);
892
893 /* Check if we failed to assign a remap location. This can happen if
894 * for example there are a bunch of unmovable components with
895 * mismatching interpolation types causing us to skip over locations
896 * that would have been useful for packing later components.
897 * The solution is to iterate over the locations again (this should
898 * happen very rarely in practice).
899 */
900 if (cursor == MAX_VARYING) {
901 cursor = 0;
902 comp = 0;
903 assign_remap_locations(remap, assigned_comps, info,
904 &cursor, &comp, MAX_VARYING,
905 consumer->options->io_options);
906 }
907 }
908 }
909
910 ralloc_free(varying_comp_info);
911
912 uint64_t zero = 0;
913 uint32_t zero32 = 0;
914 remap_slots_and_components(consumer, nir_var_shader_in, remap,
915 &consumer->info.inputs_read, &zero,
916 &consumer->info.patch_inputs_read, &zero32);
917 remap_slots_and_components(producer, nir_var_shader_out, remap,
918 &producer->info.outputs_written,
919 &producer->info.outputs_read,
920 &producer->info.patch_outputs_written,
921 &producer->info.patch_outputs_read);
922 }
923
924 /* We assume that this has been called more-or-less directly after
925 * remove_unused_varyings. At this point, all of the varyings that we
926 * aren't going to be using have been completely removed and the
927 * inputs_read and outputs_written fields in nir_shader_info reflect
928 * this. Therefore, the total set of valid slots is the OR of the two
929 * sets of varyings; this accounts for varyings which one side may need
930 * to read/write even if the other doesn't. This can happen if, for
931 * instance, an array is used indirectly from one side causing it to be
932 * unsplittable but directly from the other.
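 *
 * A typical sequence (a sketch; exact pass ordering is driver-specific) is:
 *
 *    nir_remove_unused_varyings(producer, consumer);
 *    ... driver cleanup / DCE passes ...
 *    nir_compact_varyings(producer, consumer, default_to_smooth_interp);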
933 */
void
nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
                     bool default_to_smooth_interp)
937 {
938 assert(producer->info.stage != MESA_SHADER_FRAGMENT);
939 assert(consumer->info.stage != MESA_SHADER_VERTEX);
940
941 struct assigned_comps assigned_comps[MAX_VARYINGS_INCL_PATCH] = { { 0 } };
942
943 get_unmoveable_components_masks(producer, nir_var_shader_out,
944 assigned_comps,
945 producer->info.stage,
946 default_to_smooth_interp);
947 get_unmoveable_components_masks(consumer, nir_var_shader_in,
948 assigned_comps,
949 consumer->info.stage,
950 default_to_smooth_interp);
951
952 compact_components(producer, consumer, assigned_comps,
953 default_to_smooth_interp);
954 }
955
956 /*
957 * Mark XFB varyings as always_active_io in the consumer so the linking opts
958 * don't touch them.
959 */
void
nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer)
962 {
963 nir_variable *input_vars[MAX_VARYING][4] = { 0 };
964
965 nir_foreach_shader_in_variable(var, consumer) {
966 if (var->data.location >= VARYING_SLOT_VAR0 &&
967 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {
968
969 unsigned location = var->data.location - VARYING_SLOT_VAR0;
970 input_vars[location][var->data.location_frac] = var;
971 }
972 }
973
974 nir_foreach_shader_out_variable(var, producer) {
975 if (var->data.location >= VARYING_SLOT_VAR0 &&
976 var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {
977
978 if (!var->data.always_active_io)
979 continue;
980
981 unsigned location = var->data.location - VARYING_SLOT_VAR0;
982 if (input_vars[location][var->data.location_frac]) {
983 input_vars[location][var->data.location_frac]->data.always_active_io = true;
984 }
985 }
986 }
987 }
988
static bool
does_varying_match(nir_variable *out_var, nir_variable *in_var)
991 {
992 return in_var->data.location == out_var->data.location &&
993 in_var->data.location_frac == out_var->data.location_frac &&
994 in_var->type == out_var->type;
995 }
996
static nir_variable *
get_matching_input_var(nir_shader *consumer, nir_variable *out_var)
999 {
1000 nir_foreach_shader_in_variable(var, consumer) {
1001 if (does_varying_match(out_var, var))
1002 return var;
1003 }
1004
1005 return NULL;
1006 }
1007
static bool
can_replace_varying(nir_variable *out_var)
1010 {
1011 /* Skip types that require more complex handling.
1012 * TODO: add support for these types.
1013 */
1014 if (glsl_type_is_array(out_var->type) ||
1015 glsl_type_is_dual_slot(out_var->type) ||
1016 glsl_type_is_matrix(out_var->type) ||
1017 glsl_type_is_struct_or_ifc(out_var->type))
1018 return false;
1019
1020 /* Limit this pass to scalars for now to keep things simple. Most varyings
1021 * should have been lowered to scalars at this point anyway.
1022 */
1023 if (!glsl_type_is_scalar(out_var->type))
1024 return false;
1025
1026 if (out_var->data.location < VARYING_SLOT_VAR0 ||
1027 out_var->data.location - VARYING_SLOT_VAR0 >= MAX_VARYING)
1028 return false;
1029
1030 return true;
1031 }
1032
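/* The producer writes a constant to this output, so replace every load of the
 * matching input in the consumer with that constant.
 */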
static bool
replace_varying_input_by_constant_load(nir_shader *shader,
                                       nir_intrinsic_instr *store_intr)
1036 {
1037 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
1038
1039 nir_builder b = nir_builder_create(impl);
1040
1041 nir_variable *out_var = nir_intrinsic_get_var(store_intr, 0);
1042
1043 bool progress = false;
1044 nir_foreach_block(block, impl) {
1045 nir_foreach_instr(instr, block) {
1046 if (instr->type != nir_instr_type_intrinsic)
1047 continue;
1048
1049 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1050 if (intr->intrinsic != nir_intrinsic_load_deref)
1051 continue;
1052
1053 nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
1054 if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
1055 continue;
1056
1057 nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
1058
1059 if (!does_varying_match(out_var, in_var))
1060 continue;
1061
1062 b.cursor = nir_before_instr(instr);
1063
1064 nir_load_const_instr *out_const =
1065 nir_instr_as_load_const(store_intr->src[1].ssa->parent_instr);
1066
1067 /* Add new const to replace the input */
1068 nir_def *nconst = nir_build_imm(&b, store_intr->num_components,
1069 intr->def.bit_size,
1070 out_const->value);
1071
1072 nir_def_rewrite_uses(&intr->def, nconst);
1073
1074 progress = true;
1075 }
1076 }
1077
1078 return progress;
1079 }
1080
static bool
replace_duplicate_input(nir_shader *shader, nir_variable *input_var,
                        nir_intrinsic_instr *dup_store_intr)
1084 {
1085 assert(input_var);
1086
1087 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
1088
1089 nir_builder b = nir_builder_create(impl);
1090
1091 nir_variable *dup_out_var = nir_intrinsic_get_var(dup_store_intr, 0);
1092
1093 bool progress = false;
1094 nir_foreach_block(block, impl) {
1095 nir_foreach_instr(instr, block) {
1096 if (instr->type != nir_instr_type_intrinsic)
1097 continue;
1098
1099 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1100 if (intr->intrinsic != nir_intrinsic_load_deref)
1101 continue;
1102
1103 nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
1104 if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
1105 continue;
1106
1107 nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
1108
1109 if (!does_varying_match(dup_out_var, in_var) ||
1110 in_var->data.interpolation != input_var->data.interpolation ||
1111 get_interp_loc(in_var) != get_interp_loc(input_var) ||
1112 in_var->data.per_vertex)
1113 continue;
1114
1115 b.cursor = nir_before_instr(instr);
1116
1117 nir_def *load = nir_load_var(&b, input_var);
1118 nir_def_rewrite_uses(&intr->def, load);
1119
1120 progress = true;
1121 }
1122 }
1123
1124 return progress;
1125 }
1126
static bool
is_direct_uniform_load(nir_def *def, nir_scalar *s)
1129 {
   /* def is sure to be a scalar, as can_replace_varying() filters out the vector case. */
1131 assert(def->num_components == 1);
1132
   /* A uniform load may be hidden behind a mov instruction that converts
    * a vector to a scalar:
    *
1136 * vec1 32 ssa_1 = deref_var &color (uniform vec3)
1137 * vec3 32 ssa_2 = intrinsic load_deref (ssa_1) (0)
1138 * vec1 32 ssa_3 = mov ssa_2.x
1139 * vec1 32 ssa_4 = deref_var &color_out (shader_out float)
1140 * intrinsic store_deref (ssa_4, ssa_3) (1, 0)
1141 */
1142 *s = nir_scalar_resolved(def, 0);
1143
1144 nir_def *ssa = s->def;
1145 if (ssa->parent_instr->type != nir_instr_type_intrinsic)
1146 return false;
1147
1148 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(ssa->parent_instr);
1149 if (intr->intrinsic != nir_intrinsic_load_deref)
1150 return false;
1151
1152 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
1153 /* TODO: support nir_var_mem_ubo. */
1154 if (!nir_deref_mode_is(deref, nir_var_uniform))
1155 return false;
1156
1157 /* Does not support indirect uniform load. */
1158 return !nir_deref_instr_has_indirect(deref);
1159 }
1160
1161 /**
1162 * Add a uniform variable from one shader to a different shader.
1163 *
1164 * \param nir The shader where to add the uniform
1165 * \param uniform The uniform that's declared in another shader.
1166 */
nir_variable *
nir_clone_uniform_variable(nir_shader *nir, nir_variable *uniform, bool spirv)
1169 {
1170 /* Find if uniform already exists in consumer. */
1171 nir_variable *new_var = NULL;
1172 nir_foreach_variable_with_modes(v, nir, uniform->data.mode) {
1173 if ((spirv && uniform->data.mode & nir_var_mem_ubo &&
1174 v->data.binding == uniform->data.binding) ||
1175 (!spirv && !strcmp(uniform->name, v->name))) {
1176 new_var = v;
1177 break;
1178 }
1179 }
1180
   /* Create the variable if it doesn't already exist. */
1182 if (!new_var) {
1183 new_var = nir_variable_clone(uniform, nir);
1184 nir_shader_add_variable(nir, new_var);
1185 }
1186
1187 return new_var;
1188 }
1189
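/**
 * Recursively clone a (direct) deref chain into the builder's shader, with
 * "var" replacing the original variable at the root of the chain.
 */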
nir_deref_instr *
nir_clone_deref_instr(nir_builder *b, nir_variable *var,
                      nir_deref_instr *deref)
1193 {
1194 if (deref->deref_type == nir_deref_type_var)
1195 return nir_build_deref_var(b, var);
1196
1197 nir_deref_instr *parent_deref = nir_deref_instr_parent(deref);
1198 nir_deref_instr *parent = nir_clone_deref_instr(b, var, parent_deref);
1199
   /* Build the array or struct deref instruction.
    * The "deref" instr is sure to be direct (see is_direct_uniform_load()).
    */
1203 switch (deref->deref_type) {
1204 case nir_deref_type_array: {
1205 nir_load_const_instr *index =
1206 nir_instr_as_load_const(deref->arr.index.ssa->parent_instr);
1207 return nir_build_deref_array_imm(b, parent, index->value->i64);
1208 }
1209 case nir_deref_type_ptr_as_array: {
1210 nir_load_const_instr *index =
1211 nir_instr_as_load_const(deref->arr.index.ssa->parent_instr);
1212 nir_def *ssa = nir_imm_intN_t(b, index->value->i64,
1213 parent->def.bit_size);
1214 return nir_build_deref_ptr_as_array(b, parent, ssa);
1215 }
1216 case nir_deref_type_struct:
1217 return nir_build_deref_struct(b, parent, deref->strct.index);
1218 default:
1219 unreachable("invalid type");
1220 return NULL;
1221 }
1222 }
1223
static bool
replace_varying_input_by_uniform_load(nir_shader *shader,
                                      nir_intrinsic_instr *store_intr,
                                      nir_scalar *scalar)
1228 {
1229 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
1230
1231 nir_builder b = nir_builder_create(impl);
1232
1233 nir_variable *out_var = nir_intrinsic_get_var(store_intr, 0);
1234
1235 nir_intrinsic_instr *load = nir_instr_as_intrinsic(scalar->def->parent_instr);
1236 nir_deref_instr *deref = nir_src_as_deref(load->src[0]);
1237 nir_variable *uni_var = nir_deref_instr_get_variable(deref);
1238 uni_var = nir_clone_uniform_variable(shader, uni_var, false);
1239
1240 bool progress = false;
1241 nir_foreach_block(block, impl) {
1242 nir_foreach_instr(instr, block) {
1243 if (instr->type != nir_instr_type_intrinsic)
1244 continue;
1245
1246 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1247 if (intr->intrinsic != nir_intrinsic_load_deref)
1248 continue;
1249
1250 nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
1251 if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
1252 continue;
1253
1254 nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
1255
1256 if (!does_varying_match(out_var, in_var))
1257 continue;
1258
1259 b.cursor = nir_before_instr(instr);
1260
         /* Clone the uniform's deref chain into this shader. */
1262 nir_deref_instr *uni_deref = nir_clone_deref_instr(&b, uni_var, deref);
1263 nir_def *uni_def = nir_load_deref(&b, uni_deref);
1264
         /* Add a vector-to-scalar move if the uniform is a vector. */
1266 if (uni_def->num_components > 1) {
1267 nir_alu_src src = { 0 };
1268 src.src = nir_src_for_ssa(uni_def);
1269 src.swizzle[0] = scalar->comp;
1270 uni_def = nir_mov_alu(&b, src, 1);
1271 }
1272
1273 /* Replace load input with load uniform. */
1274 nir_def_rewrite_uses(&intr->def, uni_def);
1275
1276 progress = true;
1277 }
1278 }
1279
1280 return progress;
1281 }
1282
1283 /* The GLSL ES 3.20 spec says:
1284 *
1285 * "The precision of a vertex output does not need to match the precision of
1286 * the corresponding fragment input. The minimum precision at which vertex
1287 * outputs are interpolated is the minimum of the vertex output precision and
1288 * the fragment input precision, with the exception that for highp,
1289 * implementations do not have to support full IEEE 754 precision." (9.1 "Input
1290 * Output Matching by Name in Linked Programs")
1291 *
1292 * To implement this, when linking shaders we will take the minimum precision
1293 * qualifier (allowing drivers to interpolate at lower precision). For
1294 * input/output between non-fragment stages (e.g. VERTEX to GEOMETRY), the spec
1295 * requires we use the *last* specified precision if there is a conflict.
1296 *
1297 * Precisions are ordered as (NONE, HIGH, MEDIUM, LOW). If either precision is
1298 * NONE, we'll return the other precision, since there is no conflict.
1299 * Otherwise for fragment interpolation, we'll pick the smallest of (HIGH,
1300 * MEDIUM, LOW) by picking the maximum of the raw values - note the ordering is
1301 * "backwards". For non-fragment stages, we'll pick the latter precision to
1302 * comply with the spec. (Note that the order matters.)
1303 *
 * For streamout, "Variables declared with lowp or mediump precision are
 * promoted to highp before being written." (12.2 "Transform Feedback", p. 341
 * of OpenGL ES 3.2 specification). So drivers should promote them for the
 * transform feedback memory store, but not for the output store.
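 *
 * For example, a highp vertex output feeding a mediump fragment input is
 * interpolated at mediump, while for a VERTEX -> GEOMETRY pair the geometry
 * input's precision is used.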
1308 */
1309
static unsigned
nir_link_precision(unsigned producer, unsigned consumer, bool fs)
1312 {
1313 if (producer == GLSL_PRECISION_NONE)
1314 return consumer;
1315 else if (consumer == GLSL_PRECISION_NONE)
1316 return producer;
1317 else
1318 return fs ? MAX2(producer, consumer) : consumer;
1319 }
1320
static nir_variable *
find_consumer_variable(const nir_shader *consumer,
                       const nir_variable *producer_var)
1324 {
1325 nir_foreach_variable_with_modes(var, consumer, nir_var_shader_in) {
1326 if (var->data.location == producer_var->data.location &&
1327 var->data.location_frac == producer_var->data.location_frac)
1328 return var;
1329 }
1330 return NULL;
1331 }
1332
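/**
 * Apply the precision-merging rule described above to each matching
 * producer/consumer variable pair and store the result on both variables.
 */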
void
nir_link_varying_precision(nir_shader *producer, nir_shader *consumer)
1335 {
1336 bool frag = consumer->info.stage == MESA_SHADER_FRAGMENT;
1337
1338 nir_foreach_shader_out_variable(producer_var, producer) {
1339 /* Skip if the slot is not assigned */
1340 if (producer_var->data.location < 0)
1341 continue;
1342
1343 nir_variable *consumer_var = find_consumer_variable(consumer,
1344 producer_var);
1345
1346 /* Skip if the variable will be eliminated */
1347 if (!consumer_var)
1348 continue;
1349
1350 /* Now we have a pair of variables. Let's pick the smaller precision. */
1351 unsigned precision_1 = producer_var->data.precision;
1352 unsigned precision_2 = consumer_var->data.precision;
1353 unsigned minimum = nir_link_precision(precision_1, precision_2, frag);
1354
1355 /* Propagate the new precision */
1356 producer_var->data.precision = consumer_var->data.precision = minimum;
1357 }
1358 }
1359
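/**
 * Link-time value propagation for varyings: if the final value stored to a
 * producer output (in its last block) is a constant, a direct uniform load,
 * or a value already written to another output, rewrite the consumer's
 * matching input loads accordingly (constant/uniform propagation or input
 * deduplication).
 *
 * Example usage is:
 *
 *    progress = nir_link_opt_varyings(producer, consumer) || progress;
 */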
bool
nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer)
1362 {
1363 /* TODO: Add support for more shader stage combinations */
1364 if (consumer->info.stage != MESA_SHADER_FRAGMENT ||
1365 (producer->info.stage != MESA_SHADER_VERTEX &&
1366 producer->info.stage != MESA_SHADER_TESS_EVAL))
1367 return false;
1368
1369 bool progress = false;
1370
1371 nir_function_impl *impl = nir_shader_get_entrypoint(producer);
1372
1373 struct hash_table *varying_values = _mesa_pointer_hash_table_create(NULL);
1374
1375 /* If we find a store in the last block of the producer we can be sure this
1376 * is the only possible value for this output.
1377 */
1378 nir_block *last_block = nir_impl_last_block(impl);
1379 nir_foreach_instr_reverse(instr, last_block) {
1380 if (instr->type != nir_instr_type_intrinsic)
1381 continue;
1382
1383 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1384
1385 if (intr->intrinsic != nir_intrinsic_store_deref)
1386 continue;
1387
1388 nir_deref_instr *out_deref = nir_src_as_deref(intr->src[0]);
1389 if (!nir_deref_mode_is(out_deref, nir_var_shader_out))
1390 continue;
1391
1392 nir_variable *out_var = nir_deref_instr_get_variable(out_deref);
1393 if (!can_replace_varying(out_var))
1394 continue;
1395
1396 nir_def *ssa = intr->src[1].ssa;
1397 if (ssa->parent_instr->type == nir_instr_type_load_const) {
1398 progress |= replace_varying_input_by_constant_load(consumer, intr);
1399 continue;
1400 }
1401
1402 nir_scalar uni_scalar;
1403 if (is_direct_uniform_load(ssa, &uni_scalar)) {
1404 if (consumer->options->lower_varying_from_uniform) {
1405 progress |= replace_varying_input_by_uniform_load(consumer, intr,
1406 &uni_scalar);
1407 continue;
1408 } else {
1409 nir_variable *in_var = get_matching_input_var(consumer, out_var);
            /* The varying is loaded from the same uniform, so there is no
             * need for interpolation. Mark it as flat explicitly.
             */
1413 if (!consumer->options->no_integers &&
1414 in_var && in_var->data.interpolation <= INTERP_MODE_NOPERSPECTIVE) {
1415 in_var->data.interpolation = INTERP_MODE_FLAT;
1416 out_var->data.interpolation = INTERP_MODE_FLAT;
1417 }
1418 }
1419 }
1420
1421 struct hash_entry *entry = _mesa_hash_table_search(varying_values, ssa);
1422 if (entry) {
1423 progress |= replace_duplicate_input(consumer,
1424 (nir_variable *)entry->data,
1425 intr);
1426 } else {
1427 nir_variable *in_var = get_matching_input_var(consumer, out_var);
1428 if (in_var) {
1429 _mesa_hash_table_insert(varying_values, ssa, in_var);
1430 }
1431 }
1432 }
1433
1434 _mesa_hash_table_destroy(varying_values, NULL);
1435
1436 return progress;
1437 }
1438
1439 /* TODO any better helper somewhere to sort a list? */
1440
static void
insert_sorted(struct exec_list *var_list, nir_variable *new_var)
1443 {
1444 nir_foreach_variable_in_list(var, var_list) {
1445 /* Use the `per_primitive` bool to sort per-primitive variables
1446 * to the end of the list, so they get the last driver locations
1447 * by nir_assign_io_var_locations.
1448 *
1449 * This is done because AMD HW requires that per-primitive outputs
1450 * are the last params.
1451 * In the future we can add an option for this, if needed by other HW.
1452 */
1453 if (new_var->data.per_primitive < var->data.per_primitive ||
1454 (new_var->data.per_primitive == var->data.per_primitive &&
1455 (var->data.location > new_var->data.location ||
1456 (var->data.location == new_var->data.location &&
1457 var->data.location_frac > new_var->data.location_frac)))) {
1458 exec_node_insert_node_before(&var->node, &new_var->node);
1459 return;
1460 }
1461 }
1462 exec_list_push_tail(var_list, &new_var->node);
1463 }
1464
static void
sort_varyings(nir_shader *shader, nir_variable_mode mode,
              struct exec_list *sorted_list)
1468 {
1469 exec_list_make_empty(sorted_list);
1470 nir_foreach_variable_with_modes_safe(var, shader, mode) {
1471 exec_node_remove(&var->node);
1472 insert_sorted(sorted_list, var);
1473 }
1474 }
1475
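/**
 * Sort the variables of the given mode by location (per-primitive variables
 * last) and move them to the end of the shader's variable list.
 */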
void
nir_sort_variables_by_location(nir_shader *shader, nir_variable_mode mode)
1478 {
1479 struct exec_list vars;
1480
1481 sort_varyings(shader, mode, &vars);
1482 exec_list_append(&shader->variables, &vars);
1483 }
1484
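/**
 * Assign var->data.driver_location for every variable of the given mode.
 * Variables packed into the same vec4 slot share a driver location, and
 * *size receives the total number of driver slots used.
 */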
void
nir_assign_io_var_locations(nir_shader *shader, nir_variable_mode mode,
                            unsigned *size, gl_shader_stage stage)
1488 {
1489 unsigned location = 0;
1490 unsigned assigned_locations[VARYING_SLOT_TESS_MAX][2];
1491 uint64_t processed_locs[2] = { 0 };
1492
1493 struct exec_list io_vars;
1494 sort_varyings(shader, mode, &io_vars);
1495
1496 int ASSERTED last_loc = 0;
1497 bool ASSERTED last_per_prim = false;
1498 bool last_partial = false;
1499 nir_foreach_variable_in_list(var, &io_vars) {
1500 const struct glsl_type *type = var->type;
1501 if (nir_is_arrayed_io(var, stage)) {
1502 assert(glsl_type_is_array(type));
1503 type = glsl_get_array_element(type);
1504 }
1505
1506 int base;
1507 if (var->data.mode == nir_var_shader_in && stage == MESA_SHADER_VERTEX)
1508 base = VERT_ATTRIB_GENERIC0;
1509 else if (var->data.mode == nir_var_shader_out &&
1510 stage == MESA_SHADER_FRAGMENT)
1511 base = FRAG_RESULT_DATA0;
1512 else
1513 base = VARYING_SLOT_VAR0;
1514
1515 unsigned var_size, driver_size;
1516 if (var->data.compact) {
1517 /* If we are inside a partial compact,
1518 * don't allow another compact to be in this slot
1519 * if it starts at component 0.
1520 */
1521 if (last_partial && var->data.location_frac == 0) {
1522 location++;
1523 }
1524
1525 /* compact variables must be arrays of scalars */
1526 assert(!var->data.per_view);
1527 assert(glsl_type_is_array(type));
1528 assert(glsl_type_is_scalar(glsl_get_array_element(type)));
1529 unsigned start = 4 * location + var->data.location_frac;
1530 unsigned end = start + glsl_get_length(type);
1531 var_size = driver_size = end / 4 - location;
1532 last_partial = end % 4 != 0;
1533 } else {
1534 /* Compact variables bypass the normal varying compacting pass,
1535 * which means they cannot be in the same vec4 slot as a normal
1536 * variable. If part of the current slot is taken up by a compact
1537 * variable, we need to go to the next one.
1538 */
1539 if (last_partial) {
1540 location++;
1541 last_partial = false;
1542 }
1543
1544 /* per-view variables have an extra array dimension, which is ignored
1545 * when counting user-facing slots (var->data.location), but *not*
1546 * with driver slots (var->data.driver_location). That is, each user
1547 * slot maps to multiple driver slots.
1548 */
1549 driver_size = glsl_count_attribute_slots(type, false);
1550 if (var->data.per_view) {
1551 assert(glsl_type_is_array(type));
1552 var_size =
1553 glsl_count_attribute_slots(glsl_get_array_element(type), false);
1554 } else {
1555 var_size = driver_size;
1556 }
1557 }
1558
1559 /* Builtins don't allow component packing so we only need to worry about
1560 * user defined varyings sharing the same location.
1561 */
1562 bool processed = false;
1563 if (var->data.location >= base) {
1564 unsigned glsl_location = var->data.location - base;
1565
1566 for (unsigned i = 0; i < var_size; i++) {
1567 if (processed_locs[var->data.index] &
1568 ((uint64_t)1 << (glsl_location + i)))
1569 processed = true;
1570 else
1571 processed_locs[var->data.index] |=
1572 ((uint64_t)1 << (glsl_location + i));
1573 }
1574 }
1575
      /* Because component packing allows varyings to share the same location,
       * we may already have processed this location.
       */
1579 if (processed) {
1580 /* TODO handle overlapping per-view variables */
1581 assert(!var->data.per_view);
1582 unsigned driver_location = assigned_locations[var->data.location][var->data.index];
1583 var->data.driver_location = driver_location;
1584
         /* An array may be packed such that it crosses multiple other arrays
          * or variables, so we need to make sure we have allocated the
          * elements consecutively if the previously processed var was shorter
          * than the current array we are processing.
          *
          * NOTE: The code below assumes the var list is ordered in ascending
          * location order, but per-vertex/per-primitive outputs may be
          * grouped separately.
          */
1594 assert(last_loc <= var->data.location ||
1595 last_per_prim != var->data.per_primitive);
1596 last_loc = var->data.location;
1597 last_per_prim = var->data.per_primitive;
1598 unsigned last_slot_location = driver_location + var_size;
1599 if (last_slot_location > location) {
1600 unsigned num_unallocated_slots = last_slot_location - location;
1601 unsigned first_unallocated_slot = var_size - num_unallocated_slots;
1602 for (unsigned i = first_unallocated_slot; i < var_size; i++) {
1603 assigned_locations[var->data.location + i][var->data.index] = location;
1604 location++;
1605 }
1606 }
1607 continue;
1608 }
1609
1610 for (unsigned i = 0; i < var_size; i++) {
1611 assigned_locations[var->data.location + i][var->data.index] = location + i;
1612 }
1613
1614 var->data.driver_location = location;
1615 location += driver_size;
1616 }
1617
1618 if (last_partial)
1619 location++;
1620
1621 exec_list_append(&shader->variables, &io_vars);
1622 *size = location;
1623 }
1624