xref: /aosp_15_r20/external/mesa3d/src/compiler/nir/nir_linking_helpers.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "util/hash_table.h"
25 #include "util/set.h"
26 #include "nir.h"
27 #include "nir_builder.h"
28 
29 /* This file contains various little helpers for doing simple linking in
30  * NIR.  Eventually, we'll probably want a full-blown varying packing
31  * implementation in here.  Right now, it just deletes unused things.
32  */
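
/* A rough sketch of how a driver's linking code might chain the helpers in
 * this file for a producer/consumer pair. The call order below is purely
 * illustrative (an assumption, not something this file mandates):
 *
 *    nir_link_xfb_varyings(producer, consumer);
 *    bool progress = nir_link_opt_varyings(producer, consumer);
 *    progress |= nir_remove_unused_varyings(producer, consumer);
 *    nir_link_varying_precision(producer, consumer);
 *    nir_compact_varyings(producer, consumer, true);
 */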
33 
34 /**
35  * Returns the bits in the inputs_read or outputs_written bitfield
36  * corresponding to this variable.
37  */
38 static uint64_t
39 get_variable_io_mask(nir_variable *var, gl_shader_stage stage)
40 {
41    if (var->data.location < 0)
42       return 0;
43 
44    unsigned location = var->data.patch ? var->data.location - VARYING_SLOT_PATCH0 : var->data.location;
45 
46    assert(var->data.mode == nir_var_shader_in ||
47           var->data.mode == nir_var_shader_out);
48    assert(var->data.location >= 0);
49    assert(location < 64);
50 
51    const struct glsl_type *type = var->type;
52    if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
53       assert(glsl_type_is_array(type));
54       type = glsl_get_array_element(type);
55    }
56 
57    unsigned slots = glsl_count_attribute_slots(type, false);
58    return BITFIELD64_MASK(slots) << location;
59 }
60 
61 static bool
62 is_non_generic_patch_var(nir_variable *var)
63 {
64    return var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
65           var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER ||
66           var->data.location == VARYING_SLOT_BOUNDING_BOX0 ||
67           var->data.location == VARYING_SLOT_BOUNDING_BOX1;
68 }
69 
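/* Number of components a variable occupies within a slot; structs and
 * interface blocks are conservatively treated as a full vec4.
 */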
70 static uint8_t
71 get_num_components(nir_variable *var)
72 {
73    if (glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
74       return 4;
75 
76    return glsl_get_vector_elements(glsl_without_array(var->type));
77 }
78 
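/* OR the slot masks of any outputs that are read back within this shader
 * (e.g. TCS outputs read by other invocations) into the per-component
 * read/patches_read arrays.
 */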
79 static void
80 add_output_reads(nir_shader *shader, uint64_t *read, uint64_t *patches_read)
81 {
82    nir_foreach_function_impl(impl, shader) {
83       nir_foreach_block(block, impl) {
84          nir_foreach_instr(instr, block) {
85             if (instr->type != nir_instr_type_intrinsic)
86                continue;
87 
88             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
89             if (intrin->intrinsic != nir_intrinsic_load_deref)
90                continue;
91 
92             nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
93             if (!nir_deref_mode_is(deref, nir_var_shader_out))
94                continue;
95 
96             nir_variable *var = nir_deref_instr_get_variable(deref);
97             for (unsigned i = 0; i < get_num_components(var); i++) {
98                if (var->data.patch) {
99                   if (is_non_generic_patch_var(var))
100                      continue;
101 
102                   patches_read[var->data.location_frac + i] |=
103                      get_variable_io_mask(var, shader->info.stage);
104                } else {
105                   read[var->data.location_frac + i] |=
106                      get_variable_io_mask(var, shader->info.stage);
107                }
108             }
109          }
110       }
111    }
112 }
113 
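/* Intrinsic callback that deletes loads, stores, copies and interpolation
 * intrinsics accessing variables previously marked as removed (their location
 * was set to NUM_TOTAL_VARYING_SLOTS); load results are replaced with undef.
 */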
114 static bool
115 remove_unused_io_access(nir_builder *b, nir_intrinsic_instr *intrin, void *cb_data)
116 {
117    nir_variable_mode mode = *(nir_variable_mode *)cb_data;
118 
119    unsigned srcn = 0;
120    switch (intrin->intrinsic) {
121    case nir_intrinsic_load_deref:
122    case nir_intrinsic_store_deref:
123    case nir_intrinsic_interp_deref_at_centroid:
124    case nir_intrinsic_interp_deref_at_sample:
125    case nir_intrinsic_interp_deref_at_offset:
126    case nir_intrinsic_interp_deref_at_vertex:
127       break;
128    case nir_intrinsic_copy_deref:
129       srcn = mode == nir_var_shader_in ? 1 : 0;
130       break;
131    default:
132       return false;
133    }
134 
135    nir_variable *var = nir_intrinsic_get_var(intrin, srcn);
136    if (!var || var->data.mode != mode || var->data.location != NUM_TOTAL_VARYING_SLOTS)
137       return false;
138 
139    if (intrin->intrinsic != nir_intrinsic_store_deref &&
140        intrin->intrinsic != nir_intrinsic_copy_deref) {
141       b->cursor = nir_before_instr(&intrin->instr);
142       nir_def *undef = nir_undef(b, intrin->num_components, intrin->def.bit_size);
143       nir_def_rewrite_uses(&intrin->def, undef);
144    }
145 
146    nir_instr_remove(&intrin->instr);
147    nir_deref_instr_remove_if_unused(nir_src_as_deref(intrin->src[srcn]));
148 
149    return true;
150 }
151 
152 /**
153  * Helper for removing unused shader I/O variables by demoting them to global
154  * variables (which may then be dead-code eliminated).
155  *
156  * Example usage is:
157  *
158  * progress = nir_remove_unused_io_vars(producer, nir_var_shader_out,
159  *                                      read, patches_read) ||
160  *                                      progress;
161  */
162 bool
163 nir_remove_unused_io_vars(nir_shader *shader,
164                           nir_variable_mode mode,
165                           uint64_t *used_by_other_stage,
166                           uint64_t *used_by_other_stage_patches)
167 {
168    bool progress = false;
169    uint64_t *used;
170 
171    assert(mode == nir_var_shader_in || mode == nir_var_shader_out);
172 
173    uint64_t read[4] = { 0 };
174    uint64_t patches_read[4] = { 0 };
175    if (mode == nir_var_shader_out)
176       add_output_reads(shader, read, patches_read);
177 
178    nir_foreach_variable_with_modes_safe(var, shader, mode) {
179       if (var->data.patch)
180          used = used_by_other_stage_patches;
181       else
182          used = used_by_other_stage;
183 
184       if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0 &&
185           !(shader->info.stage == MESA_SHADER_MESH && var->data.location == VARYING_SLOT_PRIMITIVE_ID))
186          continue;
187 
188       if (var->data.always_active_io)
189          continue;
190 
191       if (var->data.explicit_xfb_buffer)
192          continue;
193 
194       uint64_t other_stage = 0;
195       uint64_t this_stage = 0;
196       for (unsigned i = 0; i < get_num_components(var); i++) {
197          other_stage |= used[var->data.location_frac + i];
198          this_stage |= (var->data.patch ? patches_read : read)[var->data.location_frac + i];
199       }
200 
201       uint64_t var_mask = get_variable_io_mask(var, shader->info.stage);
202       if (!((other_stage | this_stage) & var_mask)) {
203          /* Mark the variable as removed by setting the location to an invalid value. */
204          var->data.location = NUM_TOTAL_VARYING_SLOTS;
205          exec_node_remove(&var->node);
206          progress = true;
207       }
208    }
209 
210    if (progress) {
211       nir_shader_intrinsics_pass(shader, &remove_unused_io_access, nir_metadata_control_flow, &mode);
212    } else {
213       nir_shader_preserve_all_metadata(shader);
214    }
215 
216    return progress;
217 }
218 
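/* Gather the slots written by the producer and read by the consumer, then
 * drop any I/O variables that the opposite stage does not use.
 */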
219 bool
220 nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer)
221 {
222    assert(producer->info.stage != MESA_SHADER_FRAGMENT);
223    assert(consumer->info.stage != MESA_SHADER_VERTEX);
224 
225    uint64_t read[4] = { 0 }, written[4] = { 0 };
226    uint64_t patches_read[4] = { 0 }, patches_written[4] = { 0 };
227 
228    nir_foreach_shader_out_variable(var, producer) {
229       for (unsigned i = 0; i < get_num_components(var); i++) {
230          if (var->data.patch) {
231             if (is_non_generic_patch_var(var))
232                continue;
233 
234             patches_written[var->data.location_frac + i] |=
235                get_variable_io_mask(var, producer->info.stage);
236          } else {
237             written[var->data.location_frac + i] |=
238                get_variable_io_mask(var, producer->info.stage);
239          }
240       }
241    }
242 
243    nir_foreach_shader_in_variable(var, consumer) {
244       for (unsigned i = 0; i < get_num_components(var); i++) {
245          if (var->data.patch) {
246             if (is_non_generic_patch_var(var))
247                continue;
248 
249             patches_read[var->data.location_frac + i] |=
250                get_variable_io_mask(var, consumer->info.stage);
251          } else {
252             read[var->data.location_frac + i] |=
253                get_variable_io_mask(var, consumer->info.stage);
254          }
255       }
256    }
257 
258    bool progress = false;
259    progress = nir_remove_unused_io_vars(producer, nir_var_shader_out, read,
260                                         patches_read);
261 
262    progress = nir_remove_unused_io_vars(consumer, nir_var_shader_in, written,
263                                         patches_written) ||
264               progress;
265 
266    return progress;
267 }
268 
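/* Effective interpolation mode used for packing decisions: per-primitive
 * outputs have no interpolation, integer varyings are flat, and varyings
 * without an explicit mode may default to smooth.
 */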
269 static uint8_t
270 get_interp_type(nir_variable *var, const struct glsl_type *type,
271                 bool default_to_smooth_interp)
272 {
273    if (var->data.per_primitive)
274       return INTERP_MODE_NONE;
275    if (glsl_type_is_integer(type))
276       return INTERP_MODE_FLAT;
277    else if (var->data.interpolation != INTERP_MODE_NONE)
278       return var->data.interpolation;
279    else if (default_to_smooth_interp)
280       return INTERP_MODE_SMOOTH;
281    else
282       return INTERP_MODE_NONE;
283 }
284 
285 #define INTERPOLATE_LOC_SAMPLE   0
286 #define INTERPOLATE_LOC_CENTROID 1
287 #define INTERPOLATE_LOC_CENTER   2
288 
289 static uint8_t
290 get_interp_loc(nir_variable *var)
291 {
292    if (var->data.sample)
293       return INTERPOLATE_LOC_SAMPLE;
294    else if (var->data.centroid)
295       return INTERPOLATE_LOC_CENTROID;
296    else
297       return INTERPOLATE_LOC_CENTER;
298 }
299 
300 static bool
301 is_packing_supported_for_type(const struct glsl_type *type)
302 {
303    /* We ignore complex types such as arrays, matrices, structs and bit sizes
304     * other than 32-bit. All other vector types should have been split into
305     * scalar variables by the lower_io_to_scalar pass. The only exception
306     * should be OpenGL xfb varyings.
307     * TODO: add support for more complex types?
308     */
309    return glsl_type_is_scalar(type) && glsl_type_is_32bit(type);
310 }
311 
312 struct assigned_comps {
313    uint8_t comps;
314    uint8_t interp_type;
315    uint8_t interp_loc;
316    bool is_32bit;
317    bool is_mediump;
318    bool is_per_primitive;
319 };
320 
321 /* Packing arrays and dual-slot varyings is difficult, so to avoid complex
322  * algorithms this function just assigns them their existing locations for now.
323  * TODO: allow better packing of complex types.
324  */
325 static void
326 get_unmoveable_components_masks(nir_shader *shader,
327                                 nir_variable_mode mode,
328                                 struct assigned_comps *comps,
329                                 gl_shader_stage stage,
330                                 bool default_to_smooth_interp)
331 {
332    nir_foreach_variable_with_modes_safe(var, shader, mode) {
333       assert(var->data.location >= 0);
334 
335       /* Only remap things that aren't built-ins. */
336       if (var->data.location >= VARYING_SLOT_VAR0 &&
337           var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
338 
339          const struct glsl_type *type = var->type;
340          if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
341             assert(glsl_type_is_array(type));
342             type = glsl_get_array_element(type);
343          }
344 
345          /* If we can pack this varying then don't mark the components as
346           * used.
347           */
348          if (is_packing_supported_for_type(type) &&
349              !var->data.always_active_io)
350             continue;
351 
352          unsigned location = var->data.location - VARYING_SLOT_VAR0;
353 
354          unsigned elements =
355             glsl_type_is_vector_or_scalar(glsl_without_array(type)) ? glsl_get_vector_elements(glsl_without_array(type)) : 4;
356 
357          bool dual_slot = glsl_type_is_dual_slot(glsl_without_array(type));
358          unsigned slots = glsl_count_attribute_slots(type, false);
359          unsigned dmul = glsl_type_is_64bit(glsl_without_array(type)) ? 2 : 1;
360          unsigned comps_slot2 = 0;
361          for (unsigned i = 0; i < slots; i++) {
362             if (dual_slot) {
363                if (i & 1) {
364                   comps[location + i].comps |= ((1 << comps_slot2) - 1);
365                } else {
366                   unsigned num_comps = 4 - var->data.location_frac;
367                   comps_slot2 = (elements * dmul) - num_comps;
368 
369                   /* Assume ARB_enhanced_layouts packing rules for doubles */
370                   assert(var->data.location_frac == 0 ||
371                          var->data.location_frac == 2);
372                   assert(comps_slot2 <= 4);
373 
374                   comps[location + i].comps |=
375                      ((1 << num_comps) - 1) << var->data.location_frac;
376                }
377             } else {
378                comps[location + i].comps |=
379                   ((1 << (elements * dmul)) - 1) << var->data.location_frac;
380             }
381 
382             comps[location + i].interp_type =
383                get_interp_type(var, type, default_to_smooth_interp);
384             comps[location + i].interp_loc = get_interp_loc(var);
385             comps[location + i].is_32bit =
386                glsl_type_is_32bit(glsl_without_array(type));
387             comps[location + i].is_mediump =
388                var->data.precision == GLSL_PRECISION_MEDIUM ||
389                var->data.precision == GLSL_PRECISION_LOW;
390             comps[location + i].is_per_primitive = var->data.per_primitive;
391          }
392       }
393    }
394 }
395 
396 struct varying_loc {
397    uint8_t component;
398    uint32_t location;
399 };
400 
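/* Copy the already-used slot bits covered by this variable into slots_used,
 * keeping patch and non-patch slots in separate masks.
 */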
401 static void
402 mark_all_used_slots(nir_variable *var, uint64_t *slots_used,
403                     uint64_t slots_used_mask, unsigned num_slots)
404 {
405    unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
406 
407    slots_used[var->data.patch ? 1 : 0] |= slots_used_mask &
408                                           BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
409 }
410 
411 static void
412 mark_used_slot(nir_variable *var, uint64_t *slots_used, unsigned offset)
413 {
414    unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
415 
416    slots_used[var->data.patch ? 1 : 0] |=
417       BITFIELD64_BIT(var->data.location - loc_offset + offset);
418 }
419 
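/* Apply the remap table to the variables of the given mode and rebuild the
 * slots_used/out_slots_read bitmasks (and their patch equivalents) so they
 * match the new locations.
 */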
420 static void
421 remap_slots_and_components(nir_shader *shader, nir_variable_mode mode,
422                            struct varying_loc (*remap)[4],
423                            uint64_t *slots_used, uint64_t *out_slots_read,
424                            uint32_t *p_slots_used, uint32_t *p_out_slots_read)
425 {
426    const gl_shader_stage stage = shader->info.stage;
427    uint64_t out_slots_read_tmp[2] = { 0 };
428    uint64_t slots_used_tmp[2] = { 0 };
429 
430    /* We don't touch builtins so just copy the bitmask */
431    slots_used_tmp[0] = *slots_used & BITFIELD64_RANGE(0, VARYING_SLOT_VAR0);
432 
433    nir_foreach_variable_with_modes(var, shader, mode) {
434       assert(var->data.location >= 0);
435 
436       /* Only remap things that aren't built-ins */
437       if (var->data.location >= VARYING_SLOT_VAR0 &&
438           var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
439 
440          const struct glsl_type *type = var->type;
441          if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
442             assert(glsl_type_is_array(type));
443             type = glsl_get_array_element(type);
444          }
445 
446          unsigned num_slots = glsl_count_attribute_slots(type, false);
447          bool used_across_stages = false;
448          bool outputs_read = false;
449 
450          unsigned location = var->data.location - VARYING_SLOT_VAR0;
451          struct varying_loc *new_loc = &remap[location][var->data.location_frac];
452 
453          unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
454          uint64_t used = var->data.patch ? *p_slots_used : *slots_used;
455          uint64_t outs_used =
456             var->data.patch ? *p_out_slots_read : *out_slots_read;
457          uint64_t slots =
458             BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
459 
460          if (slots & used)
461             used_across_stages = true;
462 
463          if (slots & outs_used)
464             outputs_read = true;
465 
466          if (new_loc->location) {
467             var->data.location = new_loc->location;
468             var->data.location_frac = new_loc->component;
469          }
470 
471          if (var->data.always_active_io) {
472             /* We can't apply link-time optimisations (specifically array
473              * splitting) to these, so we need to copy the existing mask;
474              * otherwise we will mess up the mask for things like partially
475              * marked arrays.
476              */
477             if (used_across_stages)
478                mark_all_used_slots(var, slots_used_tmp, used, num_slots);
479 
480             if (outputs_read) {
481                mark_all_used_slots(var, out_slots_read_tmp, outs_used,
482                                    num_slots);
483             }
484          } else {
485             for (unsigned i = 0; i < num_slots; i++) {
486                if (used_across_stages)
487                   mark_used_slot(var, slots_used_tmp, i);
488 
489                if (outputs_read)
490                   mark_used_slot(var, out_slots_read_tmp, i);
491             }
492          }
493       }
494    }
495 
496    *slots_used = slots_used_tmp[0];
497    *out_slots_read = out_slots_read_tmp[0];
498    *p_slots_used = slots_used_tmp[1];
499    *p_out_slots_read = out_slots_read_tmp[1];
500 }
501 
502 struct varying_component {
503    nir_variable *var;
504    uint8_t interp_type;
505    uint8_t interp_loc;
506    bool is_32bit;
507    bool is_patch;
508    bool is_per_primitive;
509    bool is_mediump;
510    bool is_intra_stage_only;
511    bool initialised;
512 };
513 
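/* qsort comparator that groups varying components so that compatible ones
 * (same patch/per-primitive/precision/interpolation properties) end up
 * adjacent in the array.
 */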
514 static int
515 cmp_varying_component(const void *comp1_v, const void *comp2_v)
516 {
517    struct varying_component *comp1 = (struct varying_component *)comp1_v;
518    struct varying_component *comp2 = (struct varying_component *)comp2_v;
519 
520    /* We want patches to be ordered at the end of the array */
521    if (comp1->is_patch != comp2->is_patch)
522       return comp1->is_patch ? 1 : -1;
523 
524    /* Sort per-primitive outputs after per-vertex ones to allow
525     * better compaction when they are mixed in the shader's source.
526     */
527    if (comp1->is_per_primitive != comp2->is_per_primitive)
528       return comp1->is_per_primitive ? 1 : -1;
529 
530    /* We want to try to group together TCS outputs that are only read by other
531     * TCS invocations and not consumed by the following stage.
532     */
533    if (comp1->is_intra_stage_only != comp2->is_intra_stage_only)
534       return comp1->is_intra_stage_only ? 1 : -1;
535 
536    /* Group mediump varyings together. */
537    if (comp1->is_mediump != comp2->is_mediump)
538       return comp1->is_mediump ? 1 : -1;
539 
540    /* We can only pack varyings with matching interpolation types so group
541     * them together.
542     */
543    if (comp1->interp_type != comp2->interp_type)
544       return comp1->interp_type - comp2->interp_type;
545 
546    /* Interpolation loc must match also. */
547    if (comp1->interp_loc != comp2->interp_loc)
548       return comp1->interp_loc - comp2->interp_loc;
549 
550    /* If everything else matches just use the original location to sort */
551    const struct nir_variable_data *const data1 = &comp1->var->data;
552    const struct nir_variable_data *const data2 = &comp2->var->data;
553    if (data1->location != data2->location)
554       return data1->location - data2->location;
555    return (int)data1->location_frac - (int)data2->location_frac;
556 }
557 
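/* Build the array of packable varying components by scanning the producer's
 * outputs and then the loads in the consumer (and, for TCS, the producer's
 * own output reads) to fill in per-component information.
 */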
558 static void
559 gather_varying_component_info(nir_shader *producer, nir_shader *consumer,
560                               struct varying_component **varying_comp_info,
561                               unsigned *varying_comp_info_size,
562                               bool default_to_smooth_interp)
563 {
564    unsigned store_varying_info_idx[MAX_VARYINGS_INCL_PATCH][4] = { { 0 } };
565    unsigned num_of_comps_to_pack = 0;
566 
567    /* Count the number of varyings that can be packed and create a mapping
568     * of those varyings to the array we will pass to qsort.
569     */
570    nir_foreach_shader_out_variable(var, producer) {
571 
572       /* Only remap things that aren't builtins. */
573       if (var->data.location >= VARYING_SLOT_VAR0 &&
574           var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
575 
576          /* We can't repack xfb varyings. */
577          if (var->data.always_active_io)
578             continue;
579 
580          const struct glsl_type *type = var->type;
581          if (nir_is_arrayed_io(var, producer->info.stage) || var->data.per_view) {
582             assert(glsl_type_is_array(type));
583             type = glsl_get_array_element(type);
584          }
585 
586          if (!is_packing_supported_for_type(type))
587             continue;
588 
589          unsigned loc = var->data.location - VARYING_SLOT_VAR0;
590          store_varying_info_idx[loc][var->data.location_frac] =
591             ++num_of_comps_to_pack;
592       }
593    }
594 
595    *varying_comp_info_size = num_of_comps_to_pack;
596    *varying_comp_info = rzalloc_array(NULL, struct varying_component,
597                                       num_of_comps_to_pack);
598 
599    nir_function_impl *impl = nir_shader_get_entrypoint(consumer);
600 
601    /* Walk over the shader and populate the varying component info array */
602    nir_foreach_block(block, impl) {
603       nir_foreach_instr(instr, block) {
604          if (instr->type != nir_instr_type_intrinsic)
605             continue;
606 
607          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
608          if (intr->intrinsic != nir_intrinsic_load_deref &&
609              intr->intrinsic != nir_intrinsic_interp_deref_at_centroid &&
610              intr->intrinsic != nir_intrinsic_interp_deref_at_sample &&
611              intr->intrinsic != nir_intrinsic_interp_deref_at_offset &&
612              intr->intrinsic != nir_intrinsic_interp_deref_at_vertex)
613             continue;
614 
615          nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
616          if (!nir_deref_mode_is(deref, nir_var_shader_in))
617             continue;
618 
619          /* We only remap things that aren't builtins. */
620          nir_variable *in_var = nir_deref_instr_get_variable(deref);
621          if (in_var->data.location < VARYING_SLOT_VAR0)
622             continue;
623 
624          /* Do not remap per-vertex shader inputs because they are 3-element
625           * arrays and this isn't supported.
626           */
627          if (in_var->data.per_vertex)
628             continue;
629 
630          unsigned location = in_var->data.location - VARYING_SLOT_VAR0;
631          if (location >= MAX_VARYINGS_INCL_PATCH)
632             continue;
633 
634          unsigned var_info_idx =
635             store_varying_info_idx[location][in_var->data.location_frac];
636          if (!var_info_idx)
637             continue;
638 
639          struct varying_component *vc_info =
640             &(*varying_comp_info)[var_info_idx - 1];
641 
642          if (!vc_info->initialised) {
643             const struct glsl_type *type = in_var->type;
644             if (nir_is_arrayed_io(in_var, consumer->info.stage) ||
645                 in_var->data.per_view) {
646                assert(glsl_type_is_array(type));
647                type = glsl_get_array_element(type);
648             }
649 
650             vc_info->var = in_var;
651             vc_info->interp_type =
652                get_interp_type(in_var, type, default_to_smooth_interp);
653             vc_info->interp_loc = get_interp_loc(in_var);
654             vc_info->is_32bit = glsl_type_is_32bit(type);
655             vc_info->is_patch = in_var->data.patch;
656             vc_info->is_per_primitive = in_var->data.per_primitive;
657             vc_info->is_mediump = !producer->options->linker_ignore_precision &&
658                                   (in_var->data.precision == GLSL_PRECISION_MEDIUM ||
659                                    in_var->data.precision == GLSL_PRECISION_LOW);
660             vc_info->is_intra_stage_only = false;
661             vc_info->initialised = true;
662          }
663       }
664    }
665 
666    /* Walk over the shader and populate the varying component info array
667     * for varyings which are read by other TCS invocations but are not consumed
668     * by the TES.
669     */
670    if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
671       impl = nir_shader_get_entrypoint(producer);
672 
673       nir_foreach_block(block, impl) {
674          nir_foreach_instr(instr, block) {
675             if (instr->type != nir_instr_type_intrinsic)
676                continue;
677 
678             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
679             if (intr->intrinsic != nir_intrinsic_load_deref)
680                continue;
681 
682             nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
683             if (!nir_deref_mode_is(deref, nir_var_shader_out))
684                continue;
685 
686             /* We only remap things that aren't builtins. */
687             nir_variable *out_var = nir_deref_instr_get_variable(deref);
688             if (out_var->data.location < VARYING_SLOT_VAR0)
689                continue;
690 
691             unsigned location = out_var->data.location - VARYING_SLOT_VAR0;
692             if (location >= MAX_VARYINGS_INCL_PATCH)
693                continue;
694 
695             unsigned var_info_idx =
696                store_varying_info_idx[location][out_var->data.location_frac];
697             if (!var_info_idx) {
698                /* Something went wrong, the shader interfaces didn't match, so
699                 * abandon packing. This can happen for example when the
700                 * inputs are scalars but the outputs are struct members.
701                 */
702                *varying_comp_info_size = 0;
703                break;
704             }
705 
706             struct varying_component *vc_info =
707                &(*varying_comp_info)[var_info_idx - 1];
708 
709             if (!vc_info->initialised) {
710                const struct glsl_type *type = out_var->type;
711                if (nir_is_arrayed_io(out_var, producer->info.stage)) {
712                   assert(glsl_type_is_array(type));
713                   type = glsl_get_array_element(type);
714                }
715 
716                vc_info->var = out_var;
717                vc_info->interp_type =
718                   get_interp_type(out_var, type, default_to_smooth_interp);
719                vc_info->interp_loc = get_interp_loc(out_var);
720                vc_info->is_32bit = glsl_type_is_32bit(type);
721                vc_info->is_patch = out_var->data.patch;
722                vc_info->is_per_primitive = out_var->data.per_primitive;
723                vc_info->is_mediump = !producer->options->linker_ignore_precision &&
724                                      (out_var->data.precision == GLSL_PRECISION_MEDIUM ||
725                                       out_var->data.precision == GLSL_PRECISION_LOW);
726                vc_info->is_intra_stage_only = true;
727                vc_info->initialised = true;
728             }
729          }
730       }
731    }
732 
733    for (unsigned i = 0; i < *varying_comp_info_size; i++) {
734       struct varying_component *vc_info = &(*varying_comp_info)[i];
735       if (!vc_info->initialised) {
736          /* Something went wrong, the shader interfaces didn't match, so
737           * abandon packing. This can happen for example when the outputs are
738           * scalars but the inputs are struct members.
739           */
740          *varying_comp_info_size = 0;
741          break;
742       }
743    }
744 }
745 
746 static bool
747 allow_pack_interp_type(nir_io_options options, int type)
748 {
749    switch (type) {
750    case INTERP_MODE_NONE:
751    case INTERP_MODE_SMOOTH:
752    case INTERP_MODE_NOPERSPECTIVE:
753       return options & nir_io_has_flexible_input_interpolation_except_flat;
754    default:
755       return false;
756    }
757 }
758 
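/* Scan forward from *cursor for the first location/component that is free and
 * compatible with this varying component, record its new position in the
 * remap table and mark that component as assigned.
 */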
759 static void
760    assign_remap_locations(struct varying_loc (*remap)[4],
761                           struct assigned_comps *assigned_comps,
762                           struct varying_component *info,
763                           unsigned *cursor, unsigned *comp,
764                           unsigned max_location,
765                           nir_io_options options)
766 {
767    unsigned tmp_cursor = *cursor;
768    unsigned tmp_comp = *comp;
769 
770    for (; tmp_cursor < max_location; tmp_cursor++) {
771 
772       if (assigned_comps[tmp_cursor].comps) {
773          /* Don't pack per-primitive and per-vertex varyings together. */
774          if (assigned_comps[tmp_cursor].is_per_primitive != info->is_per_primitive) {
775             tmp_comp = 0;
776             continue;
777          }
778 
779          /* We can only pack varyings with matching precision. */
780          if (assigned_comps[tmp_cursor].is_mediump != info->is_mediump) {
781             tmp_comp = 0;
782             continue;
783          }
784 
785          /* We can only pack varyings with mismatched interpolation types
786           * if the driver supports flexible interpolation for both of them.
787           */
788          if (assigned_comps[tmp_cursor].interp_type != info->interp_type &&
789              (!allow_pack_interp_type(options, assigned_comps[tmp_cursor].interp_type) ||
790               !allow_pack_interp_type(options, info->interp_type))) {
791             tmp_comp = 0;
792             continue;
793          }
794 
795          /* We can only pack varyings with mismatched interpolation locations
796           * if the driver supports flexible input interpolation.
797           */
798          if (assigned_comps[tmp_cursor].interp_loc != info->interp_loc &&
799              !(options & nir_io_has_flexible_input_interpolation_except_flat)) {
800             tmp_comp = 0;
801             continue;
802          }
803 
804          /* We can only pack varyings with matching types, and the current
805           * algorithm only supports packing 32-bit.
806           */
807          if (!assigned_comps[tmp_cursor].is_32bit) {
808             tmp_comp = 0;
809             continue;
810          }
811 
812          while (tmp_comp < 4 &&
813                 (assigned_comps[tmp_cursor].comps & (1 << tmp_comp))) {
814             tmp_comp++;
815          }
816       }
817 
818       if (tmp_comp == 4) {
819          tmp_comp = 0;
820          continue;
821       }
822 
823       unsigned location = info->var->data.location - VARYING_SLOT_VAR0;
824 
825       /* Once we have assigned a location mark it as used */
826       assigned_comps[tmp_cursor].comps |= (1 << tmp_comp);
827       assigned_comps[tmp_cursor].interp_type = info->interp_type;
828       assigned_comps[tmp_cursor].interp_loc = info->interp_loc;
829       assigned_comps[tmp_cursor].is_32bit = info->is_32bit;
830       assigned_comps[tmp_cursor].is_mediump = info->is_mediump;
831       assigned_comps[tmp_cursor].is_per_primitive = info->is_per_primitive;
832 
833       /* Assign remap location */
834       remap[location][info->var->data.location_frac].component = tmp_comp++;
835       remap[location][info->var->data.location_frac].location =
836          tmp_cursor + VARYING_SLOT_VAR0;
837 
838       break;
839    }
840 
841    *cursor = tmp_cursor;
842    *comp = tmp_comp;
843 }
844 
845 /* If there are empty components in the slot, compact the remaining components
846  * as close to component 0 as possible. This will make it easier to fill the
847  * empty components with components from a different slot in a following pass.
848  */
849 static void
850 compact_components(nir_shader *producer, nir_shader *consumer,
851                    struct assigned_comps *assigned_comps,
852                    bool default_to_smooth_interp)
853 {
854    struct varying_loc remap[MAX_VARYINGS_INCL_PATCH][4] = { { { 0 }, { 0 } } };
855    struct varying_component *varying_comp_info;
856    unsigned varying_comp_info_size;
857 
858    /* Gather varying component info */
859    gather_varying_component_info(producer, consumer, &varying_comp_info,
860                                  &varying_comp_info_size,
861                                  default_to_smooth_interp);
862 
863    /* Sort varying components. */
864    qsort(varying_comp_info, varying_comp_info_size,
865          sizeof(struct varying_component), cmp_varying_component);
866 
867    unsigned cursor = 0;
868    unsigned comp = 0;
869 
870    /* Set the remap array based on the sorted components */
871    for (unsigned i = 0; i < varying_comp_info_size; i++) {
872       struct varying_component *info = &varying_comp_info[i];
873 
874       assert(info->is_patch || cursor < MAX_VARYING);
875       if (info->is_patch) {
876          /* The list should be sorted with all non-patch inputs first followed
877           * by patch inputs.  When we hit our first patch input, we need to
878           * reset the cursor to MAX_VARYING so we put them in the right slot.
879           */
880          if (cursor < MAX_VARYING) {
881             cursor = MAX_VARYING;
882             comp = 0;
883          }
884 
885          assign_remap_locations(remap, assigned_comps, info,
886                                 &cursor, &comp, MAX_VARYINGS_INCL_PATCH,
887                                 consumer->options->io_options);
888       } else {
889          assign_remap_locations(remap, assigned_comps, info,
890                                 &cursor, &comp, MAX_VARYING,
891                                 consumer->options->io_options);
892 
893          /* Check if we failed to assign a remap location. This can happen if
894           * for example there are a bunch of unmovable components with
895           * mismatching interpolation types causing us to skip over locations
896           * that would have been useful for packing later components.
897           * The solution is to iterate over the locations again (this should
898           * happen very rarely in practice).
899           */
900          if (cursor == MAX_VARYING) {
901             cursor = 0;
902             comp = 0;
903             assign_remap_locations(remap, assigned_comps, info,
904                                    &cursor, &comp, MAX_VARYING,
905                                    consumer->options->io_options);
906          }
907       }
908    }
909 
910    ralloc_free(varying_comp_info);
911 
912    uint64_t zero = 0;
913    uint32_t zero32 = 0;
914    remap_slots_and_components(consumer, nir_var_shader_in, remap,
915                               &consumer->info.inputs_read, &zero,
916                               &consumer->info.patch_inputs_read, &zero32);
917    remap_slots_and_components(producer, nir_var_shader_out, remap,
918                               &producer->info.outputs_written,
919                               &producer->info.outputs_read,
920                               &producer->info.patch_outputs_written,
921                               &producer->info.patch_outputs_read);
922 }
923 
924 /* We assume that this has been called more-or-less directly after
925  * remove_unused_varyings.  At this point, all of the varyings that we
926  * aren't going to be using have been completely removed and the
927  * inputs_read and outputs_written fields in nir_shader_info reflect
928  * this.  Therefore, the total set of valid slots is the OR of the two
929  * sets of varyings;  this accounts for varyings which one side may need
930  * to read/write even if the other doesn't.  This can happen if, for
931  * instance, an array is used indirectly from one side causing it to be
932  * unsplittable but directly from the other.
933  */
934 void
935 nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
936                      bool default_to_smooth_interp)
937 {
938    assert(producer->info.stage != MESA_SHADER_FRAGMENT);
939    assert(consumer->info.stage != MESA_SHADER_VERTEX);
940 
941    struct assigned_comps assigned_comps[MAX_VARYINGS_INCL_PATCH] = { { 0 } };
942 
943    get_unmoveable_components_masks(producer, nir_var_shader_out,
944                                    assigned_comps,
945                                    producer->info.stage,
946                                    default_to_smooth_interp);
947    get_unmoveable_components_masks(consumer, nir_var_shader_in,
948                                    assigned_comps,
949                                    consumer->info.stage,
950                                    default_to_smooth_interp);
951 
952    compact_components(producer, consumer, assigned_comps,
953                       default_to_smooth_interp);
954 }
955 
956 /*
957  * Mark XFB varyings as always_active_io in the consumer so the linking opts
958  * don't touch them.
959  */
960 void
961 nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer)
962 {
963    nir_variable *input_vars[MAX_VARYING][4] = { 0 };
964 
965    nir_foreach_shader_in_variable(var, consumer) {
966       if (var->data.location >= VARYING_SLOT_VAR0 &&
967           var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {
968 
969          unsigned location = var->data.location - VARYING_SLOT_VAR0;
970          input_vars[location][var->data.location_frac] = var;
971       }
972    }
973 
974    nir_foreach_shader_out_variable(var, producer) {
975       if (var->data.location >= VARYING_SLOT_VAR0 &&
976           var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {
977 
978          if (!var->data.always_active_io)
979             continue;
980 
981          unsigned location = var->data.location - VARYING_SLOT_VAR0;
982          if (input_vars[location][var->data.location_frac]) {
983             input_vars[location][var->data.location_frac]->data.always_active_io = true;
984          }
985       }
986    }
987 }
988 
989 static bool
990 does_varying_match(nir_variable *out_var, nir_variable *in_var)
991 {
992    return in_var->data.location == out_var->data.location &&
993           in_var->data.location_frac == out_var->data.location_frac &&
994           in_var->type == out_var->type;
995 }
996 
997 static nir_variable *
998 get_matching_input_var(nir_shader *consumer, nir_variable *out_var)
999 {
1000    nir_foreach_shader_in_variable(var, consumer) {
1001       if (does_varying_match(out_var, var))
1002          return var;
1003    }
1004 
1005    return NULL;
1006 }
1007 
1008 static bool
1009 can_replace_varying(nir_variable *out_var)
1010 {
1011    /* Skip types that require more complex handling.
1012     * TODO: add support for these types.
1013     */
1014    if (glsl_type_is_array(out_var->type) ||
1015        glsl_type_is_dual_slot(out_var->type) ||
1016        glsl_type_is_matrix(out_var->type) ||
1017        glsl_type_is_struct_or_ifc(out_var->type))
1018       return false;
1019 
1020    /* Limit this pass to scalars for now to keep things simple. Most varyings
1021     * should have been lowered to scalars at this point anyway.
1022     */
1023    if (!glsl_type_is_scalar(out_var->type))
1024       return false;
1025 
1026    if (out_var->data.location < VARYING_SLOT_VAR0 ||
1027        out_var->data.location - VARYING_SLOT_VAR0 >= MAX_VARYING)
1028       return false;
1029 
1030    return true;
1031 }
1032 
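/* Rewrite every load of the consumer input that matches the stored output so
 * that it uses the constant value written by the producer.
 */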
1033 static bool
1034 replace_varying_input_by_constant_load(nir_shader *shader,
1035                                        nir_intrinsic_instr *store_intr)
1036 {
1037    nir_function_impl *impl = nir_shader_get_entrypoint(shader);
1038 
1039    nir_builder b = nir_builder_create(impl);
1040 
1041    nir_variable *out_var = nir_intrinsic_get_var(store_intr, 0);
1042 
1043    bool progress = false;
1044    nir_foreach_block(block, impl) {
1045       nir_foreach_instr(instr, block) {
1046          if (instr->type != nir_instr_type_intrinsic)
1047             continue;
1048 
1049          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1050          if (intr->intrinsic != nir_intrinsic_load_deref)
1051             continue;
1052 
1053          nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
1054          if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
1055             continue;
1056 
1057          nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
1058 
1059          if (!does_varying_match(out_var, in_var))
1060             continue;
1061 
1062          b.cursor = nir_before_instr(instr);
1063 
1064          nir_load_const_instr *out_const =
1065             nir_instr_as_load_const(store_intr->src[1].ssa->parent_instr);
1066 
1067          /* Add new const to replace the input */
1068          nir_def *nconst = nir_build_imm(&b, store_intr->num_components,
1069                                          intr->def.bit_size,
1070                                          out_const->value);
1071 
1072          nir_def_rewrite_uses(&intr->def, nconst);
1073 
1074          progress = true;
1075       }
1076    }
1077 
1078    return progress;
1079 }
1080 
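/* Rewrite loads of an input whose producer output duplicates the value of
 * input_var so that they load input_var instead (only when the interpolation
 * settings of the two inputs match).
 */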
1081 static bool
1082 replace_duplicate_input(nir_shader *shader, nir_variable *input_var,
1083                         nir_intrinsic_instr *dup_store_intr)
1084 {
1085    assert(input_var);
1086 
1087    nir_function_impl *impl = nir_shader_get_entrypoint(shader);
1088 
1089    nir_builder b = nir_builder_create(impl);
1090 
1091    nir_variable *dup_out_var = nir_intrinsic_get_var(dup_store_intr, 0);
1092 
1093    bool progress = false;
1094    nir_foreach_block(block, impl) {
1095       nir_foreach_instr(instr, block) {
1096          if (instr->type != nir_instr_type_intrinsic)
1097             continue;
1098 
1099          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1100          if (intr->intrinsic != nir_intrinsic_load_deref)
1101             continue;
1102 
1103          nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
1104          if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
1105             continue;
1106 
1107          nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
1108 
1109          if (!does_varying_match(dup_out_var, in_var) ||
1110              in_var->data.interpolation != input_var->data.interpolation ||
1111              get_interp_loc(in_var) != get_interp_loc(input_var) ||
1112              in_var->data.per_vertex)
1113             continue;
1114 
1115          b.cursor = nir_before_instr(instr);
1116 
1117          nir_def *load = nir_load_var(&b, input_var);
1118          nir_def_rewrite_uses(&intr->def, load);
1119 
1120          progress = true;
1121       }
1122    }
1123 
1124    return progress;
1125 }
1126 
1127 static bool
1128 is_direct_uniform_load(nir_def *def, nir_scalar *s)
1129 {
1130    /* def is sure to be scalar as can_replace_varying() filters out the vector case. */
1131    assert(def->num_components == 1);
1132 
1133    /* A uniform load may hide behind a move instruction that converts a
1134     * vector to a scalar:
1135     *
1136     *     vec1 32 ssa_1 = deref_var &color (uniform vec3)
1137     *     vec3 32 ssa_2 = intrinsic load_deref (ssa_1) (0)
1138     *     vec1 32 ssa_3 = mov ssa_2.x
1139     *     vec1 32 ssa_4 = deref_var &color_out (shader_out float)
1140     *     intrinsic store_deref (ssa_4, ssa_3) (1, 0)
1141     */
1142    *s = nir_scalar_resolved(def, 0);
1143 
1144    nir_def *ssa = s->def;
1145    if (ssa->parent_instr->type != nir_instr_type_intrinsic)
1146       return false;
1147 
1148    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(ssa->parent_instr);
1149    if (intr->intrinsic != nir_intrinsic_load_deref)
1150       return false;
1151 
1152    nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
1153    /* TODO: support nir_var_mem_ubo. */
1154    if (!nir_deref_mode_is(deref, nir_var_uniform))
1155       return false;
1156 
1157    /* Does not support indirect uniform load. */
1158    return !nir_deref_instr_has_indirect(deref);
1159 }
1160 
1161 /**
1162  * Add a uniform variable from one shader to a different shader.
1163  *
1164  * \param nir     The shader where to add the uniform
1165  * \param uniform The uniform that's declared in another shader.
1166  */
1167 nir_variable *
1168 nir_clone_uniform_variable(nir_shader *nir, nir_variable *uniform, bool spirv)
1169 {
1170    /* Check whether the uniform already exists in the consumer. */
1171    nir_variable *new_var = NULL;
1172    nir_foreach_variable_with_modes(v, nir, uniform->data.mode) {
1173       if ((spirv && uniform->data.mode & nir_var_mem_ubo &&
1174            v->data.binding == uniform->data.binding) ||
1175           (!spirv && !strcmp(uniform->name, v->name))) {
1176          new_var = v;
1177          break;
1178       }
1179    }
1180 
1181    /* Create the variable if it doesn't already exist. */
1182    if (!new_var) {
1183       new_var = nir_variable_clone(uniform, nir);
1184       nir_shader_add_variable(nir, new_var);
1185    }
1186 
1187    return new_var;
1188 }
1189 
1190 nir_deref_instr *
1191 nir_clone_deref_instr(nir_builder *b, nir_variable *var,
1192                       nir_deref_instr *deref)
1193 {
1194    if (deref->deref_type == nir_deref_type_var)
1195       return nir_build_deref_var(b, var);
1196 
1197    nir_deref_instr *parent_deref = nir_deref_instr_parent(deref);
1198    nir_deref_instr *parent = nir_clone_deref_instr(b, var, parent_deref);
1199 
1200    /* Build the array or struct deref instruction.
1201     * The "deref" instr is guaranteed to be direct (see is_direct_uniform_load()).
1202     */
1203    switch (deref->deref_type) {
1204    case nir_deref_type_array: {
1205       nir_load_const_instr *index =
1206          nir_instr_as_load_const(deref->arr.index.ssa->parent_instr);
1207       return nir_build_deref_array_imm(b, parent, index->value->i64);
1208    }
1209    case nir_deref_type_ptr_as_array: {
1210       nir_load_const_instr *index =
1211          nir_instr_as_load_const(deref->arr.index.ssa->parent_instr);
1212       nir_def *ssa = nir_imm_intN_t(b, index->value->i64,
1213                                     parent->def.bit_size);
1214       return nir_build_deref_ptr_as_array(b, parent, ssa);
1215    }
1216    case nir_deref_type_struct:
1217       return nir_build_deref_struct(b, parent, deref->strct.index);
1218    default:
1219       unreachable("invalid type");
1220       return NULL;
1221    }
1222 }
1223 
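/* Replace loads of the matching consumer input with a load of the uniform the
 * producer stores to this output, cloning the uniform variable and its deref
 * chain into the consumer shader.
 */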
1224 static bool
1225 replace_varying_input_by_uniform_load(nir_shader *shader,
1226                                       nir_intrinsic_instr *store_intr,
1227                                       nir_scalar *scalar)
1228 {
1229    nir_function_impl *impl = nir_shader_get_entrypoint(shader);
1230 
1231    nir_builder b = nir_builder_create(impl);
1232 
1233    nir_variable *out_var = nir_intrinsic_get_var(store_intr, 0);
1234 
1235    nir_intrinsic_instr *load = nir_instr_as_intrinsic(scalar->def->parent_instr);
1236    nir_deref_instr *deref = nir_src_as_deref(load->src[0]);
1237    nir_variable *uni_var = nir_deref_instr_get_variable(deref);
1238    uni_var = nir_clone_uniform_variable(shader, uni_var, false);
1239 
1240    bool progress = false;
1241    nir_foreach_block(block, impl) {
1242       nir_foreach_instr(instr, block) {
1243          if (instr->type != nir_instr_type_intrinsic)
1244             continue;
1245 
1246          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1247          if (intr->intrinsic != nir_intrinsic_load_deref)
1248             continue;
1249 
1250          nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
1251          if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
1252             continue;
1253 
1254          nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
1255 
1256          if (!does_varying_match(out_var, in_var))
1257             continue;
1258 
1259          b.cursor = nir_before_instr(instr);
1260 
1261          /* Clone the deref chain, from the variable deref down to the loaded deref. */
1262          nir_deref_instr *uni_deref = nir_clone_deref_instr(&b, uni_var, deref);
1263          nir_def *uni_def = nir_load_deref(&b, uni_deref);
1264 
1265          /* Add a vector-to-scalar move if the uniform is a vector. */
1266          if (uni_def->num_components > 1) {
1267             nir_alu_src src = { 0 };
1268             src.src = nir_src_for_ssa(uni_def);
1269             src.swizzle[0] = scalar->comp;
1270             uni_def = nir_mov_alu(&b, src, 1);
1271          }
1272 
1273          /* Replace load input with load uniform. */
1274          nir_def_rewrite_uses(&intr->def, uni_def);
1275 
1276          progress = true;
1277       }
1278    }
1279 
1280    return progress;
1281 }
1282 
1283 /* The GLSL ES 3.20 spec says:
1284  *
1285  * "The precision of a vertex output does not need to match the precision of
1286  * the corresponding fragment input. The minimum precision at which vertex
1287  * outputs are interpolated is the minimum of the vertex output precision and
1288  * the fragment input precision, with the exception that for highp,
1289  * implementations do not have to support full IEEE 754 precision." (9.1 "Input
1290  * Output Matching by Name in Linked Programs")
1291  *
1292  * To implement this, when linking shaders we will take the minimum precision
1293  * qualifier (allowing drivers to interpolate at lower precision). For
1294  * input/output between non-fragment stages (e.g. VERTEX to GEOMETRY), the spec
1295  * requires we use the *last* specified precision if there is a conflict.
1296  *
1297  * Precisions are ordered as (NONE, HIGH, MEDIUM, LOW). If either precision is
1298  * NONE, we'll return the other precision, since there is no conflict.
1299  * Otherwise for fragment interpolation, we'll pick the smallest of (HIGH,
1300  * MEDIUM, LOW) by picking the maximum of the raw values - note the ordering is
1301  * "backwards". For non-fragment stages, we'll pick the latter precision to
1302  * comply with the spec. (Note that the order matters.)
1303  *
1304  * For streamout, "Variables declared with lowp or mediump precision are
1305  * promoted to highp before being written." (12.2 "Transform Feedback", p. 341
1306  * of OpenGL ES 3.2 specification). So drivers should promote them for
1307  * the transform feedback memory store, but not for the output store.
1308  */
1309 
1310 static unsigned
1311 nir_link_precision(unsigned producer, unsigned consumer, bool fs)
1312 {
1313    if (producer == GLSL_PRECISION_NONE)
1314       return consumer;
1315    else if (consumer == GLSL_PRECISION_NONE)
1316       return producer;
1317    else
1318       return fs ? MAX2(producer, consumer) : consumer;
1319 }
1320 
1321 static nir_variable *
1322 find_consumer_variable(const nir_shader *consumer,
1323                        const nir_variable *producer_var)
1324 {
1325    nir_foreach_variable_with_modes(var, consumer, nir_var_shader_in) {
1326       if (var->data.location == producer_var->data.location &&
1327           var->data.location_frac == producer_var->data.location_frac)
1328          return var;
1329    }
1330    return NULL;
1331 }
1332 
1333 void
1334 nir_link_varying_precision(nir_shader *producer, nir_shader *consumer)
1335 {
1336    bool frag = consumer->info.stage == MESA_SHADER_FRAGMENT;
1337 
1338    nir_foreach_shader_out_variable(producer_var, producer) {
1339       /* Skip if the slot is not assigned */
1340       if (producer_var->data.location < 0)
1341          continue;
1342 
1343       nir_variable *consumer_var = find_consumer_variable(consumer,
1344                                                           producer_var);
1345 
1346       /* Skip if the variable will be eliminated */
1347       if (!consumer_var)
1348          continue;
1349 
1350       /* Now we have a pair of variables. Let's pick the smaller precision. */
1351       unsigned precision_1 = producer_var->data.precision;
1352       unsigned precision_2 = consumer_var->data.precision;
1353       unsigned minimum = nir_link_precision(precision_1, precision_2, frag);
1354 
1355       /* Propagate the new precision */
1356       producer_var->data.precision = consumer_var->data.precision = minimum;
1357    }
1358 }
1359 
1360 bool
1361 nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer)
1362 {
1363    /* TODO: Add support for more shader stage combinations */
1364    if (consumer->info.stage != MESA_SHADER_FRAGMENT ||
1365        (producer->info.stage != MESA_SHADER_VERTEX &&
1366         producer->info.stage != MESA_SHADER_TESS_EVAL))
1367       return false;
1368 
1369    bool progress = false;
1370 
1371    nir_function_impl *impl = nir_shader_get_entrypoint(producer);
1372 
1373    struct hash_table *varying_values = _mesa_pointer_hash_table_create(NULL);
1374 
1375    /* If we find a store in the last block of the producer we can be sure this
1376     * is the only possible value for this output.
1377     */
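   /* As an illustrative (GLSL-style) sketch of what this targets, a producer
    * whose last block ends with
    *
    *    out_color = vec4(0.0);     // or:  out_color = some_uniform;
    *
    * lets the matching fragment input be rewritten as a constant load (or a
    * uniform load / flat input), after which the varying itself can be removed
    * as dead.  "out_color" and "some_uniform" are made-up names.
    */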
1378    nir_block *last_block = nir_impl_last_block(impl);
1379    nir_foreach_instr_reverse(instr, last_block) {
1380       if (instr->type != nir_instr_type_intrinsic)
1381          continue;
1382 
1383       nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1384 
1385       if (intr->intrinsic != nir_intrinsic_store_deref)
1386          continue;
1387 
1388       nir_deref_instr *out_deref = nir_src_as_deref(intr->src[0]);
1389       if (!nir_deref_mode_is(out_deref, nir_var_shader_out))
1390          continue;
1391 
1392       nir_variable *out_var = nir_deref_instr_get_variable(out_deref);
1393       if (!can_replace_varying(out_var))
1394          continue;
1395 
1396       nir_def *ssa = intr->src[1].ssa;
1397       if (ssa->parent_instr->type == nir_instr_type_load_const) {
1398          progress |= replace_varying_input_by_constant_load(consumer, intr);
1399          continue;
1400       }
1401 
1402       nir_scalar uni_scalar;
1403       if (is_direct_uniform_load(ssa, &uni_scalar)) {
1404          if (consumer->options->lower_varying_from_uniform) {
1405             progress |= replace_varying_input_by_uniform_load(consumer, intr,
1406                                                               &uni_scalar);
1407             continue;
1408          } else {
1409             nir_variable *in_var = get_matching_input_var(consumer, out_var);
1410             /* The varying is loaded from the same uniform, so no need to do any
1411              * interpolation. Mark it as flat explicitly.
1412              */
1413             if (!consumer->options->no_integers &&
1414                 in_var && in_var->data.interpolation <= INTERP_MODE_NOPERSPECTIVE) {
1415                in_var->data.interpolation = INTERP_MODE_FLAT;
1416                out_var->data.interpolation = INTERP_MODE_FLAT;
1417             }
1418          }
1419       }
1420 
1421       struct hash_entry *entry = _mesa_hash_table_search(varying_values, ssa);
1422       if (entry) {
1423          progress |= replace_duplicate_input(consumer,
1424                                              (nir_variable *)entry->data,
1425                                              intr);
1426       } else {
1427          nir_variable *in_var = get_matching_input_var(consumer, out_var);
1428          if (in_var) {
1429             _mesa_hash_table_insert(varying_values, ssa, in_var);
1430          }
1431       }
1432    }
1433 
1434    _mesa_hash_table_destroy(varying_values, NULL);
1435 
1436    return progress;
1437 }
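
/* A minimal usage sketch (hypothetical driver code, not part of this file):
 * a linker might run the precision and varying optimizations on each adjacent
 * producer/consumer pair and then clean up.  The pass ordering here is an
 * assumption, not something these helpers require.
 */
#if 0
static void
link_stage_pair_example(nir_shader *producer, nir_shader *consumer)
{
   /* Merge precision qualifiers so both sides agree (GLES linking rule). */
   nir_link_varying_precision(producer, consumer);

   /* Replace varying loads that are really constants, uniforms or duplicates. */
   if (nir_link_opt_varyings(producer, consumer)) {
      /* The rewritten loads may leave dead code behind in the consumer. */
      nir_opt_dce(consumer);
   }

   /* Drop outputs/inputs that are no longer referenced. */
   nir_remove_dead_variables(producer, nir_var_shader_out, NULL);
   nir_remove_dead_variables(consumer, nir_var_shader_in, NULL);
}
#endif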
1438 
1439 /* TODO: is there a better helper somewhere to sort a list? */
1440 
1441 static void
1442 insert_sorted(struct exec_list *var_list, nir_variable *new_var)
1443 {
1444    nir_foreach_variable_in_list(var, var_list) {
1445       /* Use the `per_primitive` bool to sort per-primitive variables
1446        * to the end of the list, so they get the last driver locations
1447        * by nir_assign_io_var_locations.
1448        *
1449        * This is done because AMD HW requires that per-primitive outputs
1450        * are the last params.
1451        * In the future we can add an option for this, if needed by other HW.
1452        */
1453       if (new_var->data.per_primitive < var->data.per_primitive ||
1454           (new_var->data.per_primitive == var->data.per_primitive &&
1455            (var->data.location > new_var->data.location ||
1456             (var->data.location == new_var->data.location &&
1457              var->data.location_frac > new_var->data.location_frac)))) {
1458          exec_node_insert_node_before(&var->node, &new_var->node);
1459          return;
1460       }
1461    }
1462    exec_list_push_tail(var_list, &new_var->node);
1463 }
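
/* For illustration, with hypothetical inputs the resulting order is:
 *
 *   VAR1.x, VAR1.z, VAR3          (per_primitive == false)
 *   VAR0,   VAR2                  (per_primitive == true)
 *
 * i.e. ordinary varyings first, sorted by (location, location_frac), followed
 * by all per-primitive varyings sorted the same way.
 */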
1464 
1465 static void
1466 sort_varyings(nir_shader *shader, nir_variable_mode mode,
1467               struct exec_list *sorted_list)
1468 {
1469    exec_list_make_empty(sorted_list);
1470    nir_foreach_variable_with_modes_safe(var, shader, mode) {
1471       exec_node_remove(&var->node);
1472       insert_sorted(sorted_list, var);
1473    }
1474 }
1475 
1476 void
1477 nir_sort_variables_by_location(nir_shader *shader, nir_variable_mode mode)
1478 {
1479    struct exec_list vars;
1480 
1481    sort_varyings(shader, mode, &vars);
1482    exec_list_append(&shader->variables, &vars);
1483 }
1484 
1485 void
1486 nir_assign_io_var_locations(nir_shader *shader, nir_variable_mode mode,
1487                             unsigned *size, gl_shader_stage stage)
1488 {
1489    unsigned location = 0;
1490    unsigned assigned_locations[VARYING_SLOT_TESS_MAX][2];
1491    uint64_t processed_locs[2] = { 0 };
1492 
1493    struct exec_list io_vars;
1494    sort_varyings(shader, mode, &io_vars);
1495 
1496    int ASSERTED last_loc = 0;
1497    bool ASSERTED last_per_prim = false;
1498    bool last_partial = false;
1499    nir_foreach_variable_in_list(var, &io_vars) {
1500       const struct glsl_type *type = var->type;
1501       if (nir_is_arrayed_io(var, stage)) {
1502          assert(glsl_type_is_array(type));
1503          type = glsl_get_array_element(type);
1504       }
1505 
1506       int base;
1507       if (var->data.mode == nir_var_shader_in && stage == MESA_SHADER_VERTEX)
1508          base = VERT_ATTRIB_GENERIC0;
1509       else if (var->data.mode == nir_var_shader_out &&
1510                stage == MESA_SHADER_FRAGMENT)
1511          base = FRAG_RESULT_DATA0;
1512       else
1513          base = VARYING_SLOT_VAR0;
1514 
1515       unsigned var_size, driver_size;
1516       if (var->data.compact) {
1517          /* If we are inside a partial compact,
1518           * don't allow another compact to be in this slot
1519           * if it starts at component 0.
1520           */
1521          if (last_partial && var->data.location_frac == 0) {
1522             location++;
1523          }
1524 
1525          /* compact variables must be arrays of scalars */
1526          assert(!var->data.per_view);
1527          assert(glsl_type_is_array(type));
1528          assert(glsl_type_is_scalar(glsl_get_array_element(type)));
1529          unsigned start = 4 * location + var->data.location_frac;
1530          unsigned end = start + glsl_get_length(type);
1531          var_size = driver_size = end / 4 - location;
1532          last_partial = end % 4 != 0;
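         /* Worked example (illustrative): a compact float[4] such as
          * gl_ClipDistance[4] at location_frac == 0 gives start == 4 * location
          * and end == start + 4, so var_size == 1 and last_partial == false.
          * A compact float[6] at location_frac == 0 gives end == 4 * location + 6,
          * so var_size is still 1 and last_partial becomes true; the spill into
          * the second slot is charged later, either by the next variable that
          * cannot share the partial slot or by the final last_partial check
          * below.
          */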
1533       } else {
1534          /* Compact variables bypass the normal varying compacting pass,
1535           * which means they cannot be in the same vec4 slot as a normal
1536           * variable. If part of the current slot is taken up by a compact
1537           * variable, we need to go to the next one.
1538           */
1539          if (last_partial) {
1540             location++;
1541             last_partial = false;
1542          }
1543 
1544          /* per-view variables have an extra array dimension, which is ignored
1545           * when counting user-facing slots (var->data.location), but *not*
1546           * with driver slots (var->data.driver_location). That is, each user
1547           * slot maps to multiple driver slots.
1548           */
1549          driver_size = glsl_count_attribute_slots(type, false);
1550          if (var->data.per_view) {
1551             assert(glsl_type_is_array(type));
1552             var_size =
1553                glsl_count_attribute_slots(glsl_get_array_element(type), false);
1554          } else {
1555             var_size = driver_size;
1556          }
1557       }
1558 
1559       /* Builtins don't allow component packing, so we only need to worry about
1560        * user-defined varyings sharing the same location.
1561        */
1562       bool processed = false;
1563       if (var->data.location >= base) {
1564          unsigned glsl_location = var->data.location - base;
1565 
1566          for (unsigned i = 0; i < var_size; i++) {
1567             if (processed_locs[var->data.index] &
1568                 ((uint64_t)1 << (glsl_location + i)))
1569                processed = true;
1570             else
1571                processed_locs[var->data.index] |=
1572                   ((uint64_t)1 << (glsl_location + i));
1573          }
1574       }
1575 
1576       /* Because component packing allows varyings to share the same location
1577        * we may already have processed this location.
1578        */
1579       if (processed) {
1580          /* TODO handle overlapping per-view variables */
1581          assert(!var->data.per_view);
1582          unsigned driver_location = assigned_locations[var->data.location][var->data.index];
1583          var->data.driver_location = driver_location;
1584 
1585          /* An array may be packed such that it crosses multiple other arrays
1586           * or variables, so we need to make sure we have allocated the elements
1587           * consecutively if the previously processed var was shorter than
1588           * the current array we are processing.
1589           *
1590           * NOTE: The code below assumes the var list is ordered in ascending
1591           * location order, but per-vertex/per-primitive outputs may be
1592           * grouped separately.
1593           */
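         /* Worked example (hypothetical): a packed float[2] at VAR0.x gets
          * driver locations D and D+1, leaving `location` at D + 2.  A packed
          * float[3] at VAR0.y then takes this path: its driver_location is D,
          * last_slot_location is D + 3 > D + 2, so one extra slot is allocated
          * and assigned_locations[VAR0 + 2] becomes D + 2.
          */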
1594          assert(last_loc <= var->data.location ||
1595                 last_per_prim != var->data.per_primitive);
1596          last_loc = var->data.location;
1597          last_per_prim = var->data.per_primitive;
1598          unsigned last_slot_location = driver_location + var_size;
1599          if (last_slot_location > location) {
1600             unsigned num_unallocated_slots = last_slot_location - location;
1601             unsigned first_unallocated_slot = var_size - num_unallocated_slots;
1602             for (unsigned i = first_unallocated_slot; i < var_size; i++) {
1603                assigned_locations[var->data.location + i][var->data.index] = location;
1604                location++;
1605             }
1606          }
1607          continue;
1608       }
1609 
1610       for (unsigned i = 0; i < var_size; i++) {
1611          assigned_locations[var->data.location + i][var->data.index] = location + i;
1612       }
1613 
1614       var->data.driver_location = location;
1615       location += driver_size;
1616    }
1617 
1618    if (last_partial)
1619       location++;
1620 
1621    exec_list_append(&shader->variables, &io_vars);
1622    *size = location;
1623 }
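
/* A minimal usage sketch (hypothetical driver code, not part of this file):
 * a backend might assign packed driver locations for a fragment shader like
 * this.  Storing the sizes in shader->num_inputs / num_outputs is just one
 * plausible choice, not something this helper requires.
 */
#if 0
static void
assign_fs_io_example(nir_shader *nir)
{
   assert(nir->info.stage == MESA_SHADER_FRAGMENT);

   /* Inputs and outputs get independent, densely packed driver locations. */
   nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
                               nir->info.stage);
   nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
                               nir->info.stage);
}
#endif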
1624