/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"

#include "util/bitscan.h"
#include "util/u_dynarray.h"

static const bool debug = false;

/**
 * Variable-based copy propagation
 *
 * Normally, NIR trusts in SSA form for most of its copy-propagation needs.
 * However, there are cases, especially when dealing with indirects, where SSA
 * won't help you.  This pass is for those times.  Specifically, it handles
 * the following things that the rest of NIR can't:
 *
 *  1) Copy-propagation on variables that have indirect access.  This includes
 *     propagating from indirect stores into indirect loads.
 *
 *  2) Removal of redundant load_deref intrinsics.  We can't trust regular CSE
 *     to do this because it isn't aware of variable writes that may alias the
 *     value and make the former load invalid.
 *
 * This pass uses an intermediate solution between being local / "per-block"
 * and a complete data-flow analysis.  It follows the control flow graph and
 * propagates the available copy information forward, invalidating data at
 * each cf_node.
 *
 * Removal of dead writes to variables is handled by another pass.
 */
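
/* As a purely illustrative sketch (not taken from any real shader), given
 *
 *    store_deref &arr[i], ssa_1
 *    ssa_2 = load_deref &arr[i]
 *
 * this pass can replace all uses of ssa_2 with ssa_1, even though the access
 * is indirect, provided no potentially aliasing write happens in between.
 * Likewise, two back-to-back load_deref of the same deref collapse into one.
 */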

struct copies {
   struct list_head node;

   /* Hash table of copies referenced by variables */
   struct hash_table *ht;

   /* Array of derefs that can't be chased back to a variable */
   struct util_dynarray arr;
};

struct copies_dynarray {
   struct list_head node;
   struct util_dynarray arr;

   /* The copies structure this dynarray was cloned or created for */
   struct copies *owner;
};

struct vars_written {
   nir_variable_mode modes;

   /* Key is deref and value is the uintptr_t with the write mask. */
   struct hash_table *derefs;
};

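/* Source of a copy: either a set of per-component SSA defs (together with
 * the component of each def to read) or another deref.  Which union member
 * is valid is selected by is_ssa.
 */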
struct value {
   bool is_ssa;
   union {
      struct {
         nir_def *def[NIR_MAX_VEC_COMPONENTS];
         uint8_t component[NIR_MAX_VEC_COMPONENTS];
      } ssa;
      nir_deref_and_path deref;
   };
};

static void
value_set_ssa_components(struct value *value, nir_def *def,
                         unsigned num_components)
{
   value->is_ssa = true;
   for (unsigned i = 0; i < num_components; i++) {
      value->ssa.def[i] = def;
      value->ssa.component[i] = i;
   }
}

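/* One tracked copy: the value currently known to live in dst. */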
struct copy_entry {
   struct value src;

   nir_deref_and_path dst;
};

struct copy_prop_var_state {
   nir_function_impl *impl;

   void *mem_ctx;
   linear_ctx *lin_ctx;

   /* Maps nodes to vars_written.  Used to invalidate copy entries when
    * visiting each node.
    */
   struct hash_table *vars_written_map;

   /* List of copy structures ready for reuse */
   struct list_head unused_copy_structs_list;

   bool progress;
};

static bool
value_equals_store_src(struct value *value, nir_intrinsic_instr *intrin)
{
   assert(intrin->intrinsic == nir_intrinsic_store_deref);
   nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);

   for (unsigned i = 0; i < intrin->num_components; i++) {
      if ((write_mask & (1 << i)) &&
          (value->ssa.def[i] != intrin->src[1].ssa ||
           value->ssa.component[i] != i))
         return false;
   }

   return true;
}

static struct vars_written *
create_vars_written(struct copy_prop_var_state *state)
{
   struct vars_written *written =
      linear_zalloc_child(state->lin_ctx, sizeof(struct vars_written));
   written->derefs = _mesa_pointer_hash_table_create(state->mem_ctx);
   return written;
}

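/* Pre-pass over a CF node that records which variable modes and which derefs
 * (with per-component write masks) may be written inside each if and loop.
 * The results are stored in state->vars_written_map and later used by
 * invalidate_copies_for_cf_node() to drop stale copy entries without
 * re-walking the node's body.
 */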
static void
gather_vars_written(struct copy_prop_var_state *state,
                    struct vars_written *written,
                    nir_cf_node *cf_node)
{
   struct vars_written *new_written = NULL;

   switch (cf_node->type) {
   case nir_cf_node_function: {
      nir_function_impl *impl = nir_cf_node_as_function(cf_node);
      foreach_list_typed_safe(nir_cf_node, cf_node, node, &impl->body)
         gather_vars_written(state, NULL, cf_node);
      break;
   }

   case nir_cf_node_block: {
      if (!written)
         break;

      nir_block *block = nir_cf_node_as_block(cf_node);
      nir_foreach_instr(instr, block) {
         if (instr->type == nir_instr_type_call) {
            written->modes |= nir_var_shader_out |
                              nir_var_shader_temp |
                              nir_var_function_temp |
                              nir_var_mem_ssbo |
                              nir_var_mem_shared |
                              nir_var_mem_global;
            continue;
         }

         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_barrier:
            if (nir_intrinsic_memory_semantics(intrin) & NIR_MEMORY_ACQUIRE)
               written->modes |= nir_intrinsic_memory_modes(intrin);
            break;

         case nir_intrinsic_emit_vertex:
         case nir_intrinsic_emit_vertex_with_counter:
            written->modes = nir_var_shader_out;
            break;

         case nir_intrinsic_trace_ray:
         case nir_intrinsic_execute_callable:
         case nir_intrinsic_rt_trace_ray:
         case nir_intrinsic_rt_execute_callable: {
            nir_deref_instr *payload =
               nir_src_as_deref(*nir_get_shader_call_payload_src(intrin));

            nir_component_mask_t mask = (1 << glsl_get_vector_elements(payload->type)) - 1;

            struct hash_entry *ht_entry =
               _mesa_hash_table_search(written->derefs, payload);
            if (ht_entry) {
               ht_entry->data = (void *)(mask | (uintptr_t)ht_entry->data);
            } else {
               _mesa_hash_table_insert(written->derefs, payload,
                                       (void *)(uintptr_t)mask);
            }
            break;
         }

         case nir_intrinsic_report_ray_intersection:
            written->modes |= nir_var_mem_ssbo |
                              nir_var_mem_global |
                              nir_var_shader_call_data |
                              nir_var_ray_hit_attrib;
            break;

         case nir_intrinsic_ignore_ray_intersection:
         case nir_intrinsic_terminate_ray:
            written->modes |= nir_var_mem_ssbo |
                              nir_var_mem_global |
                              nir_var_shader_call_data;
            break;

         case nir_intrinsic_deref_atomic:
         case nir_intrinsic_deref_atomic_swap:
         case nir_intrinsic_store_deref:
         case nir_intrinsic_copy_deref:
         case nir_intrinsic_memcpy_deref: {
            /* Destination in all of store_deref, copy_deref and the atomics is src[0]. */
            nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);

            uintptr_t mask = intrin->intrinsic == nir_intrinsic_store_deref ? nir_intrinsic_write_mask(intrin) : (1 << glsl_get_vector_elements(dst->type)) - 1;

            struct hash_entry *ht_entry = _mesa_hash_table_search(written->derefs, dst);
            if (ht_entry)
               ht_entry->data = (void *)(mask | (uintptr_t)ht_entry->data);
            else
               _mesa_hash_table_insert(written->derefs, dst, (void *)mask);

            break;
         }

         default:
            break;
         }
      }

      break;
   }

   case nir_cf_node_if: {
      nir_if *if_stmt = nir_cf_node_as_if(cf_node);

      new_written = create_vars_written(state);

      foreach_list_typed_safe(nir_cf_node, cf_node, node, &if_stmt->then_list)
         gather_vars_written(state, new_written, cf_node);

      foreach_list_typed_safe(nir_cf_node, cf_node, node, &if_stmt->else_list)
         gather_vars_written(state, new_written, cf_node);

      break;
   }

   case nir_cf_node_loop: {
      nir_loop *loop = nir_cf_node_as_loop(cf_node);
      assert(!nir_loop_has_continue_construct(loop));

      new_written = create_vars_written(state);

      foreach_list_typed_safe(nir_cf_node, cf_node, node, &loop->body)
         gather_vars_written(state, new_written, cf_node);

      break;
   }

   default:
      unreachable("Invalid CF node type");
   }

   if (new_written) {
      /* Merge the new information into the parent control flow node. */
      if (written) {
         written->modes |= new_written->modes;
         hash_table_foreach(new_written->derefs, new_entry) {
            struct hash_entry *old_entry =
               _mesa_hash_table_search_pre_hashed(written->derefs, new_entry->hash,
                                                  new_entry->key);
            if (old_entry) {
               nir_component_mask_t merged = (uintptr_t)new_entry->data |
                                             (uintptr_t)old_entry->data;
               old_entry->data = (void *)((uintptr_t)merged);
            } else {
               _mesa_hash_table_insert_pre_hashed(written->derefs, new_entry->hash,
                                                  new_entry->key, new_entry->data);
            }
         }
      }
      _mesa_hash_table_insert(state->vars_written_map, cf_node, new_written);
   }
}

/* Creates a fresh dynarray */
static struct copies_dynarray *
get_copies_dynarray(struct copy_prop_var_state *state)
{
   struct copies_dynarray *cp_arr =
      ralloc(state->mem_ctx, struct copies_dynarray);
   util_dynarray_init(&cp_arr->arr, state->mem_ctx);
   return cp_arr;
}

/* Checks if the pointer leads to a cloned copy of the array for this hash
 * table or if the pointer was inherited from when the hash table was cloned.
 */
static bool
copies_owns_ht_entry(struct copies *copies,
                     struct hash_entry *ht_entry)
{
   assert(copies && ht_entry && ht_entry->data);
   struct copies_dynarray *copies_array = ht_entry->data;
   return copies_array->owner == copies;
}

static void
clone_copies_dynarray_from_src(struct copies_dynarray *dst,
                               struct copies_dynarray *src)
{
   util_dynarray_append_dynarray(&dst->arr, &src->arr);
}

/* Gets the copies array from the hash table entry, or clones the source
 * array if the entry still points at an array owned by another copies
 * structure.  The arrays are not cloned when the hash table itself is
 * cloned because it's expensive to clone everything and most values will
 * never actually be accessed.
 */
static struct copies_dynarray *
get_copies_array_from_ht_entry(struct copy_prop_var_state *state,
                               struct copies *copies,
                               struct hash_entry *ht_entry)
{
   struct copies_dynarray *copies_array;
   if (copies_owns_ht_entry(copies, ht_entry)) {
      /* The array already exists so just return it */
      copies_array = (struct copies_dynarray *)ht_entry->data;
   } else {
      /* Clone the array and set the data value for future access */
      copies_array = get_copies_dynarray(state);
      copies_array->owner = copies;
      clone_copies_dynarray_from_src(copies_array, ht_entry->data);
      ht_entry->data = copies_array;
   }

   return copies_array;
}

static struct copies_dynarray *
copies_array_for_var(struct copy_prop_var_state *state,
                     struct copies *copies, nir_variable *var)
{
   struct hash_entry *entry = _mesa_hash_table_search(copies->ht, var);
   if (entry != NULL)
      return get_copies_array_from_ht_entry(state, copies, entry);

   struct copies_dynarray *copies_array = get_copies_dynarray(state);
   copies_array->owner = copies;
   _mesa_hash_table_insert(copies->ht, var, copies_array);

   return copies_array;
}

static struct util_dynarray *
copies_array_for_deref(struct copy_prop_var_state *state,
                       struct copies *copies, nir_deref_and_path *deref)
{
   nir_get_deref_path(state->mem_ctx, deref);

   struct util_dynarray *copies_array;
   if (deref->_path->path[0]->deref_type != nir_deref_type_var) {
      copies_array = &copies->arr;
   } else {
      struct copies_dynarray *cpda =
         copies_array_for_var(state, copies, deref->_path->path[0]->var);
      copies_array = &cpda->arr;
   }

   return copies_array;
}

static struct copy_entry *
copy_entry_create(struct copy_prop_var_state *state,
                  struct copies *copies, nir_deref_and_path *deref)
{
   struct util_dynarray *copies_array =
      copies_array_for_deref(state, copies, deref);

   struct copy_entry new_entry = {
      .dst = *deref,
   };
   util_dynarray_append(copies_array, struct copy_entry, new_entry);
   return util_dynarray_top_ptr(copies_array, struct copy_entry);
}

/* Remove copy entry by swapping it with the last element and reducing the
 * size.  If used inside an iteration on copies, it must be a reverse
 * (backwards) iteration.  It is safe to use in those cases because the swap
 * will not affect the rest of the iteration.
 */
static void
copy_entry_remove(struct util_dynarray *copies,
                  struct copy_entry *entry,
                  struct copy_entry **relocated_entry)
{
   const struct copy_entry *src =
      util_dynarray_pop_ptr(copies, struct copy_entry);

   /* Because we're removing elements from an array, pointers to those
    * elements are not stable as we modify the array.
    * If relocated_entry != NULL, it points to an entry we saved off earlier
    * and want to keep pointing to the right spot.
    */
   if (relocated_entry && *relocated_entry == src)
      *relocated_entry = entry;

   if (src != entry)
      *entry = *src;
}

static bool
is_array_deref_of_vector(const nir_deref_and_path *deref)
{
   if (deref->instr->deref_type != nir_deref_type_array)
      return false;
   nir_deref_instr *parent = nir_deref_instr_parent(deref->instr);
   return glsl_type_is_vector(parent->type);
}

static struct copy_entry *
lookup_entry_for_deref(struct copy_prop_var_state *state,
                       struct copies *copies,
                       nir_deref_and_path *deref,
                       nir_deref_compare_result allowed_comparisons,
                       bool *equal)
{
   struct util_dynarray *copies_array =
      copies_array_for_deref(state, copies, deref);

   struct copy_entry *entry = NULL;
   util_dynarray_foreach(copies_array, struct copy_entry, iter) {
      nir_deref_compare_result result =
         nir_compare_derefs_and_paths(state->mem_ctx, &iter->dst, deref);
      if (result & allowed_comparisons) {
         entry = iter;
         if (result & nir_derefs_equal_bit) {
            if (equal != NULL)
               *equal = true;
            break;
         }
         /* Keep looking in case we have an equal match later in the array. */
      }
   }

   return entry;
}

static void
lookup_entry_and_kill_aliases_copy_array(struct copy_prop_var_state *state,
                                         struct util_dynarray *copies_array,
                                         nir_deref_and_path *deref,
                                         unsigned write_mask,
                                         bool remove_entry,
                                         struct copy_entry **entry,
                                         bool *entry_removed)
{
   util_dynarray_foreach_reverse(copies_array, struct copy_entry, iter) {
      nir_deref_compare_result comp =
         nir_compare_derefs_and_paths(state->mem_ctx, &iter->dst, deref);

      if (comp & nir_derefs_equal_bit) {
         /* Make sure it is unique. */
         assert(!*entry && !*entry_removed);
         if (remove_entry) {
            copy_entry_remove(copies_array, iter, NULL);
            *entry_removed = true;
         } else {
            *entry = iter;
         }
      } else if (comp & nir_derefs_may_alias_bit) {
         copy_entry_remove(copies_array, iter, entry);
      }
   }
}

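/* Removes any entry whose destination may alias the given deref.  An entry
 * whose destination is exactly equal to the deref is either removed too
 * (remove_entry == true) or returned to the caller so it can be updated in
 * place.
 */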
static struct copy_entry *
lookup_entry_and_kill_aliases(struct copy_prop_var_state *state,
                              struct copies *copies,
                              nir_deref_and_path *deref,
                              unsigned write_mask,
                              bool remove_entry)
{
   /* TODO: Take into account the write_mask. */

   bool UNUSED entry_removed = false;
   struct copy_entry *entry = NULL;

   nir_get_deref_path(state->mem_ctx, deref);

   /* For any other variable mode, two different variables never alias.  So
    * we only need to compare across different vars (looping over the whole
    * hash table) for SSBOs and shared vars.
    */
   if (deref->_path->path[0]->deref_type != nir_deref_type_var ||
       deref->_path->path[0]->var->data.mode == nir_var_mem_ssbo ||
       deref->_path->path[0]->var->data.mode == nir_var_mem_shared) {

      hash_table_foreach(copies->ht, ht_entry) {
         nir_variable *var = (nir_variable *)ht_entry->key;
         if (deref->_path->path[0]->deref_type == nir_deref_type_var &&
             var->data.mode != deref->_path->path[0]->var->data.mode)
            continue;

         struct copies_dynarray *copies_array =
            get_copies_array_from_ht_entry(state, copies, ht_entry);

         lookup_entry_and_kill_aliases_copy_array(state, &copies_array->arr,
                                                  deref, write_mask,
                                                  remove_entry, &entry,
                                                  &entry_removed);

         if (copies_array->arr.size == 0) {
            _mesa_hash_table_remove(copies->ht, ht_entry);
         }
      }

      lookup_entry_and_kill_aliases_copy_array(state, &copies->arr, deref,
                                               write_mask, remove_entry,
                                               &entry, &entry_removed);
   } else {
      struct copies_dynarray *cpda =
         copies_array_for_var(state, copies, deref->_path->path[0]->var);
      struct util_dynarray *copies_array = &cpda->arr;

      lookup_entry_and_kill_aliases_copy_array(state, copies_array, deref,
                                               write_mask, remove_entry,
                                               &entry, &entry_removed);

      if (copies_array->size == 0) {
         _mesa_hash_table_remove_key(copies->ht, deref->_path->path[0]->var);
      }
   }

   return entry;
}

static void
kill_aliases(struct copy_prop_var_state *state,
             struct copies *copies,
             nir_deref_and_path *deref,
             unsigned write_mask)
{
   /* TODO: Take into account the write_mask. */

   lookup_entry_and_kill_aliases(state, copies, deref, write_mask, true);
}

static struct copy_entry *
get_entry_and_kill_aliases(struct copy_prop_var_state *state,
                           struct copies *copies,
                           nir_deref_and_path *deref,
                           unsigned write_mask)
{
   /* TODO: Take into account the write_mask. */

   struct copy_entry *entry =
      lookup_entry_and_kill_aliases(state, copies, deref, write_mask, false);
   if (entry == NULL)
      entry = copy_entry_create(state, copies, deref);

   return entry;
}

static void
apply_barrier_for_modes_to_dynarr(struct util_dynarray *copies_array,
                                  nir_variable_mode modes)
{
   util_dynarray_foreach_reverse(copies_array, struct copy_entry, iter) {
      if (nir_deref_mode_may_be(iter->dst.instr, modes) ||
          (!iter->src.is_ssa && nir_deref_mode_may_be(iter->src.deref.instr, modes)))
         copy_entry_remove(copies_array, iter, NULL);
   }
}

static void
apply_barrier_for_modes(struct copy_prop_var_state *state,
                        struct copies *copies, nir_variable_mode modes)
{
   hash_table_foreach(copies->ht, ht_entry) {
      struct copies_dynarray *copies_array =
         get_copies_array_from_ht_entry(state, copies, ht_entry);

      apply_barrier_for_modes_to_dynarr(&copies_array->arr, modes);
   }

   apply_barrier_for_modes_to_dynarr(&copies->arr, modes);
}

static void
value_set_from_value(struct value *value, const struct value *from,
                     unsigned base_index, unsigned write_mask)
{
   /* We can't have non-zero indexes with non-trivial write masks */
   assert(base_index == 0 || write_mask == 1);

   if (from->is_ssa) {
      /* Clear value if it was being used as non-SSA. */
      value->is_ssa = true;
      /* Only overwrite the written components */
      for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) {
         if (write_mask & (1 << i)) {
            value->ssa.def[base_index + i] = from->ssa.def[i];
            value->ssa.component[base_index + i] = from->ssa.component[i];
         }
      }
   } else {
      /* Non-ssa stores always write everything */
      value->is_ssa = false;
      value->deref = from->deref;
   }
}

/* Try to load a single element of a vector from the copy_entry.  If the data
 * isn't available, just let the original intrinsic do the work.
 */
static bool
load_element_from_ssa_entry_value(struct copy_prop_var_state *state,
                                  struct copy_entry *entry,
                                  nir_builder *b, nir_intrinsic_instr *intrin,
                                  struct value *value, unsigned index)
{
   assert(index < glsl_get_vector_elements(entry->dst.instr->type));

   /* We don't have the element available, so let the instruction do the work. */
   if (!entry->src.ssa.def[index])
      return false;

   b->cursor = nir_instr_remove(&intrin->instr);
   intrin->instr.block = NULL;

   assert(entry->src.ssa.component[index] <
          entry->src.ssa.def[index]->num_components);
   nir_def *def = nir_channel(b, entry->src.ssa.def[index],
                              entry->src.ssa.component[index]);

   *value = (struct value){
      .is_ssa = true,
      {
         .ssa = {
            .def = { def },
            .component = { 0 },
         },
      }
   };

   return true;
}

/* Do a "load" from an SSA-based entry and return it in "value" as a value
 * with a single SSA def.  Because an entry could reference multiple different
 * SSA defs, a vecN operation may be inserted to combine them into a single
 * SSA def before handing it back to the caller.  If the load instruction is
 * no longer needed, it is removed and nir_instr::block is set to NULL.  (It
 * is possible, in some cases, for the load to be used in the vecN operation
 * in which case it isn't deleted.)
 */
static bool
load_from_ssa_entry_value(struct copy_prop_var_state *state,
                          struct copy_entry *entry,
                          nir_builder *b, nir_intrinsic_instr *intrin,
                          nir_deref_and_path *src, struct value *value)
{
   if (is_array_deref_of_vector(src)) {
      if (nir_src_is_const(src->instr->arr.index)) {
         unsigned index = nir_src_as_uint(src->instr->arr.index);
         return load_element_from_ssa_entry_value(state, entry, b, intrin,
                                                  value, index);
      }

      /* An SSA copy_entry for the vector won't help indirect load. */
      if (glsl_type_is_vector(entry->dst.instr->type)) {
         assert(entry->dst.instr->type == nir_deref_instr_parent(src->instr)->type);
         /* TODO: If all SSA entries are there, try an if-ladder. */
         return false;
      }
   }

   *value = entry->src;

   const struct glsl_type *type = entry->dst.instr->type;
   unsigned num_components = glsl_get_vector_elements(type);

   nir_component_mask_t available = 0;
   bool all_same = true;
   for (unsigned i = 0; i < num_components; i++) {
      if (value->ssa.def[i])
         available |= (1 << i);

      if (value->ssa.def[i] != value->ssa.def[0])
         all_same = false;

      if (value->ssa.component[i] != i)
         all_same = false;
   }

   if (all_same) {
      /* Our work here is done */
      b->cursor = nir_instr_remove(&intrin->instr);
      intrin->instr.block = NULL;
      return true;
   }

   if (available != (1 << num_components) - 1 &&
       intrin->intrinsic == nir_intrinsic_load_deref &&
       (available & nir_def_components_read(&intrin->def)) == 0) {
      /* If none of the components read are available as SSA values, then we
       * should just bail.  Otherwise, we would end up replacing the uses of
       * the load_deref with a vecN() that just gathers up its components.
       */
      return false;
   }

   b->cursor = nir_after_instr(&intrin->instr);

   nir_def *load_def =
      intrin->intrinsic == nir_intrinsic_load_deref ? &intrin->def : NULL;

   bool keep_intrin = false;
   nir_scalar comps[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < num_components; i++) {
      if (value->ssa.def[i]) {
         comps[i] = nir_get_scalar(value->ssa.def[i], value->ssa.component[i]);
      } else {
         /* We don't have anything for this component in our
          * list.  Just re-use a channel from the load.
          */
         if (load_def == NULL)
            load_def = nir_load_deref(b, entry->dst.instr);

         if (load_def->parent_instr == &intrin->instr)
            keep_intrin = true;

         comps[i] = nir_get_scalar(load_def, i);
      }
   }

   nir_def *vec = nir_vec_scalars(b, comps, num_components);
   value_set_ssa_components(value, vec, num_components);

   if (!keep_intrin) {
      /* Removing this instruction should not touch the cursor because we
       * created the cursor after the intrinsic and have added at least one
       * instruction (the vec) since then.
       */
      assert(b->cursor.instr != &intrin->instr);
      nir_instr_remove(&intrin->instr);
      intrin->instr.block = NULL;
   }

   return true;
}

/**
 * Specialize the wildcards in a deref chain
 *
 * This function returns a deref chain identical to \param deref except that
 * some of its wildcards are replaced with indices from \param specific.  The
 * process is guided by \param guide which references the same type as \param
 * specific but has the same wildcard array lengths as \param deref.
 */
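/* For example (illustrative only): if a copy entry records
 *    dst_arr[*] = src_arr[*]
 * and we then load from dst_arr[3], this is called with deref = src_arr[*],
 * guide = dst_arr[*] and specific = dst_arr[3], and it builds the
 * specialized deref src_arr[3].
 */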
static nir_deref_instr *
specialize_wildcards(nir_builder *b,
                     nir_deref_path *deref,
                     nir_deref_path *guide,
                     nir_deref_path *specific)
{
   nir_deref_instr **deref_p = &deref->path[1];
   nir_deref_instr *ret_tail = deref->path[0];
   for (; *deref_p; deref_p++) {
      if ((*deref_p)->deref_type == nir_deref_type_array_wildcard)
         break;
      ret_tail = *deref_p;
   }

   nir_deref_instr **guide_p = &guide->path[1];
   nir_deref_instr **spec_p = &specific->path[1];
   for (; *deref_p; deref_p++) {
      if ((*deref_p)->deref_type == nir_deref_type_array_wildcard) {
         /* This is where things get tricky.  We have to search through
          * the entry deref to find its corresponding wildcard and fill
          * this slot in with the value from the src.
          */
         while (*guide_p &&
                (*guide_p)->deref_type != nir_deref_type_array_wildcard) {
            guide_p++;
            spec_p++;
         }
         assert(*guide_p && *spec_p);

         ret_tail = nir_build_deref_follower(b, ret_tail, *spec_p);

         guide_p++;
         spec_p++;
      } else {
         ret_tail = nir_build_deref_follower(b, ret_tail, *deref_p);
      }
   }

   return ret_tail;
}

/* Do a "load" from a deref-based entry and return it in "value" as a value.
 * The deref returned in "value" will always be a fresh copy so the caller
 * can steal it and assign it to the instruction directly without copying it
 * again.
 */
static bool
load_from_deref_entry_value(struct copy_prop_var_state *state,
                            struct copy_entry *entry,
                            nir_builder *b, nir_intrinsic_instr *intrin,
                            nir_deref_and_path *src, struct value *value)
{
   *value = entry->src;

   b->cursor = nir_instr_remove(&intrin->instr);

   nir_deref_path *entry_dst_path = nir_get_deref_path(state->mem_ctx, &entry->dst);
   nir_deref_path *src_path = nir_get_deref_path(state->mem_ctx, src);

   bool need_to_specialize_wildcards = false;
   nir_deref_instr **entry_p = &entry_dst_path->path[1];
   nir_deref_instr **src_p = &src_path->path[1];
   while (*entry_p && *src_p) {
      nir_deref_instr *entry_tail = *entry_p++;
      nir_deref_instr *src_tail = *src_p++;

      if (src_tail->deref_type == nir_deref_type_array &&
          entry_tail->deref_type == nir_deref_type_array_wildcard)
         need_to_specialize_wildcards = true;
   }

   /* If the entry deref is longer than the source deref then it refers to a
    * smaller type and we can't source from it.
    */
   assert(*entry_p == NULL);

   value->deref._path = NULL;

   if (need_to_specialize_wildcards) {
      /* The entry has some wildcards that are not in src.  This means we need
       * to construct a new deref based on the entry but using the wildcards
       * from the source and guided by the entry dst.  Oof.
       */
      nir_deref_path *entry_src_path =
         nir_get_deref_path(state->mem_ctx, &entry->src.deref);
      value->deref.instr = specialize_wildcards(b, entry_src_path,
                                                entry_dst_path, src_path);
   }

   /* If our source deref is longer than the entry deref, that's ok because
    * it just means the entry deref needs to be extended a bit.
    */
   while (*src_p) {
      nir_deref_instr *src_tail = *src_p++;
      value->deref.instr = nir_build_deref_follower(b, value->deref.instr, src_tail);
   }

   return true;
}

static bool
try_load_from_entry(struct copy_prop_var_state *state, struct copy_entry *entry,
                    nir_builder *b, nir_intrinsic_instr *intrin,
                    nir_deref_and_path *src, struct value *value)
{
   if (entry == NULL)
      return false;

   if (entry->src.is_ssa) {
      return load_from_ssa_entry_value(state, entry, b, intrin, src, value);
   } else {
      return load_from_deref_entry_value(state, entry, b, intrin, src, value);
   }
}

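/* Drops any copy entry that may have been clobbered inside the given CF
 * node, using the modes and deref write masks recorded earlier by
 * gather_vars_written().
 */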
static void
invalidate_copies_for_cf_node(struct copy_prop_var_state *state,
                              struct copies *copies,
                              nir_cf_node *cf_node)
{
   struct hash_entry *ht_entry = _mesa_hash_table_search(state->vars_written_map, cf_node);
   assert(ht_entry);

   struct vars_written *written = ht_entry->data;
   if (written->modes) {
      hash_table_foreach(copies->ht, ht_entry) {
         struct copies_dynarray *copies_array =
            get_copies_array_from_ht_entry(state, copies, ht_entry);

         util_dynarray_foreach_reverse(&copies_array->arr, struct copy_entry, entry) {
            if (nir_deref_mode_may_be(entry->dst.instr, written->modes))
               copy_entry_remove(&copies_array->arr, entry, NULL);
         }

         if (copies_array->arr.size == 0) {
            _mesa_hash_table_remove(copies->ht, ht_entry);
         }
      }

      util_dynarray_foreach_reverse(&copies->arr, struct copy_entry, entry) {
         if (nir_deref_mode_may_be(entry->dst.instr, written->modes))
            copy_entry_remove(&copies->arr, entry, NULL);
      }
   }

   hash_table_foreach(written->derefs, entry) {
      nir_deref_instr *deref_written = (nir_deref_instr *)entry->key;
      nir_deref_and_path deref = { deref_written, NULL };
      kill_aliases(state, copies, &deref, (uintptr_t)entry->data);
   }
}

static void
print_value(struct value *value, unsigned num_components)
{
   bool same_ssa = true;
   for (unsigned i = 0; i < num_components; i++) {
      if (value->ssa.component[i] != i ||
          (i > 0 && value->ssa.def[i - 1] != value->ssa.def[i])) {
         same_ssa = false;
         break;
      }
   }
   if (same_ssa) {
      printf(" ssa_%d", value->ssa.def[0]->index);
   } else {
      for (int i = 0; i < num_components; i++) {
         if (value->ssa.def[i])
            printf(" ssa_%d[%u]", value->ssa.def[i]->index, value->ssa.component[i]);
         else
            printf(" _");
      }
   }
}

static void
print_copy_entry(struct copy_entry *entry)
{
   printf("    %s ", glsl_get_type_name(entry->dst.instr->type));
   nir_print_deref(entry->dst.instr, stdout);
   printf(":\t");

   unsigned num_components = glsl_get_vector_elements(entry->dst.instr->type);
   print_value(&entry->src, num_components);
   printf("\n");
}

static void
dump_instr(nir_instr *instr)
{
   printf("  ");
   nir_print_instr(instr, stdout);
   printf("\n");
}

static void
dump_copy_entries(struct copies *copies)
{
   hash_table_foreach(copies->ht, ht_entry) {
      struct util_dynarray *copies_array =
         &((struct copies_dynarray *)ht_entry->data)->arr;

      util_dynarray_foreach(copies_array, struct copy_entry, iter)
         print_copy_entry(iter);
   }

   util_dynarray_foreach(&copies->arr, struct copy_entry, iter)
      print_copy_entry(iter);

   printf("\n");
}

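/* Core per-block worker: walks the instructions of a single block,
 * propagating known values into load_deref and copy_deref sources, removing
 * redundant loads and self-copies, and invalidating copy entries that may be
 * clobbered by stores, atomics, calls and barriers.
 */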
static void
copy_prop_vars_block(struct copy_prop_var_state *state,
                     nir_builder *b, nir_block *block,
                     struct copies *copies)
{
   if (debug) {
      printf("# block%d\n", block->index);
      dump_copy_entries(copies);
   }

   nir_foreach_instr_safe(instr, block) {
      if (debug && instr->type == nir_instr_type_deref)
         dump_instr(instr);

      if (instr->type == nir_instr_type_call) {
         if (debug)
            dump_instr(instr);
         apply_barrier_for_modes(state, copies, nir_var_shader_out | nir_var_shader_temp | nir_var_function_temp | nir_var_mem_ssbo | nir_var_mem_shared | nir_var_mem_global);
         if (debug)
            dump_copy_entries(copies);
         continue;
      }

      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
      switch (intrin->intrinsic) {
      case nir_intrinsic_barrier:
         if (debug)
            dump_instr(instr);

         if (nir_intrinsic_memory_semantics(intrin) & NIR_MEMORY_ACQUIRE)
            apply_barrier_for_modes(state, copies, nir_intrinsic_memory_modes(intrin));
         break;

      case nir_intrinsic_emit_vertex:
      case nir_intrinsic_emit_vertex_with_counter:
         if (debug)
            dump_instr(instr);

         apply_barrier_for_modes(state, copies, nir_var_shader_out);
         break;

      case nir_intrinsic_report_ray_intersection:
         apply_barrier_for_modes(state, copies, nir_var_mem_ssbo | nir_var_mem_global | nir_var_shader_call_data | nir_var_ray_hit_attrib);
         break;

      case nir_intrinsic_ignore_ray_intersection:
      case nir_intrinsic_terminate_ray:
         apply_barrier_for_modes(state, copies, nir_var_mem_ssbo | nir_var_mem_global | nir_var_shader_call_data);
         break;

      case nir_intrinsic_load_deref: {
         if (debug)
            dump_instr(instr);

         if (nir_intrinsic_access(intrin) & ACCESS_VOLATILE)
            break;

         nir_deref_and_path src = { nir_src_as_deref(intrin->src[0]), NULL };

         /* If this is a load from a read-only mode, then all this pass would
          * do is combine redundant loads and CSE should be more efficient for
          * that.
          */
         nir_variable_mode ignore = nir_var_read_only_modes & ~nir_var_vec_indexable_modes;
         if (nir_deref_mode_must_be(src.instr, ignore))
            break;

         /* Ignore trivial casts. If trivial casts are applied to array derefs of vectors,
          * not doing this causes is_array_deref_of_vector to (wrongly) return false. */
         while (src.instr->deref_type == nir_deref_type_cast &&
                nir_deref_instr_parent(src.instr) && nir_deref_cast_is_trivial(src.instr))
            src.instr = nir_deref_instr_parent(src.instr);

         /* Direct array_derefs of vectors operate on the vectors (the parent
          * deref).  Indirects will be handled like other derefs.
          */
         int vec_index = 0;
         nir_deref_and_path vec_src = src;
         if (is_array_deref_of_vector(&src) && nir_src_is_const(src.instr->arr.index)) {
            vec_src.instr = nir_deref_instr_parent(src.instr);
            unsigned vec_comps = glsl_get_vector_elements(vec_src.instr->type);
            vec_index = nir_src_as_uint(src.instr->arr.index);

            /* Loading from an invalid index yields an undef */
            if (vec_index >= vec_comps) {
               b->cursor = nir_instr_remove(instr);
               nir_def *u = nir_undef(b, 1, intrin->def.bit_size);
               nir_def_rewrite_uses(&intrin->def, u);
               state->progress = true;
               break;
            }
         }

         bool src_entry_equal = false;
         struct copy_entry *src_entry =
            lookup_entry_for_deref(state, copies, &src,
                                   nir_derefs_a_contains_b_bit, &src_entry_equal);
         struct value value = { 0 };
         if (try_load_from_entry(state, src_entry, b, intrin, &src, &value)) {
            if (value.is_ssa) {
               /* try_load_from_entry() has already ensured that we get a
                * single SSA value that has all of the channels.  We just have
                * to do the rewrite operation.  Note for array derefs of
                * vectors, the channel 0 is used.
                */
               if (intrin->instr.block) {
                  /* The lookup left our instruction in-place.  This means it
                   * must have used it to vec up a bunch of different sources.
                   * We need to be careful when rewriting uses so we don't
                   * rewrite the vecN itself.
                   */
                  nir_def_rewrite_uses_after(&intrin->def,
                                             value.ssa.def[0],
                                             value.ssa.def[0]->parent_instr);
               } else {
                  nir_def_rewrite_uses(&intrin->def,
                                       value.ssa.def[0]);
               }
            } else {
               /* We're turning it into a load of a different variable */
               intrin->src[0] = nir_src_for_ssa(&value.deref.instr->def);

               /* Put it back in again. */
               nir_builder_instr_insert(b, instr);
               value_set_ssa_components(&value, &intrin->def,
                                        intrin->num_components);
            }
            state->progress = true;
         } else {
            value_set_ssa_components(&value, &intrin->def,
                                     intrin->num_components);
         }

         /* Now that we have a value, we're going to store it back so that we
          * have the right value next time we come looking for it.  In order
          * to do this, we need an exact match, not just something that
          * contains what we're looking for.
          *
          * We avoid doing another lookup if src.instr == vec_src.instr.
          */
         struct copy_entry *entry = src_entry;
         if (src.instr != vec_src.instr)
            entry = lookup_entry_for_deref(state, copies, &vec_src,
                                           nir_derefs_equal_bit, NULL);
         else if (!src_entry_equal)
            entry = NULL;

         if (!entry)
            entry = copy_entry_create(state, copies, &vec_src);

         /* Update the entry with the value of the load.  This way
          * we can potentially remove subsequent loads.
          */
         value_set_from_value(&entry->src, &value, vec_index,
                              (1 << intrin->num_components) - 1);
         break;
      }

      case nir_intrinsic_store_deref: {
         if (debug)
            dump_instr(instr);

         nir_deref_and_path dst = { nir_src_as_deref(intrin->src[0]), NULL };
         assert(glsl_type_is_vector_or_scalar(dst.instr->type));

         /* Ignore trivial casts. If trivial casts are applied to array derefs of vectors,
          * not doing this causes is_array_deref_of_vector to (wrongly) return false. */
         while (dst.instr->deref_type == nir_deref_type_cast &&
                nir_deref_instr_parent(dst.instr) && nir_deref_cast_is_trivial(dst.instr))
            dst.instr = nir_deref_instr_parent(dst.instr);

         /* Direct array_derefs of vectors operate on the vectors (the parent
          * deref).  Indirects will be handled like other derefs.
          */
         int vec_index = 0;
         nir_deref_and_path vec_dst = dst;
         if (is_array_deref_of_vector(&dst) && nir_src_is_const(dst.instr->arr.index)) {
            vec_dst.instr = nir_deref_instr_parent(dst.instr);
            unsigned vec_comps = glsl_get_vector_elements(vec_dst.instr->type);

            vec_index = nir_src_as_uint(dst.instr->arr.index);

            /* Storing to an invalid index is a no-op. */
            if (vec_index >= vec_comps) {
               nir_instr_remove(instr);
               state->progress = true;
               break;
            }
         }

         if (nir_intrinsic_access(intrin) & ACCESS_VOLATILE) {
            unsigned wrmask = nir_intrinsic_write_mask(intrin);
            kill_aliases(state, copies, &dst, wrmask);
            break;
         }

         struct copy_entry *entry =
            lookup_entry_for_deref(state, copies, &dst, nir_derefs_equal_bit, NULL);
         if (entry && value_equals_store_src(&entry->src, intrin)) {
            /* If we are storing the value from a load of the same var the
             * store is redundant so remove it.
             */
            nir_instr_remove(instr);
            state->progress = true;
         } else {
            struct value value = { 0 };
            value_set_ssa_components(&value, intrin->src[1].ssa,
                                     intrin->num_components);
            unsigned wrmask = nir_intrinsic_write_mask(intrin);
            struct copy_entry *entry =
               get_entry_and_kill_aliases(state, copies, &vec_dst, wrmask);
            value_set_from_value(&entry->src, &value, vec_index, wrmask);
         }

         break;
      }

      case nir_intrinsic_copy_deref: {
         if (debug)
            dump_instr(instr);

         nir_deref_and_path dst = { nir_src_as_deref(intrin->src[0]), NULL };
         nir_deref_and_path src = { nir_src_as_deref(intrin->src[1]), NULL };

         /* The copy_deref intrinsic doesn't keep track of num_components, so
          * get it ourselves.
          */
         unsigned num_components = glsl_get_vector_elements(dst.instr->type);
         unsigned full_mask = (1 << num_components) - 1;

         if ((nir_intrinsic_src_access(intrin) & ACCESS_VOLATILE) ||
             (nir_intrinsic_dst_access(intrin) & ACCESS_VOLATILE)) {
            kill_aliases(state, copies, &dst, full_mask);
            break;
         }

         nir_deref_compare_result comp =
            nir_compare_derefs_and_paths(state->mem_ctx, &src, &dst);
         if (comp & nir_derefs_equal_bit) {
            /* This is a no-op self-copy.  Get rid of it */
            nir_instr_remove(instr);
            state->progress = true;
            continue;
         }

         /* Copies of direct array derefs of vectors are not handled.  Just
          * invalidate what's written and bail.
          */
         if ((is_array_deref_of_vector(&src) && nir_src_is_const(src.instr->arr.index)) ||
             (is_array_deref_of_vector(&dst) && nir_src_is_const(dst.instr->arr.index))) {
            kill_aliases(state, copies, &dst, full_mask);
            break;
         }

         struct copy_entry *src_entry =
            lookup_entry_for_deref(state, copies, &src, nir_derefs_a_contains_b_bit, NULL);
         struct value value;
         if (try_load_from_entry(state, src_entry, b, intrin, &src, &value)) {
            /* If load works, intrin (the copy_deref) is removed. */
            if (value.is_ssa) {
               nir_store_deref(b, dst.instr, value.ssa.def[0], full_mask);
            } else {
               /* If this would be a no-op self-copy, don't bother. */
               comp = nir_compare_derefs_and_paths(state->mem_ctx, &value.deref, &dst);
               if (comp & nir_derefs_equal_bit)
                  continue;

               /* Just turn it into a copy of a different deref */
               intrin->src[1] = nir_src_for_ssa(&value.deref.instr->def);

               /* Put it back in again. */
               nir_builder_instr_insert(b, instr);
            }

            state->progress = true;
         } else {
            value = (struct value){
               .is_ssa = false,
               { .deref = src },
            };
         }

         nir_variable *src_var = nir_deref_instr_get_variable(src.instr);
         if (src_var && src_var->data.cannot_coalesce) {
            /* The source cannot be coalesced, which means we can't propagate
             * this copy.
             */
            break;
         }

         struct copy_entry *dst_entry =
            get_entry_and_kill_aliases(state, copies, &dst, full_mask);
         value_set_from_value(&dst_entry->src, &value, 0, full_mask);
         break;
      }

      case nir_intrinsic_trace_ray:
      case nir_intrinsic_execute_callable:
      case nir_intrinsic_rt_trace_ray:
      case nir_intrinsic_rt_execute_callable: {
         if (debug)
            dump_instr(instr);

         nir_deref_and_path payload = {
            nir_src_as_deref(*nir_get_shader_call_payload_src(intrin)), NULL
         };
         nir_component_mask_t full_mask = (1 << glsl_get_vector_elements(payload.instr->type)) - 1;
         kill_aliases(state, copies, &payload, full_mask);
         break;
      }

      case nir_intrinsic_memcpy_deref:
      case nir_intrinsic_deref_atomic:
      case nir_intrinsic_deref_atomic_swap:
         if (debug)
            dump_instr(instr);

         nir_deref_and_path dst = { nir_src_as_deref(intrin->src[0]), NULL };
         unsigned num_components = glsl_get_vector_elements(dst.instr->type);
         unsigned full_mask = (1 << num_components) - 1;
         kill_aliases(state, copies, &dst, full_mask);
         break;

      case nir_intrinsic_store_deref_block_intel: {
         if (debug)
            dump_instr(instr);

         /* Invalidate the whole variable (or cast) and anything that aliases
          * with it.
          */
         nir_deref_and_path dst = { nir_src_as_deref(intrin->src[0]), NULL };
         while (nir_deref_instr_parent(dst.instr))
            dst.instr = nir_deref_instr_parent(dst.instr);
         assert(dst.instr->deref_type == nir_deref_type_var ||
                dst.instr->deref_type == nir_deref_type_cast);

         unsigned num_components = glsl_get_vector_elements(dst.instr->type);
         unsigned full_mask = (1 << num_components) - 1;
         kill_aliases(state, copies, &dst, full_mask);
         break;
      }

      default:
         continue; /* To skip the debug below. */
      }

      if (debug)
         dump_copy_entries(copies);
   }
}

static void
clone_copies(struct copy_prop_var_state *state, struct copies *clones,
             struct copies *copies)
{
   /* Simply clone the entire hash table. This is much faster than trying to
    * rebuild it and is needed to avoid slow compilation of very large shaders.
    * The data itself is cloned lazily, later, only if it is ever looked up.
    */
   assert(clones->ht == NULL);
   clones->ht = _mesa_hash_table_clone(copies->ht, state->mem_ctx);

   util_dynarray_clone(&clones->arr, state->mem_ctx, &copies->arr);
}

/* Returns an existing struct for reuse or creates a new one if they are
 * all in use. This greatly reduces the time spent allocating memory compared
 * to just creating a fresh one each time.
 */
static struct copies *
get_copies_structure(struct copy_prop_var_state *state)
{
   struct copies *copies;
   if (list_is_empty(&state->unused_copy_structs_list)) {
      copies = ralloc(state->mem_ctx, struct copies);
      copies->ht = NULL;
      util_dynarray_init(&copies->arr, state->mem_ctx);
   } else {
      copies = list_entry(state->unused_copy_structs_list.next,
                          struct copies, node);
      list_del(&copies->node);
   }

   return copies;
}

static void
clear_copies_structure(struct copy_prop_var_state *state,
                       struct copies *copies)
{
   ralloc_free(copies->ht);
   copies->ht = NULL;

   list_add(&copies->node, &state->unused_copy_structs_list);
}

static void
copy_prop_vars_cf_node(struct copy_prop_var_state *state,
                       struct copies *copies, nir_cf_node *cf_node)
{
   switch (cf_node->type) {
   case nir_cf_node_function: {
      nir_function_impl *impl = nir_cf_node_as_function(cf_node);

      struct copies *impl_copies = get_copies_structure(state);
      impl_copies->ht = _mesa_hash_table_create(state->mem_ctx,
                                                _mesa_hash_pointer,
                                                _mesa_key_pointer_equal);

      foreach_list_typed_safe(nir_cf_node, cf_node, node, &impl->body)
         copy_prop_vars_cf_node(state, impl_copies, cf_node);

      clear_copies_structure(state, impl_copies);

      break;
   }

   case nir_cf_node_block: {
      nir_block *block = nir_cf_node_as_block(cf_node);
      nir_builder b = nir_builder_create(state->impl);
      copy_prop_vars_block(state, &b, block, copies);
      break;
   }

   case nir_cf_node_if: {
      nir_if *if_stmt = nir_cf_node_as_if(cf_node);

      /* Create a new hash table for tracking vars and fill it with clones of
       * the copy arrays for each variable we are tracking.
       *
       * We clone the copies for each branch of the if statement.  The idea is
       * that they both see the same state of available copies, but do not
       * interfere with each other.
       */
      if (!exec_list_is_empty(&if_stmt->then_list)) {
         struct copies *then_copies = get_copies_structure(state);
         clone_copies(state, then_copies, copies);

         foreach_list_typed_safe(nir_cf_node, cf_node, node, &if_stmt->then_list)
            copy_prop_vars_cf_node(state, then_copies, cf_node);

         clear_copies_structure(state, then_copies);
      }

      if (!exec_list_is_empty(&if_stmt->else_list)) {
         struct copies *else_copies = get_copies_structure(state);
         clone_copies(state, else_copies, copies);

         foreach_list_typed_safe(nir_cf_node, cf_node, node, &if_stmt->else_list)
            copy_prop_vars_cf_node(state, else_copies, cf_node);

         clear_copies_structure(state, else_copies);
      }

      /* Both branches' copies can be ignored, since the effect of running both
       * branches was captured in the first pass that collects vars_written.
       */

      invalidate_copies_for_cf_node(state, copies, cf_node);

      break;
   }

   case nir_cf_node_loop: {
      nir_loop *loop = nir_cf_node_as_loop(cf_node);
      assert(!nir_loop_has_continue_construct(loop));

      /* Invalidate before cloning the copies for the loop, since the loop
       * body can be executed more than once.
       */

      invalidate_copies_for_cf_node(state, copies, cf_node);

      struct copies *loop_copies = get_copies_structure(state);
      clone_copies(state, loop_copies, copies);

      foreach_list_typed_safe(nir_cf_node, cf_node, node, &loop->body)
         copy_prop_vars_cf_node(state, loop_copies, cf_node);

      clear_copies_structure(state, loop_copies);

      break;
   }

   default:
      unreachable("Invalid CF node type");
   }
}

static bool
nir_copy_prop_vars_impl(nir_function_impl *impl)
{
   void *mem_ctx = ralloc_context(NULL);

   if (debug) {
      nir_metadata_require(impl, nir_metadata_block_index);
      printf("## nir_copy_prop_vars_impl for %s\n", impl->function->name);
   }

   struct copy_prop_var_state state = {
      .impl = impl,
      .mem_ctx = mem_ctx,
      .lin_ctx = linear_context(mem_ctx),

      .vars_written_map = _mesa_pointer_hash_table_create(mem_ctx),
   };
   list_inithead(&state.unused_copy_structs_list);

   gather_vars_written(&state, NULL, &impl->cf_node);

   copy_prop_vars_cf_node(&state, NULL, &impl->cf_node);

   if (state.progress) {
      nir_metadata_preserve(impl, nir_metadata_control_flow);
   } else {
      nir_metadata_preserve(impl, nir_metadata_all);
   }

   ralloc_free(mem_ctx);
   return state.progress;
}

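/* Shader-level entry point.  A driver would typically run this from its
 * optimization loop, e.g. (illustrative only):
 *
 *    bool progress;
 *    do {
 *       progress = false;
 *       progress |= nir_opt_copy_prop_vars(shader);
 *       progress |= nir_opt_dce(shader);
 *    } while (progress);
 */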
bool
nir_opt_copy_prop_vars(nir_shader *shader)
{
   bool progress = false;

   nir_foreach_function_impl(impl, shader) {
      progress |= nir_copy_prop_vars_impl(impl);
   }

   return progress;
}
1534