1 /*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "nir_deref.h"
27
28 #include "util/bitscan.h"
29 #include "util/u_dynarray.h"
30
31 static const bool debug = false;
32
33 /**
34 * Variable-based copy propagation
35 *
36 * Normally, NIR trusts in SSA form for most of its copy-propagation needs.
37 * However, there are cases, especially when dealing with indirects, where SSA
38 * won't help you. This pass is for those times. Specifically, it handles
39 * the following things that the rest of NIR can't:
40 *
41 * 1) Copy-propagation on variables that have indirect access. This includes
42 * propagating from indirect stores into indirect loads.
43 *
44 * 2) Removal of redundant load_deref intrinsics. We can't trust regular CSE
45 * to do this because it isn't aware of variable writes that may alias the
46 * value and make the former load invalid.
47 *
48 * This pass uses an intermediate approach between being purely local /
49 * "per-block" and doing a complete data-flow analysis. It follows the
50 * control flow graph and propagates the available copy information forward,
51 * invalidating data at each cf_node.
52 *
53 * Removal of dead writes to variables is handled by another pass.
54 */
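/* As a rough illustration (NIR-like pseudocode, not literal pass output),
 * case (1) above lets a load through the same indirect deref reuse the value
 * that was just stored:
 *
 *    store_deref &arr[i] (function_temp vec4), ssa_1, 0xf
 *    ssa_2 = load_deref &arr[i] (function_temp vec4)
 *    ... uses of ssa_2 ...
 *
 * can become
 *
 *    store_deref &arr[i] (function_temp vec4), ssa_1, 0xf
 *    ... uses of ssa_1 ...
 *
 * Case (2) similarly lets a repeated load_deref of the same deref reuse the
 * SSA def produced by an earlier load, provided no potentially aliasing
 * write happens in between.
 */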
55
56 struct copies {
57 struct list_head node;
58
59 /* Hash table of copies referenced by variables */
60 struct hash_table *ht;
61
62 /* Array of derefs that can't be chased back to a variable */
63 struct util_dynarray arr;
64 };
65
66 struct copies_dynarray {
67 struct list_head node;
68 struct util_dynarray arr;
69
70 /* The copies structure this dynarray was cloned or created for */
71 struct copies *owner;
72 };
73
74 struct vars_written {
75 nir_variable_mode modes;
76
77 /* Key is the deref and the value is a uintptr_t holding the write mask. */
78 struct hash_table *derefs;
79 };
80
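/* The value currently known to be stored in a tracked deref. For SSA values
 * this is tracked per destination component: component i of the destination
 * holds component ssa.component[i] of ssa.def[i] (entries may be NULL when a
 * component's value is unknown). For non-SSA values (from copy_deref) the
 * whole source deref is recorded instead.
 */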
81 struct value {
82 bool is_ssa;
83 union {
84 struct {
85 nir_def *def[NIR_MAX_VEC_COMPONENTS];
86 uint8_t component[NIR_MAX_VEC_COMPONENTS];
87 } ssa;
88 nir_deref_and_path deref;
89 };
90 };
91
92 static void
93 value_set_ssa_components(struct value *value, nir_def *def,
94 unsigned num_components)
95 {
96 value->is_ssa = true;
97 for (unsigned i = 0; i < num_components; i++) {
98 value->ssa.def[i] = def;
99 value->ssa.component[i] = i;
100 }
101 }
102
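/* A single available-copy record: the deref "dst" is currently known to hold
 * the value "src" (either per-component SSA defs or another deref).
 */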
103 struct copy_entry {
104 struct value src;
105
106 nir_deref_and_path dst;
107 };
108
109 struct copy_prop_var_state {
110 nir_function_impl *impl;
111
112 void *mem_ctx;
113 linear_ctx *lin_ctx;
114
115 /* Maps nodes to vars_written. Used to invalidate copy entries when
116 * visiting each node.
117 */
118 struct hash_table *vars_written_map;
119
120 /* List of copy structures ready for reuse */
121 struct list_head unused_copy_structs_list;
122
123 bool progress;
124 };
125
126 static bool
127 value_equals_store_src(struct value *value, nir_intrinsic_instr *intrin)
128 {
129 assert(intrin->intrinsic == nir_intrinsic_store_deref);
130 nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
131
132 for (unsigned i = 0; i < intrin->num_components; i++) {
133 if ((write_mask & (1 << i)) &&
134 (value->ssa.def[i] != intrin->src[1].ssa ||
135 value->ssa.component[i] != i))
136 return false;
137 }
138
139 return true;
140 }
141
142 static struct vars_written *
143 create_vars_written(struct copy_prop_var_state *state)
144 {
145 struct vars_written *written =
146 linear_zalloc_child(state->lin_ctx, sizeof(struct vars_written));
147 written->derefs = _mesa_pointer_hash_table_create(state->mem_ctx);
148 return written;
149 }
150
151 static void
152 gather_vars_written(struct copy_prop_var_state *state,
153 struct vars_written *written,
154 nir_cf_node *cf_node)
155 {
156 struct vars_written *new_written = NULL;
157
158 switch (cf_node->type) {
159 case nir_cf_node_function: {
160 nir_function_impl *impl = nir_cf_node_as_function(cf_node);
161 foreach_list_typed_safe(nir_cf_node, cf_node, node, &impl->body)
162 gather_vars_written(state, NULL, cf_node);
163 break;
164 }
165
166 case nir_cf_node_block: {
167 if (!written)
168 break;
169
170 nir_block *block = nir_cf_node_as_block(cf_node);
171 nir_foreach_instr(instr, block) {
172 if (instr->type == nir_instr_type_call) {
173 written->modes |= nir_var_shader_out |
174 nir_var_shader_temp |
175 nir_var_function_temp |
176 nir_var_mem_ssbo |
177 nir_var_mem_shared |
178 nir_var_mem_global;
179 continue;
180 }
181
182 if (instr->type != nir_instr_type_intrinsic)
183 continue;
184
185 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
186 switch (intrin->intrinsic) {
187 case nir_intrinsic_barrier:
188 if (nir_intrinsic_memory_semantics(intrin) & NIR_MEMORY_ACQUIRE)
189 written->modes |= nir_intrinsic_memory_modes(intrin);
190 break;
191
192 case nir_intrinsic_emit_vertex:
193 case nir_intrinsic_emit_vertex_with_counter:
194 written->modes = nir_var_shader_out;
195 break;
196
197 case nir_intrinsic_trace_ray:
198 case nir_intrinsic_execute_callable:
199 case nir_intrinsic_rt_trace_ray:
200 case nir_intrinsic_rt_execute_callable: {
201 nir_deref_instr *payload =
202 nir_src_as_deref(*nir_get_shader_call_payload_src(intrin));
203
204 nir_component_mask_t mask = (1 << glsl_get_vector_elements(payload->type)) - 1;
205
206 struct hash_entry *ht_entry =
207 _mesa_hash_table_search(written->derefs, payload);
208 if (ht_entry) {
209 ht_entry->data = (void *)(mask | (uintptr_t)ht_entry->data);
210 } else {
211 _mesa_hash_table_insert(written->derefs, payload,
212 (void *)(uintptr_t)mask);
213 }
214 break;
215 }
216
217 case nir_intrinsic_report_ray_intersection:
218 written->modes |= nir_var_mem_ssbo |
219 nir_var_mem_global |
220 nir_var_shader_call_data |
221 nir_var_ray_hit_attrib;
222 break;
223
224 case nir_intrinsic_ignore_ray_intersection:
225 case nir_intrinsic_terminate_ray:
226 written->modes |= nir_var_mem_ssbo |
227 nir_var_mem_global |
228 nir_var_shader_call_data;
229 break;
230
231 case nir_intrinsic_deref_atomic:
232 case nir_intrinsic_deref_atomic_swap:
233 case nir_intrinsic_store_deref:
234 case nir_intrinsic_copy_deref:
235 case nir_intrinsic_memcpy_deref: {
236 /* The destination in all of store_deref, copy_deref, memcpy_deref and the atomics is src[0]. */
237 nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
238
239 uintptr_t mask = intrin->intrinsic == nir_intrinsic_store_deref ? nir_intrinsic_write_mask(intrin) : (1 << glsl_get_vector_elements(dst->type)) - 1;
240
241 struct hash_entry *ht_entry = _mesa_hash_table_search(written->derefs, dst);
242 if (ht_entry)
243 ht_entry->data = (void *)(mask | (uintptr_t)ht_entry->data);
244 else
245 _mesa_hash_table_insert(written->derefs, dst, (void *)mask);
246
247 break;
248 }
249
250 default:
251 break;
252 }
253 }
254
255 break;
256 }
257
258 case nir_cf_node_if: {
259 nir_if *if_stmt = nir_cf_node_as_if(cf_node);
260
261 new_written = create_vars_written(state);
262
263 foreach_list_typed_safe(nir_cf_node, cf_node, node, &if_stmt->then_list)
264 gather_vars_written(state, new_written, cf_node);
265
266 foreach_list_typed_safe(nir_cf_node, cf_node, node, &if_stmt->else_list)
267 gather_vars_written(state, new_written, cf_node);
268
269 break;
270 }
271
272 case nir_cf_node_loop: {
273 nir_loop *loop = nir_cf_node_as_loop(cf_node);
274 assert(!nir_loop_has_continue_construct(loop));
275
276 new_written = create_vars_written(state);
277
278 foreach_list_typed_safe(nir_cf_node, cf_node, node, &loop->body)
279 gather_vars_written(state, new_written, cf_node);
280
281 break;
282 }
283
284 default:
285 unreachable("Invalid CF node type");
286 }
287
288 if (new_written) {
289 /* Merge the new information into the parent control flow node. */
290 if (written) {
291 written->modes |= new_written->modes;
292 hash_table_foreach(new_written->derefs, new_entry) {
293 struct hash_entry *old_entry =
294 _mesa_hash_table_search_pre_hashed(written->derefs, new_entry->hash,
295 new_entry->key);
296 if (old_entry) {
297 nir_component_mask_t merged = (uintptr_t)new_entry->data |
298 (uintptr_t)old_entry->data;
299 old_entry->data = (void *)((uintptr_t)merged);
300 } else {
301 _mesa_hash_table_insert_pre_hashed(written->derefs, new_entry->hash,
302 new_entry->key, new_entry->data);
303 }
304 }
305 }
306 _mesa_hash_table_insert(state->vars_written_map, cf_node, new_written);
307 }
308 }
309
310 /* Creates a fresh dynarray */
311 static struct copies_dynarray *
312 get_copies_dynarray(struct copy_prop_var_state *state)
313 {
314 struct copies_dynarray *cp_arr =
315 ralloc(state->mem_ctx, struct copies_dynarray);
316 util_dynarray_init(&cp_arr->arr, state->mem_ctx);
317 return cp_arr;
318 }
319
320 /* Checks if the pointer leads to a cloned copy of the array for this hash
321 * table or if the pointer was inherited from when the hash table was cloned.
322 */
323 static bool
324 copies_owns_ht_entry(struct copies *copies,
325 struct hash_entry *ht_entry)
326 {
327 assert(copies && ht_entry && ht_entry->data);
328 struct copies_dynarray *copies_array = ht_entry->data;
329 return copies_array->owner == copies;
330 }
331
332 static void
333 clone_copies_dynarray_from_src(struct copies_dynarray *dst,
334 struct copies_dynarray *src)
335 {
336 util_dynarray_append_dynarray(&dst->arr, &src->arr);
337 }
338
339 /* Gets the copies array from the hash table entry, or clones the source
340 * array if the entry was inherited from a cloned hash table. The arrays are
341 * not cloned when the hash table itself is cloned because it's expensive to
342 * clone everything and most values will never actually be accessed.
343 */
344 static struct copies_dynarray *
345 get_copies_array_from_ht_entry(struct copy_prop_var_state *state,
346 struct copies *copies,
347 struct hash_entry *ht_entry)
348 {
349 struct copies_dynarray *copies_array;
350 if (copies_owns_ht_entry(copies, ht_entry)) {
351 /* The array already exists so just return it */
352 copies_array = (struct copies_dynarray *)ht_entry->data;
353 } else {
354 /* Clone the array and set the data value for future access */
355 copies_array = get_copies_dynarray(state);
356 copies_array->owner = copies;
357 clone_copies_dynarray_from_src(copies_array, ht_entry->data);
358 ht_entry->data = copies_array;
359 }
360
361 return copies_array;
362 }
363
364 static struct copies_dynarray *
365 copies_array_for_var(struct copy_prop_var_state *state,
366 struct copies *copies, nir_variable *var)
367 {
368 struct hash_entry *entry = _mesa_hash_table_search(copies->ht, var);
369 if (entry != NULL)
370 return get_copies_array_from_ht_entry(state, copies, entry);
371
372 struct copies_dynarray *copies_array = get_copies_dynarray(state);
373 copies_array->owner = copies;
374 _mesa_hash_table_insert(copies->ht, var, copies_array);
375
376 return copies_array;
377 }
378
379 static struct util_dynarray *
380 copies_array_for_deref(struct copy_prop_var_state *state,
381 struct copies *copies, nir_deref_and_path *deref)
382 {
383 nir_get_deref_path(state->mem_ctx, deref);
384
385 struct util_dynarray *copies_array;
386 if (deref->_path->path[0]->deref_type != nir_deref_type_var) {
387 copies_array = &copies->arr;
388 } else {
389 struct copies_dynarray *cpda =
390 copies_array_for_var(state, copies, deref->_path->path[0]->var);
391 copies_array = &cpda->arr;
392 }
393
394 return copies_array;
395 }
396
397 static struct copy_entry *
398 copy_entry_create(struct copy_prop_var_state *state,
399 struct copies *copies, nir_deref_and_path *deref)
400 {
401 struct util_dynarray *copies_array =
402 copies_array_for_deref(state, copies, deref);
403
404 struct copy_entry new_entry = {
405 .dst = *deref,
406 };
407 util_dynarray_append(copies_array, struct copy_entry, new_entry);
408 return util_dynarray_top_ptr(copies_array, struct copy_entry);
409 }
410
411 /* Remove copy entry by swapping it with the last element and reducing the
412 * size. If used inside an iteration on copies, it must be a reverse
413 * (backwards) iteration. It is safe to use in those cases because the swap
414 * will not affect the rest of the iteration.
415 */
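/* For example (illustrative): removing B from [A, B, C] pops C off the end
 * and writes it into B's slot, leaving [A, C]. A reverse iteration has
 * already visited C, so the relocated element is not re-examined.
 */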
416 static void
417 copy_entry_remove(struct util_dynarray *copies,
418 struct copy_entry *entry,
419 struct copy_entry **relocated_entry)
420 {
421 const struct copy_entry *src =
422 util_dynarray_pop_ptr(copies, struct copy_entry);
423
424 /* Because we're removing elements from an array, pointers to those
425 * elements are not stable as we modify the array.
426 * If relocated_entry != NULL, it points to an entry we saved off earlier
427 * and that we want to keep pointing at the right spot.
428 */
429 if (relocated_entry && *relocated_entry == src)
430 *relocated_entry = entry;
431
432 if (src != entry)
433 *entry = *src;
434 }
435
436 static bool
437 is_array_deref_of_vector(const nir_deref_and_path *deref)
438 {
439 if (deref->instr->deref_type != nir_deref_type_array)
440 return false;
441 nir_deref_instr *parent = nir_deref_instr_parent(deref->instr);
442 return glsl_type_is_vector(parent->type);
443 }
444
445 static struct copy_entry *
446 lookup_entry_for_deref(struct copy_prop_var_state *state,
447 struct copies *copies,
448 nir_deref_and_path *deref,
449 nir_deref_compare_result allowed_comparisons,
450 bool *equal)
451 {
452 struct util_dynarray *copies_array =
453 copies_array_for_deref(state, copies, deref);
454
455 struct copy_entry *entry = NULL;
456 util_dynarray_foreach(copies_array, struct copy_entry, iter) {
457 nir_deref_compare_result result =
458 nir_compare_derefs_and_paths(state->mem_ctx, &iter->dst, deref);
459 if (result & allowed_comparisons) {
460 entry = iter;
461 if (result & nir_derefs_equal_bit) {
462 if (equal != NULL)
463 *equal = true;
464 break;
465 }
466 /* Keep looking in case we have an equal match later in the array. */
467 }
468 }
469
470 return entry;
471 }
472
473 static void
474 lookup_entry_and_kill_aliases_copy_array(struct copy_prop_var_state *state,
475 struct util_dynarray *copies_array,
476 nir_deref_and_path *deref,
477 unsigned write_mask,
478 bool remove_entry,
479 struct copy_entry **entry,
480 bool *entry_removed)
481 {
482 util_dynarray_foreach_reverse(copies_array, struct copy_entry, iter) {
483 nir_deref_compare_result comp =
484 nir_compare_derefs_and_paths(state->mem_ctx, &iter->dst, deref);
485
486 if (comp & nir_derefs_equal_bit) {
487 /* Make sure it is unique. */
488 assert(!*entry && !*entry_removed);
489 if (remove_entry) {
490 copy_entry_remove(copies_array, iter, NULL);
491 *entry_removed = true;
492 } else {
493 *entry = iter;
494 }
495 } else if (comp & nir_derefs_may_alias_bit) {
496 copy_entry_remove(copies_array, iter, entry);
497 }
498 }
499 }
500
501 static struct copy_entry *
502 lookup_entry_and_kill_aliases(struct copy_prop_var_state *state,
503 struct copies *copies,
504 nir_deref_and_path *deref,
505 unsigned write_mask,
506 bool remove_entry)
507 {
508 /* TODO: Take into account the write_mask. */
509
510 bool UNUSED entry_removed = false;
511 struct copy_entry *entry = NULL;
512
513 nir_get_deref_path(state->mem_ctx, deref);
514
515 /* For most variable modes, derefs of different variables never alias, so
516 * we only need to loop over the whole hash table (comparing across
517 * variables) for ssbos, shared vars and derefs not rooted in a variable.
518 */
519 if (deref->_path->path[0]->deref_type != nir_deref_type_var ||
520 deref->_path->path[0]->var->data.mode == nir_var_mem_ssbo ||
521 deref->_path->path[0]->var->data.mode == nir_var_mem_shared) {
522
523 hash_table_foreach(copies->ht, ht_entry) {
524 nir_variable *var = (nir_variable *)ht_entry->key;
525 if (deref->_path->path[0]->deref_type == nir_deref_type_var &&
526 var->data.mode != deref->_path->path[0]->var->data.mode)
527 continue;
528
529 struct copies_dynarray *copies_array =
530 get_copies_array_from_ht_entry(state, copies, ht_entry);
531
532 lookup_entry_and_kill_aliases_copy_array(state, &copies_array->arr,
533 deref, write_mask,
534 remove_entry, &entry,
535 &entry_removed);
536
537 if (copies_array->arr.size == 0) {
538 _mesa_hash_table_remove(copies->ht, ht_entry);
539 }
540 }
541
542 lookup_entry_and_kill_aliases_copy_array(state, &copies->arr, deref,
543 write_mask, remove_entry,
544 &entry, &entry_removed);
545 } else {
546 struct copies_dynarray *cpda =
547 copies_array_for_var(state, copies, deref->_path->path[0]->var);
548 struct util_dynarray *copies_array = &cpda->arr;
549
550 lookup_entry_and_kill_aliases_copy_array(state, copies_array, deref,
551 write_mask, remove_entry,
552 &entry, &entry_removed);
553
554 if (copies_array->size == 0) {
555 _mesa_hash_table_remove_key(copies->ht, deref->_path->path[0]->var);
556 }
557 }
558
559 return entry;
560 }
561
562 static void
563 kill_aliases(struct copy_prop_var_state *state,
564 struct copies *copies,
565 nir_deref_and_path *deref,
566 unsigned write_mask)
567 {
568 /* TODO: Take into account the write_mask. */
569
570 lookup_entry_and_kill_aliases(state, copies, deref, write_mask, true);
571 }
572
573 static struct copy_entry *
574 get_entry_and_kill_aliases(struct copy_prop_var_state *state,
575 struct copies *copies,
576 nir_deref_and_path *deref,
577 unsigned write_mask)
578 {
579 /* TODO: Take into account the write_mask. */
580
581 struct copy_entry *entry =
582 lookup_entry_and_kill_aliases(state, copies, deref, write_mask, false);
583 if (entry == NULL)
584 entry = copy_entry_create(state, copies, deref);
585
586 return entry;
587 }
588
589 static void
590 apply_barrier_for_modes_to_dynarr(struct util_dynarray *copies_array,
591 nir_variable_mode modes)
592 {
593 util_dynarray_foreach_reverse(copies_array, struct copy_entry, iter) {
594 if (nir_deref_mode_may_be(iter->dst.instr, modes) ||
595 (!iter->src.is_ssa && nir_deref_mode_may_be(iter->src.deref.instr, modes)))
596 copy_entry_remove(copies_array, iter, NULL);
597 }
598 }
599
600 static void
601 apply_barrier_for_modes(struct copy_prop_var_state *state,
602 struct copies *copies, nir_variable_mode modes)
603 {
604 hash_table_foreach(copies->ht, ht_entry) {
605 struct copies_dynarray *copies_array =
606 get_copies_array_from_ht_entry(state, copies, ht_entry);
607
608 apply_barrier_for_modes_to_dynarr(&copies_array->arr, modes);
609 }
610
611 apply_barrier_for_modes_to_dynarr(&copies->arr, modes);
612 }
613
614 static void
615 value_set_from_value(struct value *value, const struct value *from,
616 unsigned base_index, unsigned write_mask)
617 {
618 /* We can't have non-zero indexes with non-trivial write masks */
619 assert(base_index == 0 || write_mask == 1);
620
621 if (from->is_ssa) {
622 /* Clear value if it was being used as non-SSA. */
623 value->is_ssa = true;
624 /* Only overwrite the written components */
625 for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) {
626 if (write_mask & (1 << i)) {
627 value->ssa.def[base_index + i] = from->ssa.def[i];
628 value->ssa.component[base_index + i] = from->ssa.component[i];
629 }
630 }
631 } else {
632 /* Non-ssa stores always write everything */
633 value->is_ssa = false;
634 value->deref = from->deref;
635 }
636 }
637
638 /* Try to load a single element of a vector from the copy_entry. If the data
639 * isn't available, just let the original intrinsic do the work.
640 */
641 static bool
642 load_element_from_ssa_entry_value(struct copy_prop_var_state *state,
643 struct copy_entry *entry,
644 nir_builder *b, nir_intrinsic_instr *intrin,
645 struct value *value, unsigned index)
646 {
647 assert(index < glsl_get_vector_elements(entry->dst.instr->type));
648
649 /* We don't have the element available, so let the instruction do the work. */
650 if (!entry->src.ssa.def[index])
651 return false;
652
653 b->cursor = nir_instr_remove(&intrin->instr);
654 intrin->instr.block = NULL;
655
656 assert(entry->src.ssa.component[index] <
657 entry->src.ssa.def[index]->num_components);
658 nir_def *def = nir_channel(b, entry->src.ssa.def[index],
659 entry->src.ssa.component[index]);
660
661 *value = (struct value){
662 .is_ssa = true,
663 {
664 .ssa = {
665 .def = { def },
666 .component = { 0 },
667 },
668 }
669 };
670
671 return true;
672 }
673
674 /* Do a "load" from an SSA-based entry return it in "value" as a value with a
675 * single SSA def. Because an entry could reference multiple different SSA
676 * defs, a vecN operation may be inserted to combine them into a single SSA
677 * def before handing it back to the caller. If the load instruction is no
678 * longer needed, it is removed and nir_instr::block is set to NULL. (It is
679 * possible, in some cases, for the load to be used in the vecN operation in
680 * which case it isn't deleted.)
681 */
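/* A sketch of the gather case (illustrative only): if the entry knows SSA
 * values for components x and y of a vec4 but not z and w, the load is
 * replaced by roughly vec4(known_x, known_y, load.z, load.w), where "load"
 * is either the original load_deref or a freshly built one.
 */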
682 static bool
683 load_from_ssa_entry_value(struct copy_prop_var_state *state,
684 struct copy_entry *entry,
685 nir_builder *b, nir_intrinsic_instr *intrin,
686 nir_deref_and_path *src, struct value *value)
687 {
688 if (is_array_deref_of_vector(src)) {
689 if (nir_src_is_const(src->instr->arr.index)) {
690 unsigned index = nir_src_as_uint(src->instr->arr.index);
691 return load_element_from_ssa_entry_value(state, entry, b, intrin,
692 value, index);
693 }
694
695 /* An SSA copy_entry for the vector won't help an indirect load. */
696 if (glsl_type_is_vector(entry->dst.instr->type)) {
697 assert(entry->dst.instr->type == nir_deref_instr_parent(src->instr)->type);
698 /* TODO: If all SSA entries are there, try an if-ladder. */
699 return false;
700 }
701 }
702
703 *value = entry->src;
704
705 const struct glsl_type *type = entry->dst.instr->type;
706 unsigned num_components = glsl_get_vector_elements(type);
707
708 nir_component_mask_t available = 0;
709 bool all_same = true;
710 for (unsigned i = 0; i < num_components; i++) {
711 if (value->ssa.def[i])
712 available |= (1 << i);
713
714 if (value->ssa.def[i] != value->ssa.def[0])
715 all_same = false;
716
717 if (value->ssa.component[i] != i)
718 all_same = false;
719 }
720
721 if (all_same) {
722 /* Our work here is done */
723 b->cursor = nir_instr_remove(&intrin->instr);
724 intrin->instr.block = NULL;
725 return true;
726 }
727
728 if (available != (1 << num_components) - 1 &&
729 intrin->intrinsic == nir_intrinsic_load_deref &&
730 (available & nir_def_components_read(&intrin->def)) == 0) {
731 /* If none of the components read are available as SSA values, then we
732 * should just bail. Otherwise, we would end up replacing the uses of
733 * the load_deref with a vecN() that just gathers up its components.
734 */
735 return false;
736 }
737
738 b->cursor = nir_after_instr(&intrin->instr);
739
740 nir_def *load_def =
741 intrin->intrinsic == nir_intrinsic_load_deref ? &intrin->def : NULL;
742
743 bool keep_intrin = false;
744 nir_scalar comps[NIR_MAX_VEC_COMPONENTS];
745 for (unsigned i = 0; i < num_components; i++) {
746 if (value->ssa.def[i]) {
747 comps[i] = nir_get_scalar(value->ssa.def[i], value->ssa.component[i]);
748 } else {
749 /* We don't have anything for this component in our
750 * list. Just re-use a channel from the load.
751 */
752 if (load_def == NULL)
753 load_def = nir_load_deref(b, entry->dst.instr);
754
755 if (load_def->parent_instr == &intrin->instr)
756 keep_intrin = true;
757
758 comps[i] = nir_get_scalar(load_def, i);
759 }
760 }
761
762 nir_def *vec = nir_vec_scalars(b, comps, num_components);
763 value_set_ssa_components(value, vec, num_components);
764
765 if (!keep_intrin) {
766 /* Removing this instruction should not touch the cursor because we
767 * created the cursor after the intrinsic and have added at least one
768 * instruction (the vec) since then.
769 */
770 assert(b->cursor.instr != &intrin->instr);
771 nir_instr_remove(&intrin->instr);
772 intrin->instr.block = NULL;
773 }
774
775 return true;
776 }
777
778 /**
779 * Specialize the wildcards in a deref chain
780 *
781 * This function returns a deref chain identical to \param deref except that
782 * some of its wildcards are replaced with indices from \param specific. The
783 * process is guided by \param guide which references the same type as \param
784 * specific but has the same wildcard array lengths as \param deref.
785 */
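/* A worked example (illustrative): suppose a copy entry was created from
 *
 *    copy_deref &dst_arr[*], &src_arr[*]
 *
 * and we later see load_deref &dst_arr[3]. Then "deref" is &src_arr[*],
 * "guide" is &dst_arr[*] and "specific" is &dst_arr[3]; the wildcard in the
 * source chain is filled in from the matching position of "specific",
 * yielding &src_arr[3].
 */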
786 static nir_deref_instr *
787 specialize_wildcards(nir_builder *b,
788 nir_deref_path *deref,
789 nir_deref_path *guide,
790 nir_deref_path *specific)
791 {
792 nir_deref_instr **deref_p = &deref->path[1];
793 nir_deref_instr *ret_tail = deref->path[0];
794 for (; *deref_p; deref_p++) {
795 if ((*deref_p)->deref_type == nir_deref_type_array_wildcard)
796 break;
797 ret_tail = *deref_p;
798 }
799
800 nir_deref_instr **guide_p = &guide->path[1];
801 nir_deref_instr **spec_p = &specific->path[1];
802 for (; *deref_p; deref_p++) {
803 if ((*deref_p)->deref_type == nir_deref_type_array_wildcard) {
804 /* This is where things get tricky. We have to search through
805 * the entry deref to find its corresponding wildcard and fill
806 * this slot in with the value from the src.
807 */
808 while (*guide_p &&
809 (*guide_p)->deref_type != nir_deref_type_array_wildcard) {
810 guide_p++;
811 spec_p++;
812 }
813 assert(*guide_p && *spec_p);
814
815 ret_tail = nir_build_deref_follower(b, ret_tail, *spec_p);
816
817 guide_p++;
818 spec_p++;
819 } else {
820 ret_tail = nir_build_deref_follower(b, ret_tail, *deref_p);
821 }
822 }
823
824 return ret_tail;
825 }
826
827 /* Do a "load" from an deref-based entry return it in "value" as a value. The
828 * deref returned in "value" will always be a fresh copy so the caller can
829 * steal it and assign it to the instruction directly without copying it
830 * again.
831 */
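/* For instance (illustrative): with an entry recording "&a holds &b", a
 * load_deref of &a[3].x is rewritten into a load of &b[3].x by extending the
 * entry's source deref with the remaining path components of the load.
 */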
832 static bool
833 load_from_deref_entry_value(struct copy_prop_var_state *state,
834 struct copy_entry *entry,
835 nir_builder *b, nir_intrinsic_instr *intrin,
836 nir_deref_and_path *src, struct value *value)
837 {
838 *value = entry->src;
839
840 b->cursor = nir_instr_remove(&intrin->instr);
841
842 nir_deref_path *entry_dst_path = nir_get_deref_path(state->mem_ctx, &entry->dst);
843 nir_deref_path *src_path = nir_get_deref_path(state->mem_ctx, src);
844
845 bool need_to_specialize_wildcards = false;
846 nir_deref_instr **entry_p = &entry_dst_path->path[1];
847 nir_deref_instr **src_p = &src_path->path[1];
848 while (*entry_p && *src_p) {
849 nir_deref_instr *entry_tail = *entry_p++;
850 nir_deref_instr *src_tail = *src_p++;
851
852 if (src_tail->deref_type == nir_deref_type_array &&
853 entry_tail->deref_type == nir_deref_type_array_wildcard)
854 need_to_specialize_wildcards = true;
855 }
856
857 /* If the entry deref is longer than the source deref then it refers to a
858 * smaller type and we can't source from it.
859 */
860 assert(*entry_p == NULL);
861
862 value->deref._path = NULL;
863
864 if (need_to_specialize_wildcards) {
865 /* The entry has some wildcards that are not in src. This means we need
866 * to construct a new deref based on the entry but using the wildcards
867 * from the source and guided by the entry dst. Oof.
868 */
869 nir_deref_path *entry_src_path =
870 nir_get_deref_path(state->mem_ctx, &entry->src.deref);
871 value->deref.instr = specialize_wildcards(b, entry_src_path,
872 entry_dst_path, src_path);
873 }
874
875 /* If our source deref is longer than the entry deref, that's ok because
876 * it just means the entry deref needs to be extended a bit.
877 */
878 while (*src_p) {
879 nir_deref_instr *src_tail = *src_p++;
880 value->deref.instr = nir_build_deref_follower(b, value->deref.instr, src_tail);
881 }
882
883 return true;
884 }
885
886 static bool
887 try_load_from_entry(struct copy_prop_var_state *state, struct copy_entry *entry,
888 nir_builder *b, nir_intrinsic_instr *intrin,
889 nir_deref_and_path *src, struct value *value)
890 {
891 if (entry == NULL)
892 return false;
893
894 if (entry->src.is_ssa) {
895 return load_from_ssa_entry_value(state, entry, b, intrin, src, value);
896 } else {
897 return load_from_deref_entry_value(state, entry, b, intrin, src, value);
898 }
899 }
900
901 static void
902 invalidate_copies_for_cf_node(struct copy_prop_var_state *state,
903 struct copies *copies,
904 nir_cf_node *cf_node)
905 {
906 struct hash_entry *ht_entry = _mesa_hash_table_search(state->vars_written_map, cf_node);
907 assert(ht_entry);
908
909 struct vars_written *written = ht_entry->data;
910 if (written->modes) {
911 hash_table_foreach(copies->ht, ht_entry) {
912 struct copies_dynarray *copies_array =
913 get_copies_array_from_ht_entry(state, copies, ht_entry);
914
915 util_dynarray_foreach_reverse(&copies_array->arr, struct copy_entry, entry) {
916 if (nir_deref_mode_may_be(entry->dst.instr, written->modes))
917 copy_entry_remove(&copies_array->arr, entry, NULL);
918 }
919
920 if (copies_array->arr.size == 0) {
921 _mesa_hash_table_remove(copies->ht, ht_entry);
922 }
923 }
924
925 util_dynarray_foreach_reverse(&copies->arr, struct copy_entry, entry) {
926 if (nir_deref_mode_may_be(entry->dst.instr, written->modes))
927 copy_entry_remove(&copies->arr, entry, NULL);
928 }
929 }
930
931 hash_table_foreach(written->derefs, entry) {
932 nir_deref_instr *deref_written = (nir_deref_instr *)entry->key;
933 nir_deref_and_path deref = { deref_written, NULL };
934 kill_aliases(state, copies, &deref, (uintptr_t)entry->data);
935 }
936 }
937
938 static void
939 print_value(struct value *value, unsigned num_components)
940 {
941 bool same_ssa = true;
942 for (unsigned i = 0; i < num_components; i++) {
943 if (value->ssa.component[i] != i ||
944 (i > 0 && value->ssa.def[i - 1] != value->ssa.def[i])) {
945 same_ssa = false;
946 break;
947 }
948 }
949 if (same_ssa) {
950 printf(" ssa_%d", value->ssa.def[0]->index);
951 } else {
952 for (int i = 0; i < num_components; i++) {
953 if (value->ssa.def[i])
954 printf(" ssa_%d[%u]", value->ssa.def[i]->index, value->ssa.component[i]);
955 else
956 printf(" _");
957 }
958 }
959 }
960
961 static void
962 print_copy_entry(struct copy_entry *entry)
963 {
964 printf(" %s ", glsl_get_type_name(entry->dst.instr->type));
965 nir_print_deref(entry->dst.instr, stdout);
966 printf(":\t");
967
968 unsigned num_components = glsl_get_vector_elements(entry->dst.instr->type);
969 print_value(&entry->src, num_components);
970 printf("\n");
971 }
972
973 static void
974 dump_instr(nir_instr *instr)
975 {
976 printf(" ");
977 nir_print_instr(instr, stdout);
978 printf("\n");
979 }
980
981 static void
982 dump_copy_entries(struct copies *copies)
983 {
984 hash_table_foreach(copies->ht, ht_entry) {
985 struct util_dynarray *copies_array =
986 &((struct copies_dynarray *)ht_entry->data)->arr;
987
988 util_dynarray_foreach(copies_array, struct copy_entry, iter)
989 print_copy_entry(iter);
990 }
991
992 util_dynarray_foreach(&copies->arr, struct copy_entry, iter)
993 print_copy_entry(iter);
994
995 printf("\n");
996 }
997
998 static void
999 copy_prop_vars_block(struct copy_prop_var_state *state,
1000 nir_builder *b, nir_block *block,
1001 struct copies *copies)
1002 {
1003 if (debug) {
1004 printf("# block%d\n", block->index);
1005 dump_copy_entries(copies);
1006 }
1007
1008 nir_foreach_instr_safe(instr, block) {
1009 if (debug && instr->type == nir_instr_type_deref)
1010 dump_instr(instr);
1011
1012 if (instr->type == nir_instr_type_call) {
1013 if (debug)
1014 dump_instr(instr);
1015 apply_barrier_for_modes(state, copies, nir_var_shader_out | nir_var_shader_temp | nir_var_function_temp | nir_var_mem_ssbo | nir_var_mem_shared | nir_var_mem_global);
1016 if (debug)
1017 dump_copy_entries(copies);
1018 continue;
1019 }
1020
1021 if (instr->type != nir_instr_type_intrinsic)
1022 continue;
1023
1024 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1025 switch (intrin->intrinsic) {
1026 case nir_intrinsic_barrier:
1027 if (debug)
1028 dump_instr(instr);
1029
1030 if (nir_intrinsic_memory_semantics(intrin) & NIR_MEMORY_ACQUIRE)
1031 apply_barrier_for_modes(state, copies, nir_intrinsic_memory_modes(intrin));
1032 break;
1033
1034 case nir_intrinsic_emit_vertex:
1035 case nir_intrinsic_emit_vertex_with_counter:
1036 if (debug)
1037 dump_instr(instr);
1038
1039 apply_barrier_for_modes(state, copies, nir_var_shader_out);
1040 break;
1041
1042 case nir_intrinsic_report_ray_intersection:
1043 apply_barrier_for_modes(state, copies, nir_var_mem_ssbo | nir_var_mem_global | nir_var_shader_call_data | nir_var_ray_hit_attrib);
1044 break;
1045
1046 case nir_intrinsic_ignore_ray_intersection:
1047 case nir_intrinsic_terminate_ray:
1048 apply_barrier_for_modes(state, copies, nir_var_mem_ssbo | nir_var_mem_global | nir_var_shader_call_data);
1049 break;
1050
1051 case nir_intrinsic_load_deref: {
1052 if (debug)
1053 dump_instr(instr);
1054
1055 if (nir_intrinsic_access(intrin) & ACCESS_VOLATILE)
1056 break;
1057
1058 nir_deref_and_path src = { nir_src_as_deref(intrin->src[0]), NULL };
1059
1060 /* If this is a load from a read-only mode, then all this pass would
1061 * do is combine redundant loads and CSE should be more efficient for
1062 * that.
1063 */
1064 nir_variable_mode ignore = nir_var_read_only_modes & ~nir_var_vec_indexable_modes;
1065 if (nir_deref_mode_must_be(src.instr, ignore))
1066 break;
1067
1068 /* Ignore trivial casts. If a trivial cast is applied to an array deref of a
1069 * vector, not stripping it makes is_array_deref_of_vector (wrongly) return false. */
1070 while (src.instr->deref_type == nir_deref_type_cast &&
1071 nir_deref_instr_parent(src.instr) && nir_deref_cast_is_trivial(src.instr))
1072 src.instr = nir_deref_instr_parent(src.instr);
1073
1074 /* Direct array_derefs of vectors operate on the vectors (the parent
1075 * deref). Indirects will be handled like other derefs.
1076 */
1077 int vec_index = 0;
1078 nir_deref_and_path vec_src = src;
1079 if (is_array_deref_of_vector(&src) && nir_src_is_const(src.instr->arr.index)) {
1080 vec_src.instr = nir_deref_instr_parent(src.instr);
1081 unsigned vec_comps = glsl_get_vector_elements(vec_src.instr->type);
1082 vec_index = nir_src_as_uint(src.instr->arr.index);
1083
1084 /* Loading from an invalid index yields an undef */
1085 if (vec_index >= vec_comps) {
1086 b->cursor = nir_instr_remove(instr);
1087 nir_def *u = nir_undef(b, 1, intrin->def.bit_size);
1088 nir_def_rewrite_uses(&intrin->def, u);
1089 state->progress = true;
1090 break;
1091 }
1092 }
1093
1094 bool src_entry_equal = false;
1095 struct copy_entry *src_entry =
1096 lookup_entry_for_deref(state, copies, &src,
1097 nir_derefs_a_contains_b_bit, &src_entry_equal);
1098 struct value value = { 0 };
1099 if (try_load_from_entry(state, src_entry, b, intrin, &src, &value)) {
1100 if (value.is_ssa) {
1101 /* try_load_from_entry() has already ensured that we get a single SSA
1102 * value that has all of the channels. We just have to do the
1103 * rewrite operation. Note that for array derefs of vectors,
1104 * channel 0 is used.
1105 */
1106 if (intrin->instr.block) {
1107 /* The lookup left our instruction in-place. This means it
1108 * must have used it to vec up a bunch of different sources.
1109 * We need to be careful when rewriting uses so we don't
1110 * rewrite the vecN itself.
1111 */
1112 nir_def_rewrite_uses_after(&intrin->def,
1113 value.ssa.def[0],
1114 value.ssa.def[0]->parent_instr);
1115 } else {
1116 nir_def_rewrite_uses(&intrin->def,
1117 value.ssa.def[0]);
1118 }
1119 } else {
1120 /* We're turning it into a load of a different variable */
1121 intrin->src[0] = nir_src_for_ssa(&value.deref.instr->def);
1122
1123 /* Put it back in again. */
1124 nir_builder_instr_insert(b, instr);
1125 value_set_ssa_components(&value, &intrin->def,
1126 intrin->num_components);
1127 }
1128 state->progress = true;
1129 } else {
1130 value_set_ssa_components(&value, &intrin->def,
1131 intrin->num_components);
1132 }
1133
1134 /* Now that we have a value, we're going to store it back so that we
1135 * have the right value next time we come looking for it. In order
1136 * to do this, we need an exact match, not just something that
1137 * contains what we're looking for.
1138 *
1139 * We avoid doing another lookup if src.instr == vec_src.instr.
1140 */
1141 struct copy_entry *entry = src_entry;
1142 if (src.instr != vec_src.instr)
1143 entry = lookup_entry_for_deref(state, copies, &vec_src,
1144 nir_derefs_equal_bit, NULL);
1145 else if (!src_entry_equal)
1146 entry = NULL;
1147
1148 if (!entry)
1149 entry = copy_entry_create(state, copies, &vec_src);
1150
1151 /* Update the entry with the value of the load. This way
1152 * we can potentially remove subsequent loads.
1153 */
1154 value_set_from_value(&entry->src, &value, vec_index,
1155 (1 << intrin->num_components) - 1);
1156 break;
1157 }
1158
1159 case nir_intrinsic_store_deref: {
1160 if (debug)
1161 dump_instr(instr);
1162
1163 nir_deref_and_path dst = { nir_src_as_deref(intrin->src[0]), NULL };
1164 assert(glsl_type_is_vector_or_scalar(dst.instr->type));
1165
1166 /* Ignore trivial casts. If a trivial cast is applied to an array deref of a
1167 * vector, not stripping it makes is_array_deref_of_vector (wrongly) return false. */
1168 while (dst.instr->deref_type == nir_deref_type_cast &&
1169 nir_deref_instr_parent(dst.instr) && nir_deref_cast_is_trivial(dst.instr))
1170 dst.instr = nir_deref_instr_parent(dst.instr);
1171
1172 /* Direct array_derefs of vectors operate on the vectors (the parent
1173 * deref). Indirects will be handled like other derefs.
1174 */
1175 int vec_index = 0;
1176 nir_deref_and_path vec_dst = dst;
1177 if (is_array_deref_of_vector(&dst) && nir_src_is_const(dst.instr->arr.index)) {
1178 vec_dst.instr = nir_deref_instr_parent(dst.instr);
1179 unsigned vec_comps = glsl_get_vector_elements(vec_dst.instr->type);
1180
1181 vec_index = nir_src_as_uint(dst.instr->arr.index);
1182
1183 /* Storing to an invalid index is a no-op. */
1184 if (vec_index >= vec_comps) {
1185 nir_instr_remove(instr);
1186 state->progress = true;
1187 break;
1188 }
1189 }
1190
1191 if (nir_intrinsic_access(intrin) & ACCESS_VOLATILE) {
1192 unsigned wrmask = nir_intrinsic_write_mask(intrin);
1193 kill_aliases(state, copies, &dst, wrmask);
1194 break;
1195 }
1196
1197 struct copy_entry *entry =
1198 lookup_entry_for_deref(state, copies, &dst, nir_derefs_equal_bit, NULL);
1199 if (entry && value_equals_store_src(&entry->src, intrin)) {
1200 /* If we are storing the value from a load of the same var, the
1201 * store is redundant so remove it.
1202 */
1203 nir_instr_remove(instr);
1204 state->progress = true;
1205 } else {
1206 struct value value = { 0 };
1207 value_set_ssa_components(&value, intrin->src[1].ssa,
1208 intrin->num_components);
1209 unsigned wrmask = nir_intrinsic_write_mask(intrin);
1210 struct copy_entry *entry =
1211 get_entry_and_kill_aliases(state, copies, &vec_dst, wrmask);
1212 value_set_from_value(&entry->src, &value, vec_index, wrmask);
1213 }
1214
1215 break;
1216 }
1217
1218 case nir_intrinsic_copy_deref: {
1219 if (debug)
1220 dump_instr(instr);
1221
1222 nir_deref_and_path dst = { nir_src_as_deref(intrin->src[0]), NULL };
1223 nir_deref_and_path src = { nir_src_as_deref(intrin->src[1]), NULL };
1224
1225 /* The copy_deref intrinsic doesn't keep track of num_components, so
1226 * get it ourselves.
1227 */
1228 unsigned num_components = glsl_get_vector_elements(dst.instr->type);
1229 unsigned full_mask = (1 << num_components) - 1;
1230
1231 if ((nir_intrinsic_src_access(intrin) & ACCESS_VOLATILE) ||
1232 (nir_intrinsic_dst_access(intrin) & ACCESS_VOLATILE)) {
1233 kill_aliases(state, copies, &dst, full_mask);
1234 break;
1235 }
1236
1237 nir_deref_compare_result comp =
1238 nir_compare_derefs_and_paths(state->mem_ctx, &src, &dst);
1239 if (comp & nir_derefs_equal_bit) {
1240 /* This is a no-op self-copy. Get rid of it */
1241 nir_instr_remove(instr);
1242 state->progress = true;
1243 continue;
1244 }
1245
1246 /* Copies of direct array derefs of vectors are not handled. Just
1247 * invalidate what's written and bail.
1248 */
1249 if ((is_array_deref_of_vector(&src) && nir_src_is_const(src.instr->arr.index)) ||
1250 (is_array_deref_of_vector(&dst) && nir_src_is_const(dst.instr->arr.index))) {
1251 kill_aliases(state, copies, &dst, full_mask);
1252 break;
1253 }
1254
1255 struct copy_entry *src_entry =
1256 lookup_entry_for_deref(state, copies, &src, nir_derefs_a_contains_b_bit, NULL);
1257 struct value value;
1258 if (try_load_from_entry(state, src_entry, b, intrin, &src, &value)) {
1259 /* If load works, intrin (the copy_deref) is removed. */
1260 if (value.is_ssa) {
1261 nir_store_deref(b, dst.instr, value.ssa.def[0], full_mask);
1262 } else {
1263 /* If this would be a no-op self-copy, don't bother. */
1264 comp = nir_compare_derefs_and_paths(state->mem_ctx, &value.deref, &dst);
1265 if (comp & nir_derefs_equal_bit)
1266 continue;
1267
1268 /* Just turn it into a copy of a different deref */
1269 intrin->src[1] = nir_src_for_ssa(&value.deref.instr->def);
1270
1271 /* Put it back in again. */
1272 nir_builder_instr_insert(b, instr);
1273 }
1274
1275 state->progress = true;
1276 } else {
1277 value = (struct value){
1278 .is_ssa = false,
1279 { .deref = src },
1280 };
1281 }
1282
1283 nir_variable *src_var = nir_deref_instr_get_variable(src.instr);
1284 if (src_var && src_var->data.cannot_coalesce) {
1285 /* The source cannot be coalesced, which means we can't propagate
1286 * this copy.
1287 */
1288 break;
1289 }
1290
1291 struct copy_entry *dst_entry =
1292 get_entry_and_kill_aliases(state, copies, &dst, full_mask);
1293 value_set_from_value(&dst_entry->src, &value, 0, full_mask);
1294 break;
1295 }
1296
1297 case nir_intrinsic_trace_ray:
1298 case nir_intrinsic_execute_callable:
1299 case nir_intrinsic_rt_trace_ray:
1300 case nir_intrinsic_rt_execute_callable: {
1301 if (debug)
1302 dump_instr(instr);
1303
1304 nir_deref_and_path payload = {
1305 nir_src_as_deref(*nir_get_shader_call_payload_src(intrin)), NULL
1306 };
1307 nir_component_mask_t full_mask = (1 << glsl_get_vector_elements(payload.instr->type)) - 1;
1308 kill_aliases(state, copies, &payload, full_mask);
1309 break;
1310 }
1311
1312 case nir_intrinsic_memcpy_deref:
1313 case nir_intrinsic_deref_atomic:
1314 case nir_intrinsic_deref_atomic_swap:
1315 if (debug)
1316 dump_instr(instr);
1317
1318 nir_deref_and_path dst = { nir_src_as_deref(intrin->src[0]), NULL };
1319 unsigned num_components = glsl_get_vector_elements(dst.instr->type);
1320 unsigned full_mask = (1 << num_components) - 1;
1321 kill_aliases(state, copies, &dst, full_mask);
1322 break;
1323
1324 case nir_intrinsic_store_deref_block_intel: {
1325 if (debug)
1326 dump_instr(instr);
1327
1328 /* Invalidate the whole variable (or cast) and anything that aliases
1329 * with it.
1330 */
1331 nir_deref_and_path dst = { nir_src_as_deref(intrin->src[0]), NULL };
1332 while (nir_deref_instr_parent(dst.instr))
1333 dst.instr = nir_deref_instr_parent(dst.instr);
1334 assert(dst.instr->deref_type == nir_deref_type_var ||
1335 dst.instr->deref_type == nir_deref_type_cast);
1336
1337 unsigned num_components = glsl_get_vector_elements(dst.instr->type);
1338 unsigned full_mask = (1 << num_components) - 1;
1339 kill_aliases(state, copies, &dst, full_mask);
1340 break;
1341 }
1342
1343 default:
1344 continue; /* To skip the debug below. */
1345 }
1346
1347 if (debug)
1348 dump_copy_entries(copies);
1349 }
1350 }
1351
1352 static void
1353 clone_copies(struct copy_prop_var_state *state, struct copies *clones,
1354 struct copies *copies)
1355 {
1356 /* Simply clone the entire hash table. This is much faster than trying to
1357 * rebuild it and is needed to avoid slow compilation of very large shaders.
1358 * The data itself is only cloned later, if and when it is ever looked up.
1359 */
1360 assert(clones->ht == NULL);
1361 clones->ht = _mesa_hash_table_clone(copies->ht, state->mem_ctx);
1362
1363 util_dynarray_clone(&clones->arr, state->mem_ctx, &copies->arr);
1364 }
1365
1366 /* Returns an existing struct for reuse, or creates a new one if they are
1367 * all in use. This greatly reduces the time spent allocating memory compared
1368 * to creating a fresh one each time.
1369 */
1370 static struct copies *
1371 get_copies_structure(struct copy_prop_var_state *state)
1372 {
1373 struct copies *copies;
1374 if (list_is_empty(&state->unused_copy_structs_list)) {
1375 copies = ralloc(state->mem_ctx, struct copies);
1376 copies->ht = NULL;
1377 util_dynarray_init(&copies->arr, state->mem_ctx);
1378 } else {
1379 copies = list_entry(state->unused_copy_structs_list.next,
1380 struct copies, node);
1381 list_del(&copies->node);
1382 }
1383
1384 return copies;
1385 }
1386
1387 static void
1388 clear_copies_structure(struct copy_prop_var_state *state,
1389 struct copies *copies)
1390 {
1391 ralloc_free(copies->ht);
1392 copies->ht = NULL;
1393
1394 list_add(&copies->node, &state->unused_copy_structs_list);
1395 }
1396
1397 static void
1398 copy_prop_vars_cf_node(struct copy_prop_var_state *state,
1399 struct copies *copies, nir_cf_node *cf_node)
1400 {
1401 switch (cf_node->type) {
1402 case nir_cf_node_function: {
1403 nir_function_impl *impl = nir_cf_node_as_function(cf_node);
1404
1405 struct copies *impl_copies = get_copies_structure(state);
1406 impl_copies->ht = _mesa_hash_table_create(state->mem_ctx,
1407 _mesa_hash_pointer,
1408 _mesa_key_pointer_equal);
1409
1410 foreach_list_typed_safe(nir_cf_node, cf_node, node, &impl->body)
1411 copy_prop_vars_cf_node(state, impl_copies, cf_node);
1412
1413 clear_copies_structure(state, impl_copies);
1414
1415 break;
1416 }
1417
1418 case nir_cf_node_block: {
1419 nir_block *block = nir_cf_node_as_block(cf_node);
1420 nir_builder b = nir_builder_create(state->impl);
1421 copy_prop_vars_block(state, &b, block, copies);
1422 break;
1423 }
1424
1425 case nir_cf_node_if: {
1426 nir_if *if_stmt = nir_cf_node_as_if(cf_node);
1427
1428 /* Create a new hash table for tracking vars and fill it with clones of
1429 * the copy arrays for each variable we are tracking.
1430 *
1431 * We clone the copies for each branch of the if statement. The idea is
1432 * that both branches see the same state of available copies but do not
1433 * interfere with each other.
1434 */
1435 if (!exec_list_is_empty(&if_stmt->then_list)) {
1436 struct copies *then_copies = get_copies_structure(state);
1437 clone_copies(state, then_copies, copies);
1438
1439 foreach_list_typed_safe(nir_cf_node, cf_node, node, &if_stmt->then_list)
1440 copy_prop_vars_cf_node(state, then_copies, cf_node);
1441
1442 clear_copies_structure(state, then_copies);
1443 }
1444
1445 if (!exec_list_is_empty(&if_stmt->else_list)) {
1446 struct copies *else_copies = get_copies_structure(state);
1447 clone_copies(state, else_copies, copies);
1448
1449 foreach_list_typed_safe(nir_cf_node, cf_node, node, &if_stmt->else_list)
1450 copy_prop_vars_cf_node(state, else_copies, cf_node);
1451
1452 clear_copies_structure(state, else_copies);
1453 }
1454
1455 /* Both branches' copies can be ignored, since the effect of running both
1456 * branches was captured in the first pass that collects vars_written.
1457 */
1458
1459 invalidate_copies_for_cf_node(state, copies, cf_node);
1460
1461 break;
1462 }
1463
1464 case nir_cf_node_loop: {
1465 nir_loop *loop = nir_cf_node_as_loop(cf_node);
1466 assert(!nir_loop_has_continue_construct(loop));
1467
1468 /* Invalidate before cloning the copies for the loop, since the loop
1469 * body can be executed more than once.
1470 */
1471
1472 invalidate_copies_for_cf_node(state, copies, cf_node);
1473
1474 struct copies *loop_copies = get_copies_structure(state);
1475 clone_copies(state, loop_copies, copies);
1476
1477 foreach_list_typed_safe(nir_cf_node, cf_node, node, &loop->body)
1478 copy_prop_vars_cf_node(state, loop_copies, cf_node);
1479
1480 clear_copies_structure(state, loop_copies);
1481
1482 break;
1483 }
1484
1485 default:
1486 unreachable("Invalid CF node type");
1487 }
1488 }
1489
1490 static bool
1491 nir_copy_prop_vars_impl(nir_function_impl *impl)
1492 {
1493 void *mem_ctx = ralloc_context(NULL);
1494
1495 if (debug) {
1496 nir_metadata_require(impl, nir_metadata_block_index);
1497 printf("## nir_copy_prop_vars_impl for %s\n", impl->function->name);
1498 }
1499
1500 struct copy_prop_var_state state = {
1501 .impl = impl,
1502 .mem_ctx = mem_ctx,
1503 .lin_ctx = linear_context(mem_ctx),
1504
1505 .vars_written_map = _mesa_pointer_hash_table_create(mem_ctx),
1506 };
1507 list_inithead(&state.unused_copy_structs_list);
1508
1509 gather_vars_written(&state, NULL, &impl->cf_node);
1510
1511 copy_prop_vars_cf_node(&state, NULL, &impl->cf_node);
1512
1513 if (state.progress) {
1514 nir_metadata_preserve(impl, nir_metadata_control_flow);
1515 } else {
1516 nir_metadata_preserve(impl, nir_metadata_all);
1517 }
1518
1519 ralloc_free(mem_ctx);
1520 return state.progress;
1521 }
1522
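/* A typical driver usage pattern (hypothetical example, not mandated by this
 * file) is to run the pass in an optimization loop until it stops making
 * progress, together with cleanup passes such as nir_opt_dce:
 *
 *    bool progress;
 *    do {
 *       progress = false;
 *       NIR_PASS(progress, shader, nir_opt_copy_prop_vars);
 *       NIR_PASS(progress, shader, nir_opt_dce);
 *    } while (progress);
 */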
1523 bool
1524 nir_opt_copy_prop_vars(nir_shader *shader)
1525 {
1526 bool progress = false;
1527
1528 nir_foreach_function_impl(impl, shader) {
1529 progress |= nir_copy_prop_vars_impl(impl);
1530 }
1531
1532 return progress;
1533 }
1534