/*
 * Copyright © 2020 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_worklist.h"
#include "util/u_vector.h"

static bool
combine_all_barriers(nir_intrinsic_instr *a, nir_intrinsic_instr *b, void *_)
{
   nir_intrinsic_set_memory_modes(
      a, nir_intrinsic_memory_modes(a) | nir_intrinsic_memory_modes(b));
   nir_intrinsic_set_memory_semantics(
      a, nir_intrinsic_memory_semantics(a) | nir_intrinsic_memory_semantics(b));
   nir_intrinsic_set_memory_scope(
      a, MAX2(nir_intrinsic_memory_scope(a), nir_intrinsic_memory_scope(b)));
   nir_intrinsic_set_execution_scope(
      a, MAX2(nir_intrinsic_execution_scope(a), nir_intrinsic_execution_scope(b)));
   return true;
}
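
/* Worked example (illustrative, not taken from a real shader): combining a
 * barrier that has only the ssbo mode, acquire semantics, and workgroup
 * memory scope with one that has only the shared mode, release semantics,
 * and device memory scope leaves a single barrier covering ssbo|shared with
 * acquire+release semantics at device scope, since the modes and semantics
 * are ORed together and the scopes take the MAX2.
 */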

static bool
nir_opt_combine_barriers_impl(nir_function_impl *impl,
                              nir_combine_barrier_cb combine_cb,
                              void *data)
{
   bool progress = false;

   nir_foreach_block(block, impl) {
      nir_intrinsic_instr *prev = NULL;

      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic) {
            prev = NULL;
            continue;
         }

         nir_intrinsic_instr *current = nir_instr_as_intrinsic(instr);
         if (current->intrinsic != nir_intrinsic_barrier) {
            prev = NULL;
            continue;
         }

         if (prev && combine_cb(prev, current, data)) {
            nir_instr_remove(&current->instr);
            progress = true;
         } else {
            prev = current;
         }
      }
   }

   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_control_flow |
                                  nir_metadata_live_defs);
   } else {
      nir_metadata_preserve(impl, nir_metadata_all);
   }

   return progress;
}

/* Combine adjacent scoped barriers. */
bool
nir_opt_combine_barriers(nir_shader *shader,
                         nir_combine_barrier_cb combine_cb,
                         void *data)
{
   /* Default to combining everything. Only some backends can do better; a
    * sketch of a stricter callback follows this function.
    */
   if (!combine_cb)
      combine_cb = combine_all_barriers;

   bool progress = false;

   nir_foreach_function_impl(impl, shader) {
      if (nir_opt_combine_barriers_impl(impl, combine_cb, data)) {
         progress = true;
      }
   }

   return progress;
}
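
/* A hedged sketch of a backend-specific combine callback; the function name
 * and the "same execution scope" policy are made up for illustration, but
 * the helpers it calls are the ones used by combine_all_barriers above.  A
 * driver that cannot merge barriers with differing execution scopes could
 * reject those pairs and widen everything else.
 */
#if 0
static bool
combine_same_exec_scope_barriers(nir_intrinsic_instr *a,
                                 nir_intrinsic_instr *b, void *_)
{
   /* Refuse to merge when the execution scopes disagree; the pass then keeps
    * both barriers and continues scanning from b.
    */
   if (nir_intrinsic_execution_scope(a) != nir_intrinsic_execution_scope(b))
      return false;

   nir_intrinsic_set_memory_modes(
      a, nir_intrinsic_memory_modes(a) | nir_intrinsic_memory_modes(b));
   nir_intrinsic_set_memory_semantics(
      a, nir_intrinsic_memory_semantics(a) | nir_intrinsic_memory_semantics(b));
   nir_intrinsic_set_memory_scope(
      a, MAX2(nir_intrinsic_memory_scope(a), nir_intrinsic_memory_scope(b)));
   return true;
}

/* A backend would then opt in with:
 *
 *    nir_opt_combine_barriers(shader, combine_same_exec_scope_barriers, NULL);
 */
#endif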

static bool
barrier_happens_before(const nir_instr *a, const nir_instr *b)
{
   if (a->block == b->block)
      return a->index < b->index;

   return nir_block_dominates(a->block, b->block);
}

static bool
nir_opt_barrier_modes_impl(nir_function_impl *impl)
{
   bool progress = false;

   nir_instr_worklist *barriers = nir_instr_worklist_create();
   if (!barriers)
      return false;

   struct u_vector mem_derefs;
   if (!u_vector_init(&mem_derefs, 32, sizeof(struct nir_instr *))) {
      nir_instr_worklist_destroy(barriers);
      return false;
   }

   const unsigned all_memory_modes = nir_var_image |
                                     nir_var_mem_ssbo |
                                     nir_var_mem_shared |
                                     nir_var_mem_global;

   nir_foreach_block_safe(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type == nir_instr_type_intrinsic) {
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

            if (intrin->intrinsic == nir_intrinsic_barrier)
               nir_instr_worklist_push_tail(barriers, instr);

         } else if (instr->type == nir_instr_type_deref) {
            nir_deref_instr *deref = nir_instr_as_deref(instr);

            if (nir_deref_mode_may_be(deref, all_memory_modes) ||
                glsl_contains_atomic(deref->type)) {
               nir_deref_instr **tail = u_vector_add(&mem_derefs);
               *tail = deref;
            }
         }
      }
   }

   nir_foreach_instr_in_worklist(instr, barriers) {
      nir_intrinsic_instr *barrier = nir_instr_as_intrinsic(instr);

      const unsigned barrier_modes = nir_intrinsic_memory_modes(barrier);
      unsigned new_modes = barrier_modes & ~all_memory_modes;

      /* If a barrier dominates all memory accesses for a particular mode (or
       * there are none), then the barrier cannot affect those accesses.  We
       * can drop that mode from the barrier.
       *
       * For each barrier, we look at the list of memory derefs, and see if
       * the barrier fails to dominate the deref.  If so, then there's at
       * least one memory access that may happen before the barrier, so we
       * need to keep the mode.  Any modes not kept are discarded.
       */
      nir_deref_instr **p_deref;
      u_vector_foreach(p_deref, &mem_derefs) {
         nir_deref_instr *deref = *p_deref;
         const unsigned atomic_mode =
            glsl_contains_atomic(deref->type) ? nir_var_mem_ssbo : 0;
         const unsigned deref_modes =
            (deref->modes | atomic_mode) & barrier_modes;

         if (deref_modes &&
             !barrier_happens_before(&barrier->instr, &deref->instr))
            new_modes |= deref_modes;
      }

      /* If we don't need all the modes, update the barrier. */
      if (barrier_modes != new_modes) {
         nir_intrinsic_set_memory_modes(barrier, new_modes);
         progress = true;
      }

      /* Shared memory only exists within a workgroup, so synchronizing it
       * beyond workgroup scope is nonsense.
       */
      if (nir_intrinsic_execution_scope(barrier) == SCOPE_NONE &&
          new_modes == nir_var_mem_shared) {
         nir_intrinsic_set_memory_scope(barrier,
            MIN2(nir_intrinsic_memory_scope(barrier), SCOPE_WORKGROUP));
         progress = true;
      }
   }

   nir_instr_worklist_destroy(barriers);
   u_vector_finish(&mem_derefs);

   return progress;
}

/**
 * Reduce barriers to remove unnecessary modes and scope.
 *
 * This pass must be called before nir_lower_explicit_io lowers derefs!
 * (An ordering sketch follows the function below.)
 *
 * Many shaders issue full memory barriers, which may need to synchronize
 * access to images, SSBOs, shared local memory, or global memory.  However,
 * many of them only use a subset of those memory types - say, only SSBOs.
 *
 * Shaders may also have patterns such as:
 *
 *    1. shared local memory access
 *    2. barrier with full variable modes
 *    3. more shared local memory access
 *    4. image access
 *
 * In this case, the barrier is needed to ensure synchronization between the
 * various shared memory operations.  Image reads and writes also exist, but
 * they are all on one side of the barrier, so the barrier is a no-op for
 * image access.  We can drop the image mode from the barrier in this case
 * too.
 *
 * In addition, we can reduce the memory scope of shared-only barriers, as
 * shared local memory only exists within a workgroup.
 */
bool
nir_opt_barrier_modes(nir_shader *shader)
{
   bool progress = false;

   nir_foreach_function_impl(impl, shader) {
      nir_metadata_require(impl, nir_metadata_dominance |
                                 nir_metadata_instr_index);

      if (nir_opt_barrier_modes_impl(impl)) {
         nir_metadata_preserve(impl, nir_metadata_control_flow |
                                     nir_metadata_live_defs);
         progress = true;
      } else {
         nir_metadata_preserve(impl, nir_metadata_all);
      }
   }

   return progress;
}
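
/* A minimal ordering sketch.  The driver_finalize_nir helper and the choice
 * of nir_var_mem_ssbo with nir_address_format_64bit_global are hypothetical
 * examples; NIR_PASS_V and nir_lower_explicit_io are existing NIR helpers.
 * The point is the ordering: nir_opt_barrier_modes walks derefs, so it must
 * run before nir_lower_explicit_io replaces them with explicit load/store
 * intrinsics.
 */
#if 0
static void
driver_finalize_nir(nir_shader *s)
{
   /* Deref-based barrier cleanup: run while derefs still exist. */
   NIR_PASS_V(s, nir_opt_barrier_modes);
   NIR_PASS_V(s, nir_opt_combine_barriers, NULL, NULL);

   /* Only afterwards is it safe to lower derefs to explicit I/O. */
   NIR_PASS_V(s, nir_lower_explicit_io, nir_var_mem_ssbo,
              nir_address_format_64bit_global);
}
#endif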