/*
 * Copyright © 2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"

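/* Per-handle bookkeeping for one non-uniform resource source of an
 * instruction.  src points at the source to rewrite; handle is the
 * non-uniform SSA value (the array index for bindful derefs, the raw
 * handle/offset otherwise); parent_deref is the variable deref an array
 * deref hangs off of, or NULL for the bindless case; first is filled in
 * later with the subgroup-uniform value read from the first invocation.
 */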
struct nu_handle {
   nir_src *src;
   nir_def *handle;
   nir_deref_instr *parent_deref;
   nir_def *first;
};

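/* Fills out *h for the given source.  Returns true if the source actually
 * needs lowering; direct variable derefs and constant indices or handles
 * are already uniform, so they return false.
 */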
static bool
nu_handle_init(struct nu_handle *h, nir_src *src)
{
   h->src = src;

   nir_deref_instr *deref = nir_src_as_deref(*src);
   if (deref) {
      if (deref->deref_type == nir_deref_type_var)
         return false;

      nir_deref_instr *parent = nir_deref_instr_parent(deref);
      assert(parent->deref_type == nir_deref_type_var);

      assert(deref->deref_type == nir_deref_type_array);
      if (nir_src_is_const(deref->arr.index))
         return false;

      h->handle = deref->arr.index.ssa;
      h->parent_deref = parent;

      return true;
   } else {
      if (nir_src_is_const(*src))
         return false;

      h->handle = src->ssa;
      h->parent_deref = NULL;

      return true;
   }
}

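/* Emits the waterfall comparison for one handle: reads each channel from
 * the first active invocation, records the result in handle->first, and
 * returns an i1 that is true for every invocation whose handle matches it.
 * The optional callback lets the driver restrict which channels have to be
 * subgroup-uniform.
 */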
static nir_def *
nu_handle_compare(const nir_lower_non_uniform_access_options *options,
                  nir_builder *b, struct nu_handle *handle)
{
   nir_component_mask_t channel_mask = ~0;
   if (options->callback)
      channel_mask = options->callback(handle->src, options->callback_data);
   channel_mask &= nir_component_mask(handle->handle->num_components);

   nir_def *channels[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < handle->handle->num_components; i++)
      channels[i] = nir_channel(b, handle->handle, i);

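   /* Start from the original handle and overwrite only the masked channels
    * with their first-invocation values; channels outside the mask may stay
    * non-uniform.
    */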
   handle->first = handle->handle;
   nir_def *equal_first = nir_imm_true(b);
   u_foreach_bit(i, channel_mask) {
      nir_def *first = nir_read_first_invocation(b, channels[i]);
      handle->first = nir_vector_insert_imm(b, handle->first, first, i);

      equal_first = nir_iand(b, equal_first, nir_ieq(b, first, channels[i]));
   }

   return equal_first;
}

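/* Rewrites the instruction's source to use the subgroup-uniform value
 * computed by nu_handle_compare(), rebuilding the array deref when the
 * handle came from one.
 */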
static void
nu_handle_rewrite(nir_builder *b, struct nu_handle *h)
{
   if (h->parent_deref) {
      /* Replicate the deref. */
      nir_deref_instr *deref =
         nir_build_deref_array(b, h->parent_deref, h->first);
      *(h->src) = nir_src_for_ssa(&deref->def);
   } else {
      *(h->src) = nir_src_for_ssa(h->first);
   }
}

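/* Lowers a texture instruction whose texture and/or sampler handle is
 * non-uniform.  It gathers up to two handles (one texture, one sampler),
 * wraps the instruction in a loop, and executes it only for the
 * invocations whose handles match those of the first active invocation.
 * Returns true on progress.
 */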
static bool
lower_non_uniform_tex_access(const nir_lower_non_uniform_access_options *options,
                             nir_builder *b, nir_tex_instr *tex)
{
   if (!tex->texture_non_uniform && !tex->sampler_non_uniform)
      return false;

   /* We can have at most one texture and one sampler handle */
   unsigned num_handles = 0;
   struct nu_handle handles[2];
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      switch (tex->src[i].src_type) {
      case nir_tex_src_texture_offset:
      case nir_tex_src_texture_handle:
      case nir_tex_src_texture_deref:
         if (!tex->texture_non_uniform)
            continue;
         break;

      case nir_tex_src_sampler_offset:
      case nir_tex_src_sampler_handle:
      case nir_tex_src_sampler_deref:
         if (!tex->sampler_non_uniform)
            continue;
         break;

      default:
         continue;
      }

      assert(num_handles < ARRAY_SIZE(handles));
      if (nu_handle_init(&handles[num_handles], &tex->src[i].src))
         num_handles++;
   }

   if (num_handles == 0) {
      /* nu_handle_init() returned false because the handles are uniform. */
      tex->texture_non_uniform = false;
      tex->sampler_non_uniform = false;
      return false;
   }

   b->cursor = nir_instr_remove(&tex->instr);

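   /* Re-emit the instruction inside a loop.  Each trip through the loop
    * serves the invocations whose handles all match the first active
    * invocation's; those invocations break out, and the loop repeats for
    * the rest.  If the texture and sampler share a handle, compare it only
    * once.
    */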
   nir_push_loop(b);

   nir_def *all_equal_first = nir_imm_true(b);
   for (unsigned i = 0; i < num_handles; i++) {
      if (i && handles[i].handle == handles[0].handle) {
         handles[i].first = handles[0].first;
         continue;
      }

      nir_def *equal_first = nu_handle_compare(options, b, &handles[i]);
      all_equal_first = nir_iand(b, all_equal_first, equal_first);
   }

   nir_push_if(b, all_equal_first);

   for (unsigned i = 0; i < num_handles; i++)
      nu_handle_rewrite(b, &handles[i]);

   nir_builder_instr_insert(b, &tex->instr);
   nir_jump(b, nir_jump_break);

   tex->texture_non_uniform = false;
   tex->sampler_non_uniform = false;

   return true;
}

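/* Lowers one intrinsic whose resource handle (source index handle_src) is
 * flagged ACCESS_NON_UNIFORM, using the same loop construction as the
 * texture path.  The flag is cleared whether or not lowering was required,
 * so the instruction is not processed again.  Returns true on progress.
 */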
static bool
lower_non_uniform_access_intrin(const nir_lower_non_uniform_access_options *options,
                                nir_builder *b, nir_intrinsic_instr *intrin,
                                unsigned handle_src)
{
   if (!(nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM))
      return false;

   struct nu_handle handle;
   if (!nu_handle_init(&handle, &intrin->src[handle_src])) {
      nir_intrinsic_set_access(intrin, nir_intrinsic_access(intrin) & ~ACCESS_NON_UNIFORM);
      return false;
   }

   b->cursor = nir_instr_remove(&intrin->instr);

   nir_push_loop(b);

   nir_push_if(b, nu_handle_compare(options, b, &handle));

   nu_handle_rewrite(b, &handle);

   nir_builder_instr_insert(b, &intrin->instr);
   nir_jump(b, nir_jump_break);

   nir_intrinsic_set_access(intrin, nir_intrinsic_access(intrin) & ~ACCESS_NON_UNIFORM);

   return true;
}

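/* Walks every instruction in the impl and dispatches to the texture or
 * intrinsic lowering for the access types requested in options->types.
 */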
static bool
nir_lower_non_uniform_access_impl(nir_function_impl *impl,
                                  const nir_lower_non_uniform_access_options *options)
{
   bool progress = false;

   nir_builder b = nir_builder_create(impl);

   nir_foreach_block_safe(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         switch (instr->type) {
         case nir_instr_type_tex: {
            nir_tex_instr *tex = nir_instr_as_tex(instr);
            if ((options->types & nir_lower_non_uniform_texture_access) &&
                lower_non_uniform_tex_access(options, &b, tex))
               progress = true;
            break;
         }

         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            switch (intrin->intrinsic) {
            case nir_intrinsic_load_ubo:
               if ((options->types & nir_lower_non_uniform_ubo_access) &&
                   lower_non_uniform_access_intrin(options, &b, intrin, 0))
                  progress = true;
               break;

            case nir_intrinsic_load_ssbo:
            case nir_intrinsic_ssbo_atomic:
            case nir_intrinsic_ssbo_atomic_swap:
               if ((options->types & nir_lower_non_uniform_ssbo_access) &&
                   lower_non_uniform_access_intrin(options, &b, intrin, 0))
                  progress = true;
               break;

            case nir_intrinsic_store_ssbo:
               /* SSBO stores put the index in the second source */
               if ((options->types & nir_lower_non_uniform_ssbo_access) &&
                   lower_non_uniform_access_intrin(options, &b, intrin, 1))
                  progress = true;
               break;

            case nir_intrinsic_get_ssbo_size:
               if ((options->types & nir_lower_non_uniform_get_ssbo_size) &&
                   lower_non_uniform_access_intrin(options, &b, intrin, 0))
                  progress = true;
               break;

            case nir_intrinsic_image_load:
            case nir_intrinsic_image_sparse_load:
            case nir_intrinsic_image_store:
            case nir_intrinsic_image_atomic:
            case nir_intrinsic_image_atomic_swap:
            case nir_intrinsic_image_levels:
            case nir_intrinsic_image_size:
            case nir_intrinsic_image_samples:
            case nir_intrinsic_image_samples_identical:
            case nir_intrinsic_image_fragment_mask_load_amd:
            case nir_intrinsic_bindless_image_load:
            case nir_intrinsic_bindless_image_sparse_load:
            case nir_intrinsic_bindless_image_store:
            case nir_intrinsic_bindless_image_atomic:
            case nir_intrinsic_bindless_image_atomic_swap:
            case nir_intrinsic_bindless_image_levels:
            case nir_intrinsic_bindless_image_size:
            case nir_intrinsic_bindless_image_samples:
            case nir_intrinsic_bindless_image_samples_identical:
            case nir_intrinsic_bindless_image_fragment_mask_load_amd:
            case nir_intrinsic_image_deref_load:
            case nir_intrinsic_image_deref_sparse_load:
            case nir_intrinsic_image_deref_store:
            case nir_intrinsic_image_deref_atomic:
            case nir_intrinsic_image_deref_atomic_swap:
            case nir_intrinsic_image_deref_levels:
            case nir_intrinsic_image_deref_size:
            case nir_intrinsic_image_deref_samples:
            case nir_intrinsic_image_deref_samples_identical:
            case nir_intrinsic_image_deref_fragment_mask_load_amd:
               if ((options->types & nir_lower_non_uniform_image_access) &&
                   lower_non_uniform_access_intrin(options, &b, intrin, 0))
                  progress = true;
               break;

            default:
               /* Nothing to do */
               break;
            }
            break;
         }

         default:
            /* Nothing to do */
            break;
         }
      }
   }

   if (progress)
      nir_metadata_preserve(impl, nir_metadata_none);

   return progress;
}

/**
 * Lowers non-uniform resource access by using a loop
 *
 * This pass lowers non-uniform resource access by using subgroup operations
 * and a loop.  Most hardware requires that operations such as texture and
 * UBO accesses use a dynamically uniform (or at least subgroup-uniform)
 * resource handle.  This pass allows for non-uniform access by placing the
 * texture instruction in a loop that looks something like this:
 *
 * loop {
 *    bool tex_eq_first = readFirstInvocationARB(texture) == texture;
 *    bool smp_eq_first = readFirstInvocationARB(sampler) == sampler;
 *    if (tex_eq_first && smp_eq_first) {
 *       res = texture(texture, sampler, ...);
 *       break;
 *    }
 * }
 *
 * Fortunately, because the instruction is immediately followed by the only
 * break in the loop, the block containing the instruction dominates the end
 * of the loop.  Therefore, it's safe to move the instruction into the loop
 * without fixing up SSA in any way.
 */
bool
nir_lower_non_uniform_access(nir_shader *shader,
                             const nir_lower_non_uniform_access_options *options)
{
   bool progress = false;

   nir_foreach_function_impl(impl, shader) {
      if (nir_lower_non_uniform_access_impl(impl, options))
         progress = true;
   }

   return progress;
}