/*
 * Copyright © 2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"

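/* Book-keeping for one potentially non-uniform resource handle: the
 * instruction source it came from, the SSA value that actually varies
 * (the handle itself or an array deref's index), the deref's parent for
 * handles that come through a deref chain, and the first-invocation
 * value the source gets rewritten to inside the loop.
 */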
struct nu_handle {
   nir_src *src;
   nir_def *handle;
   nir_deref_instr *parent_deref;
   nir_def *first;
};

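/* Captures the handle behind @src. Returns false when the source needs
 * no lowering: a direct variable deref, a constant array index, and a
 * constant handle are all already uniform.
 */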
static bool
nu_handle_init(struct nu_handle *h, nir_src *src)
{
   h->src = src;

   nir_deref_instr *deref = nir_src_as_deref(*src);
   if (deref) {
      if (deref->deref_type == nir_deref_type_var)
         return false;

      nir_deref_instr *parent = nir_deref_instr_parent(deref);
      assert(parent->deref_type == nir_deref_type_var);

      assert(deref->deref_type == nir_deref_type_array);
      if (nir_src_is_const(deref->arr.index))
         return false;

      h->handle = deref->arr.index.ssa;
      h->parent_deref = parent;

      return true;
   } else {
      if (nir_src_is_const(*src))
         return false;

      h->handle = src->ssa;
      h->parent_deref = NULL;

      return true;
   }
}

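/* Reads the first active invocation's handle (only the channels selected
 * by the optional callback) and returns a boolean that is true in every
 * invocation whose own handle matches it.
 */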
static nir_def *
nu_handle_compare(const nir_lower_non_uniform_access_options *options,
                  nir_builder *b, struct nu_handle *handle)
{
   nir_component_mask_t channel_mask = ~0;
   if (options->callback)
      channel_mask = options->callback(handle->src, options->callback_data);
   channel_mask &= nir_component_mask(handle->handle->num_components);

   nir_def *channels[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < handle->handle->num_components; i++)
      channels[i] = nir_channel(b, handle->handle, i);

   handle->first = handle->handle;
   nir_def *equal_first = nir_imm_true(b);
   u_foreach_bit(i, channel_mask) {
      nir_def *first = nir_read_first_invocation(b, channels[i]);
      handle->first = nir_vector_insert_imm(b, handle->first, first, i);

      equal_first = nir_iand(b, equal_first, nir_ieq(b, first, channels[i]));
   }

   return equal_first;
}

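/* Rewrites the original source to use the subgroup-uniform
 * first-invocation value, rebuilding the array deref if the handle came
 * from one.
 */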
static void
nu_handle_rewrite(nir_builder *b, struct nu_handle *h)
{
   if (h->parent_deref) {
      /* Replicate the deref. */
      nir_deref_instr *deref =
         nir_build_deref_array(b, h->parent_deref, h->first);
      *(h->src) = nir_src_for_ssa(&deref->def);
   } else {
      *(h->src) = nir_src_for_ssa(h->first);
   }
}

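/* Wraps a texture instruction whose texture and/or sampler handle is
 * non-uniform in a loop that, on each iteration, executes it for the
 * invocations whose handles match the first active invocation's.
 */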
static bool
lower_non_uniform_tex_access(const nir_lower_non_uniform_access_options *options,
                             nir_builder *b, nir_tex_instr *tex)
{
   if (!tex->texture_non_uniform && !tex->sampler_non_uniform)
      return false;

   /* We can have at most one texture and one sampler handle */
   unsigned num_handles = 0;
   struct nu_handle handles[2];
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      switch (tex->src[i].src_type) {
      case nir_tex_src_texture_offset:
      case nir_tex_src_texture_handle:
      case nir_tex_src_texture_deref:
         if (!tex->texture_non_uniform)
            continue;
         break;

      case nir_tex_src_sampler_offset:
      case nir_tex_src_sampler_handle:
      case nir_tex_src_sampler_deref:
         if (!tex->sampler_non_uniform)
            continue;
         break;

      default:
         continue;
      }

      assert(num_handles < ARRAY_SIZE(handles));
      if (nu_handle_init(&handles[num_handles], &tex->src[i].src))
         num_handles++;
   }

   if (num_handles == 0) {
      /* nu_handle_init() returned false because the handles are uniform. */
      tex->texture_non_uniform = false;
      tex->sampler_non_uniform = false;
      return false;
   }

   b->cursor = nir_instr_remove(&tex->instr);

   nir_push_loop(b);

   nir_def *all_equal_first = nir_imm_true(b);
   for (unsigned i = 0; i < num_handles; i++) {
      if (i && handles[i].handle == handles[0].handle) {
         /* The texture and sampler share a handle; compare it only once. */
         handles[i].first = handles[0].first;
         continue;
      }

      nir_def *equal_first = nu_handle_compare(options, b, &handles[i]);
      all_equal_first = nir_iand(b, all_equal_first, equal_first);
   }

   nir_push_if(b, all_equal_first);

   for (unsigned i = 0; i < num_handles; i++)
      nu_handle_rewrite(b, &handles[i]);

   nir_builder_instr_insert(b, &tex->instr);
   nir_jump(b, nir_jump_break);

   tex->texture_non_uniform = false;
   tex->sampler_non_uniform = false;

   return true;
}

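/* Same lowering for intrinsics: handle_src selects which source holds
 * the (possibly non-uniform) resource handle.
 */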
static bool
lower_non_uniform_access_intrin(const nir_lower_non_uniform_access_options *options,
                                nir_builder *b, nir_intrinsic_instr *intrin,
                                unsigned handle_src)
{
   if (!(nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM))
      return false;

   struct nu_handle handle;
   if (!nu_handle_init(&handle, &intrin->src[handle_src])) {
      nir_intrinsic_set_access(intrin, nir_intrinsic_access(intrin) & ~ACCESS_NON_UNIFORM);
      return false;
   }

   b->cursor = nir_instr_remove(&intrin->instr);

   nir_push_loop(b);

   nir_push_if(b, nu_handle_compare(options, b, &handle));

   nu_handle_rewrite(b, &handle);

   nir_builder_instr_insert(b, &intrin->instr);
   nir_jump(b, nir_jump_break);

   nir_intrinsic_set_access(intrin, nir_intrinsic_access(intrin) & ~ACCESS_NON_UNIFORM);

   return true;
}

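/* Walks every instruction in the impl and applies the lowering to each
 * texture instruction or resource-access intrinsic enabled in
 * options->types.
 */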
static bool
nir_lower_non_uniform_access_impl(nir_function_impl *impl,
                                  const nir_lower_non_uniform_access_options *options)
{
   bool progress = false;

   nir_builder b = nir_builder_create(impl);

   nir_foreach_block_safe(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         switch (instr->type) {
         case nir_instr_type_tex: {
            nir_tex_instr *tex = nir_instr_as_tex(instr);
            if ((options->types & nir_lower_non_uniform_texture_access) &&
                lower_non_uniform_tex_access(options, &b, tex))
               progress = true;
            break;
         }

         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            switch (intrin->intrinsic) {
            case nir_intrinsic_load_ubo:
               if ((options->types & nir_lower_non_uniform_ubo_access) &&
                   lower_non_uniform_access_intrin(options, &b, intrin, 0))
                  progress = true;
               break;

            case nir_intrinsic_load_ssbo:
            case nir_intrinsic_ssbo_atomic:
            case nir_intrinsic_ssbo_atomic_swap:
               if ((options->types & nir_lower_non_uniform_ssbo_access) &&
                   lower_non_uniform_access_intrin(options, &b, intrin, 0))
                  progress = true;
               break;

            case nir_intrinsic_store_ssbo:
               /* SSBO stores put the index in the second source */
               if ((options->types & nir_lower_non_uniform_ssbo_access) &&
                   lower_non_uniform_access_intrin(options, &b, intrin, 1))
                  progress = true;
               break;

            case nir_intrinsic_get_ssbo_size:
               if ((options->types & nir_lower_non_uniform_get_ssbo_size) &&
                   lower_non_uniform_access_intrin(options, &b, intrin, 0))
                  progress = true;
               break;

            case nir_intrinsic_image_load:
            case nir_intrinsic_image_sparse_load:
            case nir_intrinsic_image_store:
            case nir_intrinsic_image_atomic:
            case nir_intrinsic_image_atomic_swap:
            case nir_intrinsic_image_levels:
            case nir_intrinsic_image_size:
            case nir_intrinsic_image_samples:
            case nir_intrinsic_image_samples_identical:
            case nir_intrinsic_image_fragment_mask_load_amd:
            case nir_intrinsic_bindless_image_load:
            case nir_intrinsic_bindless_image_sparse_load:
            case nir_intrinsic_bindless_image_store:
            case nir_intrinsic_bindless_image_atomic:
            case nir_intrinsic_bindless_image_atomic_swap:
            case nir_intrinsic_bindless_image_levels:
            case nir_intrinsic_bindless_image_size:
            case nir_intrinsic_bindless_image_samples:
            case nir_intrinsic_bindless_image_samples_identical:
            case nir_intrinsic_bindless_image_fragment_mask_load_amd:
            case nir_intrinsic_image_deref_load:
            case nir_intrinsic_image_deref_sparse_load:
            case nir_intrinsic_image_deref_store:
            case nir_intrinsic_image_deref_atomic:
            case nir_intrinsic_image_deref_atomic_swap:
            case nir_intrinsic_image_deref_levels:
            case nir_intrinsic_image_deref_size:
            case nir_intrinsic_image_deref_samples:
            case nir_intrinsic_image_deref_samples_identical:
            case nir_intrinsic_image_deref_fragment_mask_load_amd:
               if ((options->types & nir_lower_non_uniform_image_access) &&
                   lower_non_uniform_access_intrin(options, &b, intrin, 0))
                  progress = true;
               break;

            default:
               /* Nothing to do */
               break;
            }
            break;
         }

         default:
            /* Nothing to do */
            break;
         }
      }
   }

   if (progress)
      nir_metadata_preserve(impl, nir_metadata_none);

   return progress;
}

/**
 * Lowers non-uniform resource access by using a loop
 *
 * This pass lowers non-uniform resource access by using subgroup operations
 * and a loop. Most hardware requires things like textures and UBO access
 * operations to happen on a dynamically uniform (or at least subgroup
 * uniform) resource. This pass allows for non-uniform access by placing the
 * texture instruction in a loop that looks something like this:
 *
 *    loop {
 *       bool tex_eq_first = readFirstInvocationARB(texture) == texture;
 *       bool smp_eq_first = readFirstInvocationARB(sampler) == sampler;
 *       if (tex_eq_first && smp_eq_first) {
 *          res = texture(texture, sampler, ...);
 *          break;
 *       }
 *    }
 *
 * Fortunately, because the instruction is immediately followed by the only
 * break in the loop, the block containing the instruction dominates the end
 * of the loop. Therefore, it's safe to move the instruction into the loop
 * without fixing up SSA in any way.
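 *
 * A minimal sketch of how a driver might invoke the pass, assuming it
 * wants texture and SSBO access lowered:
 *
 *    bool progress = false;
 *    const nir_lower_non_uniform_access_options opts = {
 *       .types = nir_lower_non_uniform_texture_access |
 *                nir_lower_non_uniform_ssbo_access,
 *    };
 *    NIR_PASS(progress, shader, nir_lower_non_uniform_access, &opts);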
 */
bool
nir_lower_non_uniform_access(nir_shader *shader,
                             const nir_lower_non_uniform_access_options *options)
{
   bool progress = false;

   nir_foreach_function_impl(impl, shader) {
      if (nir_lower_non_uniform_access_impl(impl, options))
         progress = true;
   }

   return progress;
}