xref: /aosp_15_r20/external/mesa3d/src/compiler/nir/nir_opt_ray_queries.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2021 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "nir.h"
25 #include "nir_builder.h"
26 
27 #include "util/hash_table.h"
28 #include "util/macros.h"
29 #include "util/set.h"
30 #include "util/u_dynarray.h"
31 
32 /** @file nir_opt_ray_queries.c
33  *
34  * 1. Remove ray queries that the shader is not using the result of.
 * 2. Combine ray queries which are not used simultaneously.
36  */
37 
38 static void
mark_query_read(struct set * queries,nir_intrinsic_instr * intrin)39 mark_query_read(struct set *queries,
40                 nir_intrinsic_instr *intrin)
41 {
42    nir_def *rq_def = intrin->src[0].ssa;
43 
44    nir_variable *query;
45    if (rq_def->parent_instr->type == nir_instr_type_intrinsic) {
46       nir_intrinsic_instr *load_deref =
47          nir_instr_as_intrinsic(rq_def->parent_instr);
48       assert(load_deref->intrinsic == nir_intrinsic_load_deref);
49 
50       query = nir_intrinsic_get_var(load_deref, 0);
51    } else if (rq_def->parent_instr->type == nir_instr_type_deref) {
52       query = nir_deref_instr_get_variable(
53          nir_instr_as_deref(rq_def->parent_instr));
54    } else {
55       return;
56    }
57    assert(query);
58 
59    _mesa_set_add(queries, query);
60 }
61 
62 static void
nir_find_ray_queries_read(struct set * queries,nir_shader * shader)63 nir_find_ray_queries_read(struct set *queries,
64                           nir_shader *shader)
65 {
66    nir_foreach_function_impl(impl, shader) {
67       nir_foreach_block(block, impl) {
68          nir_foreach_instr(instr, block) {
69             if (instr->type != nir_instr_type_intrinsic)
70                continue;
71 
72             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
73             switch (intrin->intrinsic) {
74             case nir_intrinsic_rq_proceed:
75                if (!list_is_empty(&intrin->def.uses))
76                   mark_query_read(queries, intrin);
77                break;
78             case nir_intrinsic_rq_load:
79                mark_query_read(queries, intrin);
80                break;
81             default:
82                break;
83             }
84          }
85       }
86    }
87 }
88 
89 static bool
nir_replace_unread_queries_instr(nir_builder * b,nir_instr * instr,void * data)90 nir_replace_unread_queries_instr(nir_builder *b, nir_instr *instr, void *data)
91 {
92    struct set *queries = data;
93 
94    if (instr->type != nir_instr_type_intrinsic)
95       return false;
96 
97    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
98    switch (intrin->intrinsic) {
99    case nir_intrinsic_rq_initialize:
100    case nir_intrinsic_rq_terminate:
101    case nir_intrinsic_rq_generate_intersection:
102    case nir_intrinsic_rq_confirm_intersection:
103       break;
104    case nir_intrinsic_rq_proceed:
105       break;
106    default:
107       return false;
108    }
109 
110    nir_variable *query = nir_intrinsic_get_var(intrin, 0);
111    assert(query);
112 
113    struct set_entry *entry = _mesa_set_search(queries, query);
114    if (entry)
115       return false;
116 
117    if (intrin->intrinsic == nir_intrinsic_rq_load)
118       assert(list_is_empty(&intrin->def.uses));
119 
120    nir_instr_remove(instr);
121 
122    return true;
123 }
124 
125 bool
nir_opt_ray_queries(nir_shader * shader)126 nir_opt_ray_queries(nir_shader *shader)
127 {
128    struct set *read_queries = _mesa_pointer_set_create(NULL);
129    nir_find_ray_queries_read(read_queries, shader);
130 
131    bool progress =
132       nir_shader_instructions_pass(shader,
133                                    nir_replace_unread_queries_instr,
134                                    nir_metadata_control_flow,
135                                    read_queries);
136 
137    /* Update the number of queries if some have been removed. */
138    if (progress) {
139       nir_remove_dead_derefs(shader);
140       nir_remove_dead_variables(shader,
141                                 nir_var_shader_temp | nir_var_function_temp,
142                                 NULL);
143    }
144 
145    _mesa_set_destroy(read_queries, NULL);
146 
147    return progress;
148 }
149 
150 /**
151  * Merge ray queries that are not used in parallel to reduce scratch memory:
152  *
 * 1. Store all the ray queries we will consider into an array for
 *    convenient access. Ignore arrays since they would be really complex
 *    to handle and will be rare in practice.
156  *
157  * 2. Count the number of ray query ranges and allocate the required ranges.
158  *
 * 3. Populate the ray query range array. A range is started and terminated
 *    by rq_initialize (the terminating rq_initialize will be the start of
 *    the next range). There are two hazards:
162  *
 *    1. rq_initialize can be inside some form of control flow which can
 *       result in incorrect ranges and invalid merging.
165  *
166  *       SOLUTION: Discard the entire ray query when encountering an
167  *                 instruction that is not dominated by the rq_initialize
168  *                 of the range.
169  *
170  *    2. With loops, we can underestimate the range because the state may
171  *       have to be preserved for multiple iterations.
172  *
173  *       SOLUTION: Track parent loops.
174  *
175  * 4. Try to rewrite the variables. For that, we iterate over every ray query
176  *    and try to move its ranges to the preceding ray queries.
177  */
178 
179 struct rq_range {
180    nir_variable *variable;
181 
182    uint32_t first;
183    uint32_t last;
184 
185    struct util_dynarray instrs;
186    struct set *loops;
187 };
188 
189 #define RQ_NEW_INDEX_NONE 0xFFFFFFFF
190 
191 static bool
count_ranges(struct nir_builder * b,nir_intrinsic_instr * intrinsic,void * data)192 count_ranges(struct nir_builder *b, nir_intrinsic_instr *intrinsic,
193              void *data)
194 {
195    if (intrinsic->intrinsic == nir_intrinsic_rq_initialize)
196       (*(uint32_t *)data)++;
197 
198    return false;
199 }
200 
201 static nir_cf_node *
get_parent_loop(nir_cf_node * node)202 get_parent_loop(nir_cf_node *node)
203 {
204    nir_cf_node *result = NULL;
205    while (node) {
206       if (node->type == nir_cf_node_loop)
207          result = node;
208 
209       node = node->parent;
210    }
211    return result;
212 }
213 
214 bool
nir_opt_ray_query_ranges(nir_shader * shader)215 nir_opt_ray_query_ranges(nir_shader *shader)
216 {
217    assert(exec_list_length(&shader->functions) == 1);
218 
219    struct nir_function *func =
220       (struct nir_function *)exec_list_get_head_const(&shader->functions);
221    assert(func->impl);
222 
223    uint32_t ray_query_count = 0;
224    nir_foreach_variable_in_shader(var, shader) {
225       if (!var->data.ray_query || glsl_type_is_array(var->type))
226          continue;
227       ray_query_count++;
228    }
229    nir_foreach_function_temp_variable(var, func->impl) {
230       if (!var->data.ray_query || glsl_type_is_array(var->type))
231          continue;
232       ray_query_count++;
233    }
234 
235    if (ray_query_count <= 1) {
236       nir_metadata_preserve(func->impl, nir_metadata_all);
237       return false;
238    }
239 
240    void *mem_ctx = ralloc_context(NULL);
241 
242    nir_metadata_require(func->impl, nir_metadata_instr_index | nir_metadata_dominance);
243 
244    nir_variable **ray_queries = ralloc_array(mem_ctx, nir_variable *, ray_query_count);
245    ray_query_count = 0;
246 
247    nir_foreach_variable_in_shader(var, shader) {
248       if (!var->data.ray_query || glsl_type_is_array(var->type))
249          continue;
250 
251       ray_queries[ray_query_count] = var;
252       ray_query_count++;
253    }
254 
255    nir_foreach_function_temp_variable(var, func->impl) {
256       if (!var->data.ray_query || glsl_type_is_array(var->type))
257          continue;
258 
259       ray_queries[ray_query_count] = var;
260       ray_query_count++;
261    }
262 
263    uint32_t range_count = 0;
264    nir_shader_intrinsics_pass(shader, count_ranges, nir_metadata_all,
265                               &range_count);
266 
267    struct rq_range *ranges = rzalloc_array(mem_ctx, struct rq_range, range_count);
268 
269    struct hash_table *range_indices = _mesa_pointer_hash_table_create(mem_ctx);
270    uint32_t target_index = 0;
271 
272    nir_foreach_block(block, func->impl) {
273       nir_cf_node *parent_loop = get_parent_loop(&block->cf_node);
274 
275       nir_foreach_instr(instr, block) {
276          if (instr->type != nir_instr_type_intrinsic)
277             continue;
278 
279          nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr);
280          if (!nir_intrinsic_is_ray_query(intrinsic->intrinsic))
281             continue;
282 
283          nir_deref_instr *ray_query_deref =
284             nir_instr_as_deref(intrinsic->src[0].ssa->parent_instr);
285 
286          if (ray_query_deref->deref_type != nir_deref_type_var)
287             continue;
288 
289          if (intrinsic->intrinsic == nir_intrinsic_rq_initialize) {
290             _mesa_hash_table_insert(range_indices, ray_query_deref->var,
291                                     (void *)(uintptr_t)target_index);
292 
293             ranges[target_index].variable = ray_query_deref->var;
294             ranges[target_index].first = instr->index;
295             ranges[target_index].last = instr->index;
296             util_dynarray_init(&ranges[target_index].instrs, mem_ctx);
297             ranges[target_index].loops = _mesa_pointer_set_create(mem_ctx);
298 
299             target_index++;
300          }
301 
302          struct hash_entry *index_entry =
303             _mesa_hash_table_search(range_indices, ray_query_deref->var);
304          struct rq_range *range = ranges + (uintptr_t)index_entry->data;
305 
306          if (intrinsic->intrinsic != nir_intrinsic_rq_initialize) {
307             /* If the initialize instruction does not dominate every other
308              * instruction in the range, we have to reject the enire query
309              * since we can not be certain about the ranges:
310              *
311              * rayQuery rq;
312              * if (i == 0)
313              *    init(rq);
314              * ...             <-- Another ray query that would get merged.
315              * if (i == 1)
316              *    init(rq);    <--+
317              * if (i == 0)        |
318              *    proceed(rq); <--+ Not dominated by init!
319              * if (i == 1)
320              *    proceed(rq);
321              */
322             nir_instr *init = *util_dynarray_element(&range->instrs, nir_instr *, 0);
323             if (!nir_block_dominates(init->block, instr->block)) {
324                for (uint32_t i = 0; i < ray_query_count; i++) {
325                   if (ray_queries[i] == ray_query_deref->var) {
326                      ray_queries[i] = NULL;
327                      break;
328                   }
329                }
330 
331                continue;
332             }
333 
334             range->last = MAX2(range->last, instr->index);
335          }
336 
337          util_dynarray_append(&range->instrs, nir_instr *, instr);
338 
339          if (parent_loop)
340             _mesa_set_add(range->loops, parent_loop);
341       }
342    }
343 
344    range_count = target_index;
345 
346    /* Try to push ray query ranges 'down'. */
347    for (uint32_t rq_index = 1; rq_index < ray_query_count; rq_index++) {
348       if (!ray_queries[rq_index])
349          continue;
350 
351       for (uint32_t dom_rq_index = 0; dom_rq_index < rq_index; dom_rq_index++) {
352          if (!ray_queries[dom_rq_index])
353             continue;
354 
355          bool collides = false;
356 
357          for (uint32_t range_index = 0; range_index < range_count; range_index++) {
358             if (ranges[range_index].variable != ray_queries[rq_index])
359                continue;
360 
361             for (uint32_t dom_range_index = 0; dom_range_index < range_count; dom_range_index++) {
362                if (ranges[dom_range_index].variable != ray_queries[dom_rq_index])
363                   continue;
364 
365                if (!(ranges[dom_range_index].first > ranges[range_index].last ||
366                      ranges[dom_range_index].last < ranges[range_index].first)) {
367                   collides = true;
368                   break;
369                }
370 
371                if (_mesa_set_intersects(ranges[dom_range_index].loops,
372                                         ranges[range_index].loops)) {
373                   collides = true;
374                   break;
375                }
376             }
377 
378             if (collides)
379                break;
380          }
381 
382          if (collides)
383             continue;
384 
385          for (uint32_t range_index = 0; range_index < range_count; range_index++) {
386             if (ranges[range_index].variable != ray_queries[rq_index])
387                continue;
388 
389             ranges[range_index].variable = ray_queries[dom_rq_index];
390          }
391       }
392    }
393 
394    /* Remap the ray query derefs to the new variables. */
395    bool progress = false;
396    for (uint32_t range_index = 0; range_index < range_count; range_index++) {
397       struct rq_range *range = ranges + range_index;
398       util_dynarray_foreach(&range->instrs, nir_instr *, instr) {
399          nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(*instr);
400          nir_deref_instr *ray_query_deref =
401             nir_instr_as_deref(intrinsic->src[0].ssa->parent_instr);
402          if (ray_query_deref->var != range->variable) {
403             ray_query_deref->var = range->variable;
404             progress = true;
405          }
406       }
407    }
408 
409    nir_metadata_preserve(func->impl, nir_metadata_all);
410 
411    /* Remove dead ray queries. */
412    if (progress) {
413       nir_remove_dead_derefs(shader);
414       nir_remove_dead_variables(shader, nir_var_shader_temp | nir_var_function_temp,
415                                 NULL);
416    }
417 
418    ralloc_free(mem_ctx);
419 
420    return progress;
421 }
422