1 /*
2 * Copyright © 2021 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir_builder.h"
26
27 #include "util/hash_table.h"
28 #include "util/macros.h"
29 #include "util/set.h"
30 #include "util/u_dynarray.h"
31
32 /** @file nir_opt_ray_queries.c
33 *
34 * 1. Remove ray queries that the shader is not using the result of.
 * 2. Combine ray queries which are not used simultaneously.
36 */
37
38 static void
mark_query_read(struct set * queries,nir_intrinsic_instr * intrin)39 mark_query_read(struct set *queries,
40 nir_intrinsic_instr *intrin)
41 {
42 nir_def *rq_def = intrin->src[0].ssa;
43
44 nir_variable *query;
45 if (rq_def->parent_instr->type == nir_instr_type_intrinsic) {
46 nir_intrinsic_instr *load_deref =
47 nir_instr_as_intrinsic(rq_def->parent_instr);
48 assert(load_deref->intrinsic == nir_intrinsic_load_deref);
49
50 query = nir_intrinsic_get_var(load_deref, 0);
51 } else if (rq_def->parent_instr->type == nir_instr_type_deref) {
52 query = nir_deref_instr_get_variable(
53 nir_instr_as_deref(rq_def->parent_instr));
54 } else {
55 return;
56 }
57 assert(query);
58
59 _mesa_set_add(queries, query);
60 }
61
62 static void
nir_find_ray_queries_read(struct set * queries,nir_shader * shader)63 nir_find_ray_queries_read(struct set *queries,
64 nir_shader *shader)
65 {
66 nir_foreach_function_impl(impl, shader) {
67 nir_foreach_block(block, impl) {
68 nir_foreach_instr(instr, block) {
69 if (instr->type != nir_instr_type_intrinsic)
70 continue;
71
72 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
73 switch (intrin->intrinsic) {
74 case nir_intrinsic_rq_proceed:
75 if (!list_is_empty(&intrin->def.uses))
76 mark_query_read(queries, intrin);
77 break;
78 case nir_intrinsic_rq_load:
79 mark_query_read(queries, intrin);
80 break;
81 default:
82 break;
83 }
84 }
85 }
86 }
87 }
88
89 static bool
nir_replace_unread_queries_instr(nir_builder * b,nir_instr * instr,void * data)90 nir_replace_unread_queries_instr(nir_builder *b, nir_instr *instr, void *data)
91 {
92 struct set *queries = data;
93
94 if (instr->type != nir_instr_type_intrinsic)
95 return false;
96
97 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
98 switch (intrin->intrinsic) {
99 case nir_intrinsic_rq_initialize:
100 case nir_intrinsic_rq_terminate:
101 case nir_intrinsic_rq_generate_intersection:
102 case nir_intrinsic_rq_confirm_intersection:
103 break;
104 case nir_intrinsic_rq_proceed:
105 break;
106 default:
107 return false;
108 }
109
110 nir_variable *query = nir_intrinsic_get_var(intrin, 0);
111 assert(query);
112
113 struct set_entry *entry = _mesa_set_search(queries, query);
114 if (entry)
115 return false;
116
117 if (intrin->intrinsic == nir_intrinsic_rq_load)
118 assert(list_is_empty(&intrin->def.uses));
119
120 nir_instr_remove(instr);
121
122 return true;
123 }
124
125 bool
nir_opt_ray_queries(nir_shader * shader)126 nir_opt_ray_queries(nir_shader *shader)
127 {
128 struct set *read_queries = _mesa_pointer_set_create(NULL);
129 nir_find_ray_queries_read(read_queries, shader);
130
131 bool progress =
132 nir_shader_instructions_pass(shader,
133 nir_replace_unread_queries_instr,
134 nir_metadata_control_flow,
135 read_queries);
136
137 /* Update the number of queries if some have been removed. */
138 if (progress) {
139 nir_remove_dead_derefs(shader);
140 nir_remove_dead_variables(shader,
141 nir_var_shader_temp | nir_var_function_temp,
142 NULL);
143 }
144
145 _mesa_set_destroy(read_queries, NULL);
146
147 return progress;
148 }
149
150 /**
151 * Merge ray queries that are not used in parallel to reduce scratch memory:
152 *
 * 1. Store all the ray queries we will consider into an array for
 *    convenient access. Ignore arrays since it would be really complex
 *    to handle and will be rare in practice.
156 *
157 * 2. Count the number of ray query ranges and allocate the required ranges.
158 *
 * 3. Populate the ray query range array. A range is started and terminated
 *    by rq_initialize (the terminating rq_initialize will be the start of
 *    the next range). There are two hazards:
162 *
163 * 1. rq_initialize can be inside some form of controlflow which can result
164 * in incorrect ranges and invalid merging.
165 *
166 * SOLUTION: Discard the entire ray query when encountering an
167 * instruction that is not dominated by the rq_initialize
168 * of the range.
169 *
170 * 2. With loops, we can underestimate the range because the state may
171 * have to be preserved for multiple iterations.
172 *
173 * SOLUTION: Track parent loops.
174 *
175 * 4. Try to rewrite the variables. For that, we iterate over every ray query
176 * and try to move its ranges to the preceding ray queries.
177 */
178
/** Bookkeeping for one ray query range, opened by an rq_initialize. */
struct rq_range {
   /* Variable the instructions of this range should reference. Starts out
    * as the variable of the opening rq_initialize and is replaced when the
    * range is moved onto another ray query.
    */
   nir_variable *variable;

   /* Instruction index of the opening rq_initialize. */
   uint32_t first;
   /* Largest instruction index recorded for this range so far. */
   uint32_t last;

   /* nir_instr pointers of the ray query intrinsics that belong to the
    * range; element 0 is the opening rq_initialize.
    */
   struct util_dynarray instrs;
   /* Outermost loops (as returned by get_parent_loop) enclosing the
    * instructions of this range.
    */
   struct set *loops;
};
188
/* NOTE(review): not referenced in the visible part of this file; confirm
 * whether it is still needed.
 */
#define RQ_NEW_INDEX_NONE 0xFFFFFFFF
190
191 static bool
count_ranges(struct nir_builder * b,nir_intrinsic_instr * intrinsic,void * data)192 count_ranges(struct nir_builder *b, nir_intrinsic_instr *intrinsic,
193 void *data)
194 {
195 if (intrinsic->intrinsic == nir_intrinsic_rq_initialize)
196 (*(uint32_t *)data)++;
197
198 return false;
199 }
200
201 static nir_cf_node *
get_parent_loop(nir_cf_node * node)202 get_parent_loop(nir_cf_node *node)
203 {
204 nir_cf_node *result = NULL;
205 while (node) {
206 if (node->type == nir_cf_node_loop)
207 result = node;
208
209 node = node->parent;
210 }
211 return result;
212 }
213
214 bool
nir_opt_ray_query_ranges(nir_shader * shader)215 nir_opt_ray_query_ranges(nir_shader *shader)
216 {
217 assert(exec_list_length(&shader->functions) == 1);
218
219 struct nir_function *func =
220 (struct nir_function *)exec_list_get_head_const(&shader->functions);
221 assert(func->impl);
222
223 uint32_t ray_query_count = 0;
224 nir_foreach_variable_in_shader(var, shader) {
225 if (!var->data.ray_query || glsl_type_is_array(var->type))
226 continue;
227 ray_query_count++;
228 }
229 nir_foreach_function_temp_variable(var, func->impl) {
230 if (!var->data.ray_query || glsl_type_is_array(var->type))
231 continue;
232 ray_query_count++;
233 }
234
235 if (ray_query_count <= 1) {
236 nir_metadata_preserve(func->impl, nir_metadata_all);
237 return false;
238 }
239
240 void *mem_ctx = ralloc_context(NULL);
241
242 nir_metadata_require(func->impl, nir_metadata_instr_index | nir_metadata_dominance);
243
244 nir_variable **ray_queries = ralloc_array(mem_ctx, nir_variable *, ray_query_count);
245 ray_query_count = 0;
246
247 nir_foreach_variable_in_shader(var, shader) {
248 if (!var->data.ray_query || glsl_type_is_array(var->type))
249 continue;
250
251 ray_queries[ray_query_count] = var;
252 ray_query_count++;
253 }
254
255 nir_foreach_function_temp_variable(var, func->impl) {
256 if (!var->data.ray_query || glsl_type_is_array(var->type))
257 continue;
258
259 ray_queries[ray_query_count] = var;
260 ray_query_count++;
261 }
262
263 uint32_t range_count = 0;
264 nir_shader_intrinsics_pass(shader, count_ranges, nir_metadata_all,
265 &range_count);
266
267 struct rq_range *ranges = rzalloc_array(mem_ctx, struct rq_range, range_count);
268
269 struct hash_table *range_indices = _mesa_pointer_hash_table_create(mem_ctx);
270 uint32_t target_index = 0;
271
272 nir_foreach_block(block, func->impl) {
273 nir_cf_node *parent_loop = get_parent_loop(&block->cf_node);
274
275 nir_foreach_instr(instr, block) {
276 if (instr->type != nir_instr_type_intrinsic)
277 continue;
278
279 nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr);
280 if (!nir_intrinsic_is_ray_query(intrinsic->intrinsic))
281 continue;
282
283 nir_deref_instr *ray_query_deref =
284 nir_instr_as_deref(intrinsic->src[0].ssa->parent_instr);
285
286 if (ray_query_deref->deref_type != nir_deref_type_var)
287 continue;
288
289 if (intrinsic->intrinsic == nir_intrinsic_rq_initialize) {
290 _mesa_hash_table_insert(range_indices, ray_query_deref->var,
291 (void *)(uintptr_t)target_index);
292
293 ranges[target_index].variable = ray_query_deref->var;
294 ranges[target_index].first = instr->index;
295 ranges[target_index].last = instr->index;
296 util_dynarray_init(&ranges[target_index].instrs, mem_ctx);
297 ranges[target_index].loops = _mesa_pointer_set_create(mem_ctx);
298
299 target_index++;
300 }
301
302 struct hash_entry *index_entry =
303 _mesa_hash_table_search(range_indices, ray_query_deref->var);
304 struct rq_range *range = ranges + (uintptr_t)index_entry->data;
305
306 if (intrinsic->intrinsic != nir_intrinsic_rq_initialize) {
307 /* If the initialize instruction does not dominate every other
308 * instruction in the range, we have to reject the enire query
309 * since we can not be certain about the ranges:
310 *
311 * rayQuery rq;
312 * if (i == 0)
313 * init(rq);
314 * ... <-- Another ray query that would get merged.
315 * if (i == 1)
316 * init(rq); <--+
317 * if (i == 0) |
318 * proceed(rq); <--+ Not dominated by init!
319 * if (i == 1)
320 * proceed(rq);
321 */
322 nir_instr *init = *util_dynarray_element(&range->instrs, nir_instr *, 0);
323 if (!nir_block_dominates(init->block, instr->block)) {
324 for (uint32_t i = 0; i < ray_query_count; i++) {
325 if (ray_queries[i] == ray_query_deref->var) {
326 ray_queries[i] = NULL;
327 break;
328 }
329 }
330
331 continue;
332 }
333
334 range->last = MAX2(range->last, instr->index);
335 }
336
337 util_dynarray_append(&range->instrs, nir_instr *, instr);
338
339 if (parent_loop)
340 _mesa_set_add(range->loops, parent_loop);
341 }
342 }
343
344 range_count = target_index;
345
346 /* Try to push ray query ranges 'down'. */
347 for (uint32_t rq_index = 1; rq_index < ray_query_count; rq_index++) {
348 if (!ray_queries[rq_index])
349 continue;
350
351 for (uint32_t dom_rq_index = 0; dom_rq_index < rq_index; dom_rq_index++) {
352 if (!ray_queries[dom_rq_index])
353 continue;
354
355 bool collides = false;
356
357 for (uint32_t range_index = 0; range_index < range_count; range_index++) {
358 if (ranges[range_index].variable != ray_queries[rq_index])
359 continue;
360
361 for (uint32_t dom_range_index = 0; dom_range_index < range_count; dom_range_index++) {
362 if (ranges[dom_range_index].variable != ray_queries[dom_rq_index])
363 continue;
364
365 if (!(ranges[dom_range_index].first > ranges[range_index].last ||
366 ranges[dom_range_index].last < ranges[range_index].first)) {
367 collides = true;
368 break;
369 }
370
371 if (_mesa_set_intersects(ranges[dom_range_index].loops,
372 ranges[range_index].loops)) {
373 collides = true;
374 break;
375 }
376 }
377
378 if (collides)
379 break;
380 }
381
382 if (collides)
383 continue;
384
385 for (uint32_t range_index = 0; range_index < range_count; range_index++) {
386 if (ranges[range_index].variable != ray_queries[rq_index])
387 continue;
388
389 ranges[range_index].variable = ray_queries[dom_rq_index];
390 }
391 }
392 }
393
394 /* Remap the ray query derefs to the new variables. */
395 bool progress = false;
396 for (uint32_t range_index = 0; range_index < range_count; range_index++) {
397 struct rq_range *range = ranges + range_index;
398 util_dynarray_foreach(&range->instrs, nir_instr *, instr) {
399 nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(*instr);
400 nir_deref_instr *ray_query_deref =
401 nir_instr_as_deref(intrinsic->src[0].ssa->parent_instr);
402 if (ray_query_deref->var != range->variable) {
403 ray_query_deref->var = range->variable;
404 progress = true;
405 }
406 }
407 }
408
409 nir_metadata_preserve(func->impl, nir_metadata_all);
410
411 /* Remove dead ray queries. */
412 if (progress) {
413 nir_remove_dead_derefs(shader);
414 nir_remove_dead_variables(shader, nir_var_shader_temp | nir_var_function_temp,
415 NULL);
416 }
417
418 ralloc_free(mem_ctx);
419
420 return progress;
421 }
422