1 /*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "nir_control_flow.h"
27 #include "nir_loop_analyze.h"
28
29 /* This limit is chosen fairly arbitrarily. GLSL IR max iteration is 32
30 * instructions. (Multiply counting nodes and magic number 5.) But there is
31 * no 1:1 mapping between GLSL IR and NIR so 25 was picked because it seemed
32 * to give about the same results. Around 5 instructions per node. But some
33 * loops that would unroll with GLSL IR fail to unroll if we set this to 25 so
34 * we set it to 26.
35 */
36 #define LOOP_UNROLL_LIMIT 26
37
38 /* Prepare this loop for unrolling by first converting to lcssa and then
39 * converting the phis from the top level of the loop body to regs.
40 * Partially converting out of SSA allows us to unroll the loop without having
41 * to keep track of and update phis along the way which gets tricky and
42 * doesn't add much value over converting to regs.
43 *
44 * The loop may have a jump instruction at the end of the loop which does
45 * nothing. Once we're out of SSA, we can safely delete it so we don't have
46 * to deal with it later.
47 */
48 static void
loop_prepare_for_unroll(nir_loop * loop)49 loop_prepare_for_unroll(nir_loop *loop)
50 {
51 nir_rematerialize_derefs_in_use_blocks_impl(
52 nir_cf_node_get_function(&loop->cf_node));
53
54 nir_convert_loop_to_lcssa(loop);
55
56 /* Lower phis at the top level of the loop body */
57 foreach_list_typed_safe(nir_cf_node, node, node, &loop->body) {
58 if (nir_cf_node_block == node->type) {
59 nir_lower_phis_to_regs_block(nir_cf_node_as_block(node));
60 }
61 }
62
63 /* Lower phis after the loop */
64 nir_block *block_after_loop =
65 nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node));
66
67 nir_lower_phis_to_regs_block(block_after_loop);
68
69 /* Remove jump if it's the last instruction in the loop */
70 nir_instr *last_instr = nir_block_last_instr(nir_loop_last_block(loop));
71 if (last_instr && last_instr->type == nir_instr_type_jump) {
72 nir_instr_remove(last_instr);
73 }
74 }
75
76 static void
get_first_blocks_in_terminator(nir_loop_terminator * term,nir_block ** first_break_block,nir_block ** first_continue_block)77 get_first_blocks_in_terminator(nir_loop_terminator *term,
78 nir_block **first_break_block,
79 nir_block **first_continue_block)
80 {
81 if (term->continue_from_then) {
82 *first_continue_block = nir_if_first_then_block(term->nif);
83 *first_break_block = nir_if_first_else_block(term->nif);
84 } else {
85 *first_continue_block = nir_if_first_else_block(term->nif);
86 *first_break_block = nir_if_first_then_block(term->nif);
87 }
88 }
89
90 /**
91 * Unroll a loop where we know exactly how many iterations there are and there
92 * is only a single exit point. Note here we can unroll loops with multiple
93 * theoretical exits that only have a single terminating exit that we always
94 * know is the "real" exit.
95 *
96 * loop {
97 * ...instrs...
98 * }
99 *
100 * And the iteration count is 3, the output will be:
101 *
102 * ...instrs... ...instrs... ...instrs...
103 */
104 static void
simple_unroll(nir_loop * loop)105 simple_unroll(nir_loop *loop)
106 {
107 nir_loop_terminator *limiting_term = loop->info->limiting_terminator;
108 assert(nir_is_trivial_loop_if(limiting_term->nif,
109 limiting_term->break_block));
110
111 loop_prepare_for_unroll(loop);
112
113 /* Skip over loop terminator and get the loop body. */
114 list_for_each_entry(nir_loop_terminator, terminator,
115 &loop->info->loop_terminator_list,
116 loop_terminator_link) {
117
118 /* Remove all but the limiting terminator as we know the other exit
119 * conditions can never be met. Note we need to extract any instructions
120 * in the continue from branch and insert then into the loop body before
121 * removing it.
122 */
123 if (terminator->nif != limiting_term->nif) {
124 nir_block *first_break_block;
125 nir_block *first_continue_block;
126 get_first_blocks_in_terminator(terminator, &first_break_block,
127 &first_continue_block);
128
129 assert(nir_is_trivial_loop_if(terminator->nif,
130 terminator->break_block));
131
132 nir_cf_list continue_from_lst;
133 nir_cf_extract(&continue_from_lst,
134 nir_before_block(first_continue_block),
135 nir_after_block(terminator->continue_from_block));
136 nir_cf_reinsert(&continue_from_lst,
137 nir_after_cf_node(&terminator->nif->cf_node));
138
139 nir_cf_node_remove(&terminator->nif->cf_node);
140 }
141 }
142
143 nir_block *first_break_block;
144 nir_block *first_continue_block;
145 get_first_blocks_in_terminator(limiting_term, &first_break_block,
146 &first_continue_block);
147
148 /* Pluck out the loop header */
149 nir_block *header_blk = nir_loop_first_block(loop);
150 nir_cf_list lp_header;
151 nir_cf_extract(&lp_header, nir_before_block(header_blk),
152 nir_before_cf_node(&limiting_term->nif->cf_node));
153
154 /* Add the continue from block of the limiting terminator to the loop body
155 */
156 nir_cf_list continue_from_lst;
157 nir_cf_extract(&continue_from_lst, nir_before_block(first_continue_block),
158 nir_after_block(limiting_term->continue_from_block));
159 nir_cf_reinsert(&continue_from_lst,
160 nir_after_cf_node(&limiting_term->nif->cf_node));
161
162 /* Pluck out the loop body */
163 nir_cf_list loop_body;
164 nir_cf_extract(&loop_body, nir_after_cf_node(&limiting_term->nif->cf_node),
165 nir_after_block(nir_loop_last_block(loop)));
166
167 struct hash_table *remap_table = _mesa_pointer_hash_table_create(NULL);
168
169 /* Clone the loop header and insert before the loop */
170 nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
171 nir_before_cf_node(&loop->cf_node),
172 remap_table);
173
174 for (unsigned i = 0; i < loop->info->max_trip_count; i++) {
175 /* Clone loop body and insert before the loop */
176 nir_cf_list_clone_and_reinsert(&loop_body, loop->cf_node.parent,
177 nir_before_cf_node(&loop->cf_node),
178 remap_table);
179
180 /* Clone loop header and insert after loop body */
181 nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
182 nir_before_cf_node(&loop->cf_node),
183 remap_table);
184 }
185
186 /* Remove the break from the loop terminator and add instructions from
187 * the break block after the unrolled loop.
188 */
189 nir_instr *break_instr = nir_block_last_instr(limiting_term->break_block);
190 nir_instr_remove(break_instr);
191 nir_cf_list break_list;
192 nir_cf_extract(&break_list, nir_before_block(first_break_block),
193 nir_after_block(limiting_term->break_block));
194
195 /* Clone so things get properly remapped */
196 nir_cf_list_clone_and_reinsert(&break_list, loop->cf_node.parent,
197 nir_before_cf_node(&loop->cf_node),
198 remap_table);
199
200 /* Remove the loop */
201 nir_cf_node_remove(&loop->cf_node);
202
203 /* Delete the original loop body, break block & header */
204 nir_cf_delete(&lp_header);
205 nir_cf_delete(&loop_body);
206 nir_cf_delete(&break_list);
207
208 _mesa_hash_table_destroy(remap_table, NULL);
209 }
210
211 static void
move_cf_list_into_loop_term(nir_cf_list * lst,nir_loop_terminator * term)212 move_cf_list_into_loop_term(nir_cf_list *lst, nir_loop_terminator *term)
213 {
214 /* Move the rest of the loop inside the continue-from-block */
215 nir_cf_reinsert(lst, nir_after_block(term->continue_from_block));
216
217 /* Remove the break */
218 nir_instr_remove(nir_block_last_instr(term->break_block));
219 }
220
221 static nir_cursor
get_complex_unroll_insert_location(nir_cf_node * node,bool continue_from_then)222 get_complex_unroll_insert_location(nir_cf_node *node, bool continue_from_then)
223 {
224 if (node->type == nir_cf_node_loop) {
225 return nir_before_cf_node(node);
226 } else {
227 nir_if *if_stmt = nir_cf_node_as_if(node);
228 if (continue_from_then) {
229 return nir_after_block(nir_if_last_then_block(if_stmt));
230 } else {
231 return nir_after_block(nir_if_last_else_block(if_stmt));
232 }
233 }
234 }
235
236 static nir_cf_node *
complex_unroll_loop_body(nir_loop * loop,nir_loop_terminator * unlimit_term,nir_cf_list * lp_header,nir_cf_list * lp_body,struct hash_table * remap_table,unsigned num_times_to_clone)237 complex_unroll_loop_body(nir_loop *loop, nir_loop_terminator *unlimit_term,
238 nir_cf_list *lp_header, nir_cf_list *lp_body,
239 struct hash_table *remap_table,
240 unsigned num_times_to_clone)
241 {
242 /* In the terminator that we have no trip count for move everything after
243 * the terminator into the continue from branch.
244 */
245 nir_cf_list loop_end;
246 nir_cf_extract(&loop_end, nir_after_cf_node(&unlimit_term->nif->cf_node),
247 nir_after_block(nir_loop_last_block(loop)));
248 move_cf_list_into_loop_term(&loop_end, unlimit_term);
249
250 /* Pluck out the loop body. */
251 nir_cf_extract(lp_body, nir_before_block(nir_loop_first_block(loop)),
252 nir_after_block(nir_loop_last_block(loop)));
253
254 /* Set unroll_loc to the loop as we will insert the unrolled loop before it
255 */
256 nir_cf_node *unroll_loc = &loop->cf_node;
257
258 /* Temp list to store the cloned loop as we unroll */
259 nir_cf_list unrolled_lp_body;
260
261 for (unsigned i = 0; i < num_times_to_clone; i++) {
262
263 nir_cursor cursor =
264 get_complex_unroll_insert_location(unroll_loc,
265 unlimit_term->continue_from_then);
266
267 /* Clone loop header and insert in if branch */
268 nir_cf_list_clone_and_reinsert(lp_header, loop->cf_node.parent,
269 cursor, remap_table);
270
271 cursor =
272 get_complex_unroll_insert_location(unroll_loc,
273 unlimit_term->continue_from_then);
274
275 /* Clone loop body */
276 nir_cf_list_clone(&unrolled_lp_body, lp_body, loop->cf_node.parent,
277 remap_table);
278
279 unroll_loc = exec_node_data(nir_cf_node,
280 exec_list_get_tail(&unrolled_lp_body.list),
281 node);
282 assert(unroll_loc->type == nir_cf_node_block &&
283 exec_list_is_empty(&nir_cf_node_as_block(unroll_loc)->instr_list));
284
285 /* Get the unrolled if node */
286 unroll_loc = nir_cf_node_prev(unroll_loc);
287
288 /* Insert unrolled loop body */
289 nir_cf_reinsert(&unrolled_lp_body, cursor);
290 }
291
292 return unroll_loc;
293 }
294
295 /**
296 * Unroll a loop with two exists when the trip count of one of the exits is
297 * unknown. If continue_from_then is true, the loop is repeated only when the
298 * "then" branch of the if is taken; otherwise it is repeated only
299 * when the "else" branch of the if is taken.
300 *
301 * For example, if the input is:
302 *
303 * loop {
304 * ...phis/condition...
305 * if condition {
306 * ...then instructions...
307 * } else {
308 * ...continue instructions...
309 * break
310 * }
311 * ...body...
312 * }
313 *
314 * And the iteration count is 3, and unlimit_term->continue_from_then is true,
315 * then the output will be:
316 *
317 * ...condition...
318 * if condition {
319 * ...then instructions...
320 * ...body...
321 * if condition {
322 * ...then instructions...
323 * ...body...
324 * if condition {
325 * ...then instructions...
326 * ...body...
327 * } else {
328 * ...continue instructions...
329 * }
330 * } else {
331 * ...continue instructions...
332 * }
333 * } else {
334 * ...continue instructions...
335 * }
336 */
337 static void
complex_unroll(nir_loop * loop,nir_loop_terminator * unlimit_term,bool limiting_term_second)338 complex_unroll(nir_loop *loop, nir_loop_terminator *unlimit_term,
339 bool limiting_term_second)
340 {
341 assert(nir_is_trivial_loop_if(unlimit_term->nif,
342 unlimit_term->break_block));
343
344 nir_loop_terminator *limiting_term = loop->info->limiting_terminator;
345 assert(nir_is_trivial_loop_if(limiting_term->nif,
346 limiting_term->break_block));
347
348 loop_prepare_for_unroll(loop);
349
350 nir_block *header_blk = nir_loop_first_block(loop);
351
352 nir_cf_list lp_header;
353 nir_cf_list limit_break_list;
354 unsigned num_times_to_clone;
355 if (limiting_term_second) {
356 /* Pluck out the loop header */
357 nir_cf_extract(&lp_header, nir_before_block(header_blk),
358 nir_before_cf_node(&unlimit_term->nif->cf_node));
359
360 /* We need some special handling when its the second terminator causing
361 * us to exit the loop for example:
362 *
363 * for (int i = 0; i < uniform_lp_count; i++) {
364 * colour = vec4(0.0, 1.0, 0.0, 1.0);
365 *
366 * if (i == 1) {
367 * break;
368 * }
369 * ... any further code is unreachable after i == 1 ...
370 * }
371 */
372 nir_cf_list after_lt;
373 nir_if *limit_if = limiting_term->nif;
374 nir_cf_extract(&after_lt, nir_after_cf_node(&limit_if->cf_node),
375 nir_after_block(nir_loop_last_block(loop)));
376 move_cf_list_into_loop_term(&after_lt, limiting_term);
377
378 /* Because the trip count is the number of times we pass over the entire
379 * loop before hitting a break when the second terminator is the
380 * limiting terminator we can actually execute code inside the loop when
381 * trip count == 0 e.g. the code above the break. So we need to bump
382 * the trip_count in order for the code below to clone anything. When
383 * trip count == 1 we execute the code above the break twice and the
384 * code below it once so we need clone things twice and so on.
385 */
386 num_times_to_clone = loop->info->max_trip_count + 1;
387 } else {
388 /* Pluck out the loop header */
389 nir_cf_extract(&lp_header, nir_before_block(header_blk),
390 nir_before_cf_node(&limiting_term->nif->cf_node));
391
392 nir_block *first_break_block;
393 nir_block *first_continue_block;
394 get_first_blocks_in_terminator(limiting_term, &first_break_block,
395 &first_continue_block);
396
397 /* Remove the break then extract instructions from the break block so we
398 * can insert them in the innermost else of the unrolled loop.
399 */
400 nir_instr *break_instr = nir_block_last_instr(limiting_term->break_block);
401 nir_instr_remove(break_instr);
402 nir_cf_extract(&limit_break_list, nir_before_block(first_break_block),
403 nir_after_block(limiting_term->break_block));
404
405 nir_cf_list continue_list;
406 nir_cf_extract(&continue_list, nir_before_block(first_continue_block),
407 nir_after_block(limiting_term->continue_from_block));
408
409 nir_cf_reinsert(&continue_list,
410 nir_after_cf_node(&limiting_term->nif->cf_node));
411
412 nir_cf_node_remove(&limiting_term->nif->cf_node);
413
414 num_times_to_clone = loop->info->max_trip_count;
415 }
416
417 struct hash_table *remap_table = _mesa_pointer_hash_table_create(NULL);
418
419 nir_cf_list lp_body;
420 nir_cf_node *unroll_loc =
421 complex_unroll_loop_body(loop, unlimit_term, &lp_header, &lp_body,
422 remap_table, num_times_to_clone);
423
424 if (!limiting_term_second) {
425 assert(unroll_loc->type == nir_cf_node_if);
426
427 nir_cursor cursor =
428 get_complex_unroll_insert_location(unroll_loc,
429 unlimit_term->continue_from_then);
430
431 /* Clone loop header and insert in if branch */
432 nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
433 cursor, remap_table);
434
435 cursor =
436 get_complex_unroll_insert_location(unroll_loc,
437 unlimit_term->continue_from_then);
438
439 /* Clone so things get properly remapped, and insert break block from
440 * the limiting terminator.
441 */
442 nir_cf_list_clone_and_reinsert(&limit_break_list, loop->cf_node.parent,
443 cursor, remap_table);
444
445 nir_cf_delete(&limit_break_list);
446 }
447
448 /* The loop has been unrolled so remove it. */
449 nir_cf_node_remove(&loop->cf_node);
450
451 /* Delete the original loop header and body */
452 nir_cf_delete(&lp_header);
453 nir_cf_delete(&lp_body);
454
455 _mesa_hash_table_destroy(remap_table, NULL);
456 }
457
458 /**
459 * Unroll loops where we only have a single terminator but the exact trip
460 * count is unknown. For example:
461 *
462 * for (int i = 0; i < imin(x, 4); i++)
463 * ...
464 */
465 static void
complex_unroll_single_terminator(nir_loop * loop)466 complex_unroll_single_terminator(nir_loop *loop)
467 {
468 assert(list_is_singular(&loop->info->loop_terminator_list));
469 assert(loop->info->limiting_terminator);
470 assert(nir_is_trivial_loop_if(loop->info->limiting_terminator->nif,
471 loop->info->limiting_terminator->break_block));
472
473 nir_loop_terminator *terminator = loop->info->limiting_terminator;
474
475 loop_prepare_for_unroll(loop);
476
477 /* Pluck out the loop header */
478 nir_cf_list lp_header;
479 nir_cf_extract(&lp_header, nir_before_block(nir_loop_first_block(loop)),
480 nir_before_cf_node(&terminator->nif->cf_node));
481
482 struct hash_table *remap_table =
483 _mesa_hash_table_create(NULL, _mesa_hash_pointer,
484 _mesa_key_pointer_equal);
485
486 /* We need to clone the loop one extra time in order to clone the lcssa
487 * vars for the last iteration (they are inside the following ifs break
488 * branch). We leave other passes to clean up this redundant if.
489 */
490 unsigned num_times_to_clone = loop->info->max_trip_count + 1;
491
492 nir_cf_list lp_body;
493 UNUSED nir_cf_node *unroll_loc =
494 complex_unroll_loop_body(loop, terminator, &lp_header, &lp_body,
495 remap_table, num_times_to_clone);
496
497 assert(unroll_loc->type == nir_cf_node_if);
498
499 /* We need to clone the lcssa vars in order to insert them on both sides
500 * of the if in the last iteration/if-statement. Otherwise the optimisation
501 * passes will have trouble optimising the unrolled if ladder.
502 */
503 nir_cursor cursor =
504 get_complex_unroll_insert_location(unroll_loc,
505 terminator->continue_from_then);
506
507 nir_if *if_stmt = nir_cf_node_as_if(unroll_loc);
508 nir_cursor start_cursor;
509 nir_cursor end_cursor;
510 if (terminator->continue_from_then) {
511 start_cursor = nir_before_block(nir_if_first_else_block(if_stmt));
512 end_cursor = nir_after_block(nir_if_last_else_block(if_stmt));
513 } else {
514 start_cursor = nir_before_block(nir_if_first_then_block(if_stmt));
515 end_cursor = nir_after_block(nir_if_last_then_block(if_stmt));
516 }
517
518 nir_cf_list lcssa_list;
519 nir_cf_extract(&lcssa_list, start_cursor, end_cursor);
520
521 /* Insert the cloned vars in the last continue branch */
522 nir_cf_list_clone_and_reinsert(&lcssa_list, loop->cf_node.parent,
523 cursor, remap_table);
524
525 start_cursor = terminator->continue_from_then ? nir_before_block(nir_if_first_else_block(if_stmt)) : nir_before_block(nir_if_first_then_block(if_stmt));
526
527 /* Reinsert the cloned vars back where they came from */
528 nir_cf_reinsert(&lcssa_list, start_cursor);
529
530 /* Delete the original loop header and body */
531 nir_cf_delete(&lp_header);
532 nir_cf_delete(&lp_body);
533
534 /* The original loop has been replaced so remove it. */
535 nir_cf_node_remove(&loop->cf_node);
536
537 _mesa_hash_table_destroy(remap_table, NULL);
538 }
539
540 /* Unrolls the classic wrapper loops e.g
541 *
542 * do {
543 * // ...
544 * } while (false)
545 */
546 static bool
wrapper_unroll(nir_loop * loop)547 wrapper_unroll(nir_loop *loop)
548 {
549 if (!list_is_empty(&loop->info->loop_terminator_list)) {
550
551 /* Unrolling a loop with a large number of exits can result in a
552 * large inrease in register pressure. For now we just skip
553 * unrolling if we have more than 3 exits (not including the break
554 * at the end of the loop).
555 *
556 * TODO: Most loops that fit this pattern are simply switch
557 * statements that are converted to a loop to take advantage of
558 * exiting jump instruction handling. In this case we could make
559 * use of a binary seach pattern like we do in
560 * nir_lower_indirect_derefs(), this should allow us to unroll the
561 * loops in an optimal way and should also avoid some of the
562 * register pressure that comes from simply nesting the
563 * terminators one after the other.
564 */
565 if (list_length(&loop->info->loop_terminator_list) > 3)
566 return false;
567
568 loop_prepare_for_unroll(loop);
569
570 nir_cursor loop_end = nir_after_block(nir_loop_last_block(loop));
571 list_for_each_entry(nir_loop_terminator, terminator,
572 &loop->info->loop_terminator_list,
573 loop_terminator_link) {
574
575 /* Remove break from the terminator */
576 nir_instr *break_instr =
577 nir_block_last_instr(terminator->break_block);
578 nir_instr_remove(break_instr);
579
580 /* Pluck out the loop body. */
581 nir_cf_list loop_body;
582 nir_cf_extract(&loop_body,
583 nir_after_cf_node(&terminator->nif->cf_node),
584 loop_end);
585
586 /* Reinsert loop body into continue from block */
587 nir_cf_reinsert(&loop_body,
588 nir_after_block(terminator->continue_from_block));
589
590 loop_end = terminator->continue_from_then ? nir_after_block(nir_if_last_then_block(terminator->nif)) : nir_after_block(nir_if_last_else_block(terminator->nif));
591 }
592 } else {
593 loop_prepare_for_unroll(loop);
594 }
595
596 /* Pluck out the loop body. */
597 nir_cf_list loop_body;
598 nir_cf_extract(&loop_body, nir_before_block(nir_loop_first_block(loop)),
599 nir_after_block(nir_loop_last_block(loop)));
600
601 /* Reinsert loop body after the loop */
602 nir_cf_reinsert(&loop_body, nir_after_cf_node(&loop->cf_node));
603
604 /* The loop has been unrolled so remove it. */
605 nir_cf_node_remove(&loop->cf_node);
606
607 return true;
608 }
609
610 static bool
is_access_out_of_bounds(nir_loop_terminator * term,nir_deref_instr * deref,unsigned trip_count)611 is_access_out_of_bounds(nir_loop_terminator *term, nir_deref_instr *deref,
612 unsigned trip_count)
613 {
614 for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
615 if (d->deref_type != nir_deref_type_array)
616 continue;
617
618 nir_alu_instr *alu = nir_instr_as_alu(term->conditional_instr);
619 nir_src src = term->induction_rhs ? alu->src[1].src : alu->src[0].src;
620 if (!nir_srcs_equal(d->arr.index, src))
621 continue;
622
623 nir_deref_instr *parent = nir_deref_instr_parent(d);
624 assert(glsl_type_is_array(parent->type) ||
625 glsl_type_is_matrix(parent->type) ||
626 glsl_type_is_vector(parent->type));
627
628 /* We have already unrolled the loop and the new one will be imbedded in
629 * the innermost continue branch. So unless the array is greater than
630 * the trip count any iteration over the loop will be an out of bounds
631 * access of the array.
632 */
633 unsigned length = glsl_type_is_vector(parent->type) ? glsl_get_vector_elements(parent->type) : glsl_get_length(parent->type);
634 return length <= trip_count;
635 }
636
637 return false;
638 }
639
640 static bool
comparison_contains_instr(nir_scalar cond_scalar,nir_instr * instr)641 comparison_contains_instr(nir_scalar cond_scalar, nir_instr *instr)
642 {
643 if (nir_is_terminator_condition_with_two_inputs(cond_scalar)) {
644 nir_alu_instr *comparison =
645 nir_instr_as_alu(cond_scalar.def->parent_instr);
646 return comparison->src[0].src.ssa->parent_instr == instr ||
647 comparison->src[1].src.ssa->parent_instr == instr;
648 }
649
650 return false;
651 }
652
653 /* If we know an array access is going to be out of bounds remove or replace
654 * the access with an undef. This can later result in the entire loop being
655 * removed by nir_opt_dead_cf().
656 */
657 static void
remove_out_of_bounds_induction_use(nir_shader * shader,nir_loop * loop,nir_loop_terminator * term,nir_cf_list * lp_header,nir_cf_list * lp_body,unsigned trip_count)658 remove_out_of_bounds_induction_use(nir_shader *shader, nir_loop *loop,
659 nir_loop_terminator *term,
660 nir_cf_list *lp_header,
661 nir_cf_list *lp_body,
662 unsigned trip_count)
663 {
664 if (!loop->info->guessed_trip_count)
665 return;
666
667 /* Temporarily recreate the original loop so we can alter it */
668 nir_cf_reinsert(lp_header, nir_after_block(nir_loop_last_block(loop)));
669 nir_cf_reinsert(lp_body, nir_after_block(nir_loop_last_block(loop)));
670
671 nir_builder b = nir_builder_create(nir_cf_node_get_function(&loop->cf_node));
672
673 nir_foreach_block_in_cf_node(block, &loop->cf_node) {
674 nir_foreach_instr_safe(instr, block) {
675 if (instr->type != nir_instr_type_intrinsic)
676 continue;
677
678 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
679
680 /* Check for arrays variably-indexed by a loop induction variable.
681 * If this access is out of bounds remove the instruction or replace
682 * its use with an undefined instruction.
683 * If the loop is no longer useful we leave it for the appropriate
684 * pass to clean it up for us.
685 */
686 if (intrin->intrinsic == nir_intrinsic_load_deref ||
687 intrin->intrinsic == nir_intrinsic_store_deref ||
688 intrin->intrinsic == nir_intrinsic_copy_deref) {
689
690 if (is_access_out_of_bounds(term, nir_src_as_deref(intrin->src[0]),
691 trip_count)) {
692 if (intrin->intrinsic == nir_intrinsic_load_deref) {
693 nir_alu_instr *term_alu =
694 nir_instr_as_alu(term->nif->condition.ssa->parent_instr);
695 b.cursor = nir_before_instr(term->nif->condition.ssa->parent_instr);
696
697 /* If the out of bounds load is used in the comparison of the
698 * loop terminator replace the condition with true so that the
699 * loop can be removed.
700 */
701 bool exit_on_true = !term->continue_from_then;
702 if (term_alu->op == nir_op_ior && exit_on_true) {
703 nir_scalar src0_cond_scalar = { term_alu->src[0].src.ssa, 0 };
704 nir_scalar src1_cond_scalar = { term_alu->src[1].src.ssa, 0 };
705
706 bool replaced_comparison = false;
707 if (comparison_contains_instr(src0_cond_scalar, instr)) {
708 nir_def *t = nir_imm_true(&b);
709 nir_def_rewrite_uses(term_alu->src[0].src.ssa, t);
710 replaced_comparison = true;
711 }
712
713 if (comparison_contains_instr(src1_cond_scalar, instr)) {
714 nir_def *t = nir_imm_true(&b);
715 nir_def_rewrite_uses(term_alu->src[1].src.ssa, t);
716 replaced_comparison = true;
717 }
718
719 if (replaced_comparison)
720 continue;
721 }
722
723 nir_def *undef =
724 nir_undef(&b, intrin->def.num_components,
725 intrin->def.bit_size);
726 nir_def_rewrite_uses(&intrin->def,
727 undef);
728 } else {
729 nir_instr_remove(instr);
730 continue;
731 }
732 }
733
734 if (intrin->intrinsic == nir_intrinsic_copy_deref &&
735 is_access_out_of_bounds(term, nir_src_as_deref(intrin->src[1]),
736 trip_count)) {
737 nir_instr_remove(instr);
738 }
739 }
740 }
741 }
742
743 /* Now that we are done extract the loop header and body again */
744 nir_cf_extract(lp_header, nir_before_block(nir_loop_first_block(loop)),
745 nir_before_cf_node(&term->nif->cf_node));
746 nir_cf_extract(lp_body, nir_before_block(nir_loop_first_block(loop)),
747 nir_after_block(nir_loop_last_block(loop)));
748 }
749
750 /* Partially unrolls loops that don't have a known trip count.
751 */
752 static void
partial_unroll(nir_shader * shader,nir_loop * loop,unsigned trip_count)753 partial_unroll(nir_shader *shader, nir_loop *loop, unsigned trip_count)
754 {
755 assert(list_is_singular(&loop->info->loop_terminator_list));
756
757 nir_loop_terminator *terminator =
758 list_first_entry(&loop->info->loop_terminator_list,
759 nir_loop_terminator, loop_terminator_link);
760
761 assert(nir_is_trivial_loop_if(terminator->nif, terminator->break_block));
762
763 loop_prepare_for_unroll(loop);
764
765 /* Pluck out the loop header */
766 nir_cf_list lp_header;
767 nir_cf_extract(&lp_header, nir_before_block(nir_loop_first_block(loop)),
768 nir_before_cf_node(&terminator->nif->cf_node));
769
770 struct hash_table *remap_table =
771 _mesa_hash_table_create(NULL, _mesa_hash_pointer,
772 _mesa_key_pointer_equal);
773
774 nir_cf_list lp_body;
775 nir_cf_node *unroll_loc =
776 complex_unroll_loop_body(loop, terminator, &lp_header, &lp_body,
777 remap_table, trip_count);
778
779 /* Attempt to remove out of bounds array access */
780 remove_out_of_bounds_induction_use(shader, loop, terminator, &lp_header,
781 &lp_body, trip_count);
782
783 nir_cursor cursor =
784 get_complex_unroll_insert_location(unroll_loc,
785 terminator->continue_from_then);
786
787 /* Reinsert the loop in the innermost nested continue branch of the unrolled
788 * loop.
789 */
790 nir_loop *new_loop = nir_loop_create(shader);
791 nir_cf_node_insert(cursor, &new_loop->cf_node);
792 new_loop->partially_unrolled = true;
793
794 /* Clone loop header and insert into new loop */
795 nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent,
796 nir_after_cf_list(&new_loop->body),
797 remap_table);
798
799 /* Clone loop body and insert into new loop */
800 nir_cf_list_clone_and_reinsert(&lp_body, loop->cf_node.parent,
801 nir_after_cf_list(&new_loop->body),
802 remap_table);
803
804 /* Insert break back into terminator */
805 nir_jump_instr *brk = nir_jump_instr_create(shader, nir_jump_break);
806 nir_block *break_block = _mesa_hash_table_search(remap_table, terminator->break_block)->data;
807 nir_instr_insert_after_block(break_block, &brk->instr);
808
809 /* Delete the original loop header and body */
810 nir_cf_delete(&lp_header);
811 nir_cf_delete(&lp_body);
812
813 /* The original loop has been replaced so remove it. */
814 nir_cf_node_remove(&loop->cf_node);
815
816 _mesa_hash_table_destroy(remap_table, NULL);
817 }
818
819 static bool
is_indirect_load(nir_instr * instr)820 is_indirect_load(nir_instr *instr)
821 {
822 if (instr->type == nir_instr_type_intrinsic) {
823 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
824
825 if ((intrin->intrinsic == nir_intrinsic_load_ubo ||
826 intrin->intrinsic == nir_intrinsic_load_ssbo) &&
827 !nir_src_is_const(intrin->src[1])) {
828 return true;
829 }
830
831 if (intrin->intrinsic == nir_intrinsic_load_global)
832 return true;
833
834 if (intrin->intrinsic == nir_intrinsic_load_deref ||
835 intrin->intrinsic == nir_intrinsic_store_deref) {
836 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
837 nir_variable_mode mem_modes = nir_var_mem_ssbo | nir_var_mem_ubo | nir_var_mem_global;
838 if (!nir_deref_mode_may_be(deref, mem_modes))
839 return false;
840 while (deref) {
841 if ((deref->deref_type == nir_deref_type_array ||
842 deref->deref_type == nir_deref_type_ptr_as_array) &&
843 !nir_src_is_const(deref->arr.index)) {
844 return true;
845 }
846 deref = nir_deref_instr_parent(deref);
847 }
848 }
849 } else if (instr->type == nir_instr_type_tex) {
850 nir_tex_instr *tex = nir_instr_as_tex(instr);
851
852 for (unsigned i = 0; i < tex->num_srcs; i++) {
853 if (!nir_src_is_const(tex->src[i].src))
854 return true;
855 }
856 }
857
858 return false;
859 }
860
861 static bool
can_pipeline_loads(nir_loop * loop)862 can_pipeline_loads(nir_loop *loop)
863 {
864 if (!loop->info->exact_trip_count_known)
865 return false;
866
867 bool interesting_loads = false;
868
869 foreach_list_typed(nir_cf_node, cf_node, node, &loop->body) {
870 if (cf_node == &loop->info->limiting_terminator->nif->cf_node)
871 continue;
872
873 /* Control flow usually prevents useful scheduling */
874 if (cf_node->type != nir_cf_node_block)
875 return false;
876
877 if (interesting_loads)
878 continue;
879
880 nir_block *block = nir_cf_node_as_block(cf_node);
881 nir_foreach_instr(instr, block) {
882 if (is_indirect_load(instr)) {
883 interesting_loads = true;
884 break;
885 }
886 }
887 }
888
889 return interesting_loads;
890 }
891
892 /*
893 * Returns true if we should unroll the loop, otherwise false.
894 */
895 static bool
check_unrolling_restrictions(nir_shader * shader,nir_loop * loop)896 check_unrolling_restrictions(nir_shader *shader, nir_loop *loop)
897 {
898 if (loop->control == nir_loop_control_unroll)
899 return true;
900
901 if (loop->control == nir_loop_control_dont_unroll)
902 return false;
903
904 nir_loop_info *li = loop->info;
905 unsigned max_iter = shader->options->max_unroll_iterations;
906 /* Unroll much more aggressively if it can hide load latency. */
907 if (shader->options->max_unroll_iterations_aggressive && can_pipeline_loads(loop))
908 max_iter = shader->options->max_unroll_iterations_aggressive;
909 /* Tune differently if the loop has double ops and soft fp64 is in use */
910 else if (shader->options->max_unroll_iterations_fp64 && loop->info->has_soft_fp64)
911 max_iter = shader->options->max_unroll_iterations_fp64;
912 unsigned trip_count =
913 li->max_trip_count ? li->max_trip_count : li->guessed_trip_count;
914
915 if (li->force_unroll && !li->guessed_trip_count && trip_count <= max_iter)
916 return true;
917
918 unsigned cost_limit = max_iter * LOOP_UNROLL_LIMIT;
919 unsigned cost = li->instr_cost * trip_count;
920
921 if (cost <= cost_limit && trip_count <= max_iter)
922 return true;
923
924 return false;
925 }
926
927 static bool
928 process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *has_nested_loop_out,
929 bool *unrolled_this_block);
930
931 static bool
process_loops_in_block(nir_shader * sh,struct exec_list * block,bool * has_nested_loop_out)932 process_loops_in_block(nir_shader *sh, struct exec_list *block,
933 bool *has_nested_loop_out)
934 {
935 /* We try to unroll as many loops in one pass as possible.
936 * E.g. we can safely unroll both loops in this block:
937 *
938 * if (...) {
939 * loop {...}
940 * }
941 *
942 * if (...) {
943 * loop {...}
944 * }
945 *
946 * Unrolling one loop doesn't affect the other one.
947 *
948 * On the other hand for block with:
949 *
950 * loop {...}
951 * ...
952 * loop {...}
953 *
954 * It is unsafe to unroll both loops in one pass without taking
955 * complicating precautions, since the structure of the block would
956 * change after unrolling the first loop. So in such a case we leave
957 * the second loop for the next iteration of unrolling to handle.
958 */
959
960 bool progress = false;
961 bool unrolled_this_block = false;
962
963 foreach_list_typed(nir_cf_node, nested_node, node, block) {
964 if (process_loops(sh, nested_node,
965 has_nested_loop_out, &unrolled_this_block)) {
966 progress = true;
967
968 /* If current node is unrolled we could not safely continue
969 * our iteration since we don't know the next node
970 * and it's hard to guarantee that we won't end up unrolling
971 * inner loop of the currently unrolled one, if such exists.
972 */
973 if (unrolled_this_block) {
974 break;
975 }
976 }
977 }
978
979 return progress;
980 }
981
982 static bool
process_loops(nir_shader * sh,nir_cf_node * cf_node,bool * has_nested_loop_out,bool * unrolled_this_block)983 process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *has_nested_loop_out,
984 bool *unrolled_this_block)
985 {
986 bool progress = false;
987 bool has_nested_loop = false;
988 nir_loop *loop;
989
990 switch (cf_node->type) {
991 case nir_cf_node_block:
992 return progress;
993 case nir_cf_node_if: {
994 nir_if *if_stmt = nir_cf_node_as_if(cf_node);
995 progress |= process_loops_in_block(sh, &if_stmt->then_list,
996 has_nested_loop_out);
997 progress |= process_loops_in_block(sh, &if_stmt->else_list,
998 has_nested_loop_out);
999 return progress;
1000 }
1001 case nir_cf_node_loop: {
1002 loop = nir_cf_node_as_loop(cf_node);
1003 assert(!nir_loop_has_continue_construct(loop));
1004 progress |= process_loops_in_block(sh, &loop->body, &has_nested_loop);
1005
1006 break;
1007 }
1008 default:
1009 unreachable("unknown cf node type");
1010 }
1011
1012 const bool unrolled_child_block = progress;
1013
1014 /* Don't attempt to unroll a second inner loop in this pass, wait until the
1015 * next pass as we have altered the cf.
1016 */
1017 if (!progress && loop->control != nir_loop_control_dont_unroll) {
1018
1019 /* Remove the conditional break statements associated with all terminators
1020 * that are associated with a fixed iteration count, except for the one
1021 * associated with the limiting terminator--that one needs to stay, since
1022 * it terminates the loop.
1023 */
1024 if (loop->info->limiting_terminator) {
1025 list_for_each_entry_safe(nir_loop_terminator, t,
1026 &loop->info->loop_terminator_list,
1027 loop_terminator_link) {
1028 if (t->exact_trip_count_unknown)
1029 continue;
1030
1031 if (t != loop->info->limiting_terminator) {
1032
1033 /* Only delete the if-statement if the continue block is empty.
1034 * We trust that nir_opt_if() does its job well enough to
1035 * remove all instructions from the continue block when possible.
1036 */
1037 nir_block *first_continue_from_blk = t->continue_from_then ? nir_if_first_then_block(t->nif) : nir_if_first_else_block(t->nif);
1038
1039 if (!(nir_cf_node_is_last(&first_continue_from_blk->cf_node) &&
1040 exec_list_is_empty(&first_continue_from_blk->instr_list)))
1041 continue;
1042
1043 /* Now delete the if */
1044 nir_cf_node_remove(&t->nif->cf_node);
1045
1046 /* Also remove it from the terminator list */
1047 list_del(&t->loop_terminator_link);
1048
1049 progress = true;
1050 }
1051 }
1052 }
1053
1054 /* Check for the classic
1055 *
1056 * do {
1057 * // ...
1058 * } while (false)
1059 *
1060 * that is used to wrap multi-line macros. GLSL IR also wraps switch
1061 * statements in a loop like this.
1062 */
1063 if (loop->info->limiting_terminator == NULL &&
1064 !loop->info->complex_loop) {
1065
1066 nir_block *last_loop_blk = nir_loop_last_block(loop);
1067 if (nir_block_ends_in_break(last_loop_blk)) {
1068 progress = wrapper_unroll(loop);
1069 goto exit;
1070 }
1071
1072 /* If we were able to guess the loop iteration based on array access
1073 * then do a partial unroll.
1074 */
1075 bool one_lt = list_is_singular(&loop->info->loop_terminator_list);
1076 if (!has_nested_loop && one_lt && !loop->partially_unrolled &&
1077 loop->info->guessed_trip_count &&
1078 check_unrolling_restrictions(sh, loop)) {
1079 partial_unroll(sh, loop, loop->info->guessed_trip_count);
1080 progress = true;
1081 }
1082 }
1083
1084 /* Intentionally don't consider exact_trip_count_known here. When
1085 * max_trip_count is non-zero, it is the upper bound on the number of
1086 * times the loop will iterate, but the loop may iterate less. For
1087 * example, the following loop will iterate 0 or 1 time:
1088 *
1089 * for (i = 0; i < min(x, 1); i++) { ... }
1090 *
1091 * Trivial single-interation loops (e.g., do { ... } while (false)) and
1092 * trivial zero-iteration loops (e.g., while (false) { ... }) will have
1093 * already been handled.
1094 *
1095 * If the loop is known to execute at most once and meets the other
1096 * unrolling criteria, unroll it even if it has nested loops.
1097 *
1098 * It is unlikely that such loops exist in real shaders. GraphicsFuzz is
1099 * known to generate spurious loops that iterate exactly once. It is
1100 * plausible that it could eventually start generating loops like the
1101 * example above, so it seems logical to defend against it now.
1102 */
1103 if (!loop->info->limiting_terminator ||
1104 (loop->info->max_trip_count != 1 && has_nested_loop))
1105 goto exit;
1106
1107 if (!check_unrolling_restrictions(sh, loop))
1108 goto exit;
1109
1110 if (loop->info->exact_trip_count_known) {
1111 simple_unroll(loop);
1112 progress = true;
1113 } else {
1114 /* Attempt to unroll loops with two terminators. */
1115 unsigned num_lt = list_length(&loop->info->loop_terminator_list);
1116 if (num_lt == 2 &&
1117 !loop->info->limiting_terminator->exact_trip_count_unknown) {
1118 bool limiting_term_second = true;
1119 nir_loop_terminator *terminator =
1120 list_first_entry(&loop->info->loop_terminator_list,
1121 nir_loop_terminator, loop_terminator_link);
1122
1123 if (terminator->nif == loop->info->limiting_terminator->nif) {
1124 limiting_term_second = false;
1125 terminator =
1126 list_last_entry(&loop->info->loop_terminator_list,
1127 nir_loop_terminator, loop_terminator_link);
1128 }
1129
1130 /* If the first terminator has a trip count of zero and is the
1131 * limiting terminator just do a simple unroll as the second
1132 * terminator can never be reached.
1133 */
1134 if (loop->info->max_trip_count == 0 && !limiting_term_second) {
1135 simple_unroll(loop);
1136 } else {
1137 complex_unroll(loop, terminator, limiting_term_second);
1138 }
1139 progress = true;
1140 }
1141
1142 if (num_lt == 1) {
1143 assert(loop->info->limiting_terminator->exact_trip_count_unknown);
1144 complex_unroll_single_terminator(loop);
1145 progress = true;
1146 }
1147 }
1148 }
1149
1150 exit:
1151 *has_nested_loop_out = true;
1152 if (progress && !unrolled_child_block)
1153 *unrolled_this_block = true;
1154
1155 return progress;
1156 }
1157
1158 static bool
nir_opt_loop_unroll_impl(nir_function_impl * impl,nir_variable_mode indirect_mask,bool force_unroll_sampler_indirect)1159 nir_opt_loop_unroll_impl(nir_function_impl *impl,
1160 nir_variable_mode indirect_mask,
1161 bool force_unroll_sampler_indirect)
1162 {
1163 bool progress = false;
1164 nir_metadata_require(impl, nir_metadata_loop_analysis, indirect_mask,
1165 (int)force_unroll_sampler_indirect);
1166 nir_metadata_require(impl, nir_metadata_block_index);
1167
1168 bool has_nested_loop = false;
1169 progress |= process_loops_in_block(impl->function->shader, &impl->body,
1170 &has_nested_loop);
1171
1172 if (progress) {
1173 nir_metadata_preserve(impl, nir_metadata_none);
1174 nir_lower_reg_intrinsics_to_ssa_impl(impl);
1175 } else {
1176 nir_metadata_preserve(impl, nir_metadata_all);
1177 }
1178
1179 return progress;
1180 }
1181
1182 /**
1183 * indirect_mask specifies which type of indirectly accessed variables
1184 * should force loop unrolling.
1185 */
1186 bool
nir_opt_loop_unroll(nir_shader * shader)1187 nir_opt_loop_unroll(nir_shader *shader)
1188 {
1189 bool progress = false;
1190
1191 bool force_unroll_sampler_indirect = shader->options->force_indirect_unrolling_sampler;
1192 nir_variable_mode indirect_mask = shader->options->force_indirect_unrolling;
1193 nir_foreach_function_impl(impl, shader) {
1194 progress |= nir_opt_loop_unroll_impl(impl, indirect_mask,
1195 force_unroll_sampler_indirect);
1196 }
1197 return progress;
1198 }
1199