xref: /aosp_15_r20/external/mesa3d/src/compiler/nir/nir_opt_if.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "nir/nir_builder.h"
25 #include "nir.h"
26 #include "nir_constant_expressions.h"
27 #include "nir_control_flow.h"
28 #include "nir_loop_analyze.h"
29 
30 static nir_def *clone_alu_and_replace_src_defs(nir_builder *b,
31                                                const nir_alu_instr *alu,
32                                                nir_def **src_defs);
33 
34 /**
35  * Gets the single block that jumps back to the loop header. Already assumes
36  * there is exactly one such block.
37  */
38 static nir_block *
find_continue_block(nir_loop * loop)39 find_continue_block(nir_loop *loop)
40 {
41    nir_block *header_block = nir_loop_first_block(loop);
42    nir_block *prev_block =
43       nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));
44 
45    assert(header_block->predecessors->entries == 2);
46 
47    set_foreach(header_block->predecessors, pred_entry) {
48       if (pred_entry->key != prev_block)
49          return (nir_block *)pred_entry->key;
50    }
51 
52    unreachable("Continue block not found!");
53 }
54 
55 /**
56  * Does a phi have one constant value from outside a loop and one from inside?
57  */
58 static bool
phi_has_constant_from_outside_and_one_from_inside_loop(nir_phi_instr * phi,const nir_block * entry_block,bool * entry_val,bool * continue_val)59 phi_has_constant_from_outside_and_one_from_inside_loop(nir_phi_instr *phi,
60                                                        const nir_block *entry_block,
61                                                        bool *entry_val,
62                                                        bool *continue_val)
63 {
64    /* We already know we have exactly one continue */
65    assert(exec_list_length(&phi->srcs) == 2);
66 
67    *entry_val = false;
68    *continue_val = false;
69 
70    nir_foreach_phi_src(src, phi) {
71       if (!nir_src_is_const(src->src))
72          return false;
73 
74       if (src->pred != entry_block) {
75          *continue_val = nir_src_as_bool(src->src);
76       } else {
77          *entry_val = nir_src_as_bool(src->src);
78       }
79    }
80 
81    return true;
82 }
83 
84 /**
85  * This optimization detects if statements at the tops of loops where the
86  * condition is a phi node of two constants and moves half of the if to above
87  * the loop and the other half of the if to the end of the loop.  A simple for
88  * loop "for (int i = 0; i < 4; i++)", when run through the SPIR-V front-end,
89  * ends up looking something like this:
90  *
91  * vec1 32 ssa_0 = load_const (0x00000000)
92  * vec1 32 ssa_1 = load_const (0xffffffff)
93  * loop {
94  *    block block_1:
95  *    vec1 32 ssa_2 = phi block_0: ssa_0, block_7: ssa_5
96  *    vec1 32 ssa_3 = phi block_0: ssa_0, block_7: ssa_1
97  *    if ssa_3 {
98  *       block block_2:
99  *       vec1 32 ssa_4 = load_const (0x00000001)
100  *       vec1 32 ssa_5 = iadd ssa_2, ssa_4
101  *    } else {
102  *       block block_3:
103  *    }
104  *    block block_4:
105  *    vec1 32 ssa_6 = load_const (0x00000004)
106  *    vec1 32 ssa_7 = ilt ssa_5, ssa_6
107  *    if ssa_7 {
108  *       block block_5:
109  *    } else {
110  *       block block_6:
111  *       break
112  *    }
113  *    block block_7:
114  * }
115  *
116  * This turns it into something like this:
117  *
118  * // Stuff from block 1
119  * // Stuff from block 3
120  * loop {
121  *    block block_1:
122  *    vec1 32 ssa_2 = phi block_0: ssa_0, block_7: ssa_5
123  *    vec1 32 ssa_6 = load_const (0x00000004)
124  *    vec1 32 ssa_7 = ilt ssa_2, ssa_6
125  *    if ssa_7 {
126  *       block block_5:
127  *    } else {
128  *       block block_6:
129  *       break
130  *    }
131  *    block block_7:
132  *    // Stuff from block 1
133  *    // Stuff from block 2
134  *    vec1 32 ssa_4 = load_const (0x00000001)
135  *    vec1 32 ssa_5 = iadd ssa_2, ssa_4
136  * }
137  */
static bool
opt_peel_loop_initial_if(nir_loop *loop)
{
   nir_block *header_block = nir_loop_first_block(loop);
   nir_block *const prev_block =
      nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));

   /* It would be insane if this were not true */
   assert(_mesa_set_search(header_block->predecessors, prev_block));

   /* The loop must have exactly one continue block which could be a block
    * ending in a continue instruction or the "natural" continue from the
    * last block in the loop back to the top.
    */
   if (header_block->predecessors->entries != 2)
      return false;

   /* The CF node immediately following the header must be the if we peel. */
   nir_cf_node *if_node = nir_cf_node_next(&header_block->cf_node);
   if (!if_node || if_node->type != nir_cf_node_if)
      return false;

   nir_if *nif = nir_cf_node_as_if(if_node);

   /* The if condition must be a phi defined in the loop header block. */
   nir_def *cond = nif->condition.ssa;
   if (cond->parent_instr->type != nir_instr_type_phi)
      return false;

   nir_phi_instr *cond_phi = nir_instr_as_phi(cond->parent_instr);
   if (cond->parent_instr->block != header_block)
      return false;

   bool entry_val = false, continue_val = false;
   if (!phi_has_constant_from_outside_and_one_from_inside_loop(cond_phi,
                                                               prev_block,
                                                               &entry_val,
                                                               &continue_val))
      return false;

   /* If they both execute or both don't execute, this is a job for
    * nir_dead_cf, not this pass.
    */
   if ((entry_val && continue_val) || (!entry_val && !continue_val))
      return false;

   /* Sort the two if sides by which phi value selects them: one side runs
    * on entry from outside the loop, the other on every continue.
    */
   struct exec_list *continue_list, *entry_list;
   if (continue_val) {
      continue_list = &nif->then_list;
      entry_list = &nif->else_list;
   } else {
      continue_list = &nif->else_list;
      entry_list = &nif->then_list;
   }

   /* We want to be moving the contents of entry_list to above the loop so it
    * can't contain any break or continue instructions.
    */
   foreach_list_typed(nir_cf_node, cf_node, node, entry_list) {
      nir_foreach_block_in_cf_node(block, cf_node) {
         if (nir_block_ends_in_jump(block))
            return false;
      }
   }

   /* We're about to re-arrange a bunch of blocks so make sure that we don't
    * have deref uses which cross block boundaries.  We don't want a deref
    * accidentally ending up in a phi.
    */
   nir_rematerialize_derefs_in_use_blocks_impl(
      nir_cf_node_get_function(&loop->cf_node));

   /* Before we do anything, convert the loop to LCSSA.  We're about to
    * replace a bunch of SSA defs with registers and this will prevent any of
    * it from leaking outside the loop.
    */
   nir_convert_loop_to_lcssa(loop);

   nir_block *after_if_block =
      nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node));

   /* Get rid of phis in the header block since we will be duplicating it */
   nir_lower_phis_to_regs_block(header_block);
   /* Get rid of phis after the if since dominance will change */
   nir_lower_phis_to_regs_block(after_if_block);

   /* Get rid of SSA defs in the pieces we're about to move around */
   nir_lower_ssa_defs_to_regs_block(header_block);
   nir_foreach_block_in_cf_node(block, &nif->cf_node)
      nir_lower_ssa_defs_to_regs_block(block);

   /* Pull the entire header out of the loop... */
   nir_cf_list header, tmp;
   nir_cf_extract(&header, nir_before_block(header_block),
                  nir_after_block(header_block));

   /* ...place a clone of it, followed by the entry-side contents of the if,
    * immediately before the loop.  This is the code that runs before the
    * first iteration.
    */
   nir_cf_list_clone(&tmp, &header, &loop->cf_node, NULL);
   nir_cf_reinsert(&tmp, nir_before_cf_node(&loop->cf_node));
   nir_cf_extract(&tmp, nir_before_cf_list(entry_list),
                  nir_after_cf_list(entry_list));
   nir_cf_reinsert(&tmp, nir_before_cf_node(&loop->cf_node));

   /* The original header moves to the bottom of the loop (just before the
    * back-edge) so it runs ahead of every subsequent iteration.
    */
   nir_cf_reinsert(&header,
                   nir_after_block_before_jump(find_continue_block(loop)));

   bool continue_list_jumps =
      nir_block_ends_in_jump(exec_node_data(nir_block,
                                            exec_list_get_tail(continue_list),
                                            cf_node.node));

   nir_cf_extract(&tmp, nir_before_cf_list(continue_list),
                  nir_after_cf_list(continue_list));

   /* Get continue block again as the previous reinsert might have removed the
    * block.  Also, if both the continue list and the continue block ends in
    * jump instructions, removes the jump from the latter, as it will not be
    * executed if we insert the continue list before it. */

   nir_block *continue_block = find_continue_block(loop);

   if (continue_list_jumps) {
      nir_instr *last_instr = nir_block_last_instr(continue_block);
      if (last_instr && last_instr->type == nir_instr_type_jump)
         nir_instr_remove(last_instr);
   }

   nir_cf_reinsert(&tmp,
                   nir_after_block_before_jump(continue_block));

   /* Both sides of the if are now empty; remove the if itself. */
   nir_cf_node_remove(&nif->cf_node);

   return true;
}
268 
269 static bool
alu_instr_is_type_conversion(const nir_alu_instr * alu)270 alu_instr_is_type_conversion(const nir_alu_instr *alu)
271 {
272    return nir_op_infos[alu->op].num_inputs == 1 &&
273           nir_op_infos[alu->op].output_type != nir_op_infos[alu->op].input_types[0];
274 }
275 
276 static bool
is_trivial_bcsel(const nir_instr * instr,bool allow_non_phi_src)277 is_trivial_bcsel(const nir_instr *instr, bool allow_non_phi_src)
278 {
279    if (instr->type != nir_instr_type_alu)
280       return false;
281 
282    nir_alu_instr *const bcsel = nir_instr_as_alu(instr);
283    if (!nir_op_is_selection(bcsel->op))
284       return false;
285 
286    for (unsigned i = 0; i < 3; i++) {
287       if (!nir_alu_src_is_trivial_ssa(bcsel, i) ||
288           bcsel->src[i].src.ssa->parent_instr->block != instr->block)
289          return false;
290 
291       if (bcsel->src[i].src.ssa->parent_instr->type != nir_instr_type_phi) {
292          /* opt_split_alu_of_phi() is able to peel that src from the loop */
293          if (i == 0 || !allow_non_phi_src)
294             return false;
295          allow_non_phi_src = false;
296       }
297    }
298 
299    nir_foreach_phi_src(src, nir_instr_as_phi(bcsel->src[0].src.ssa->parent_instr)) {
300       if (!nir_src_is_const(src->src))
301          return false;
302    }
303 
304    return true;
305 }
306 
307 /**
308  * Splits ALU instructions that have a source that is a phi node
309  *
310  * ALU instructions in the header block of a loop that meet the following
311  * criteria can be split.
312  *
313  * - The loop has no continue instructions other than the "natural" continue
314  *   at the bottom of the loop.
315  *
316  * - At least one source of the instruction is a phi node from the header block.
317  *
318  * - Any non-phi sources of the ALU instruction come from a block that
319  *   dominates the block before the loop.  The most common failure mode for
320  *   this check is sources that are generated in the loop header block.
321  *
322  * - The phi node selects a constant or undef from the block before the loop or
323  *   the only ALU user is a trivial bcsel that gets removed by peeling the ALU
324  *
325  * The split process splits the original ALU instruction into two, one at the
326  * bottom of the loop and one at the block before the loop. The instruction
327  * before the loop computes the value on the first iteration, and the
328  * instruction at the bottom computes the value on the second, third, and so
329  * on. A new phi node is added to the header block that selects either the
330  * instruction before the loop or the one at the end, and uses of the original
331  * instruction are replaced by this phi.
332  *
333  * The splitting transforms a loop like:
334  *
335  *    vec1 32 ssa_8 = load_const (0x00000001)
336  *    vec1 32 ssa_10 = load_const (0x00000000)
337  *    // succs: block_1
338  *    loop {
339  *            block block_1:
340  *            // preds: block_0 block_4
341  *            vec1 32 ssa_11 = phi block_0: ssa_10, block_4: ssa_15
342  *            vec1 32 ssa_12 = phi block_0: ssa_1, block_4: ssa_15
343  *            vec1 32 ssa_13 = phi block_0: ssa_10, block_4: ssa_16
344  *            vec1 32 ssa_14 = iadd ssa_11, ssa_8
345  *            vec1 32 ssa_15 = b32csel ssa_13, ssa_14, ssa_12
346  *            ...
347  *            // succs: block_1
348  *    }
349  *
350  * into:
351  *
352  *    vec1 32 ssa_8 = load_const (0x00000001)
353  *    vec1 32 ssa_10 = load_const (0x00000000)
354  *    vec1 32 ssa_22 = iadd ssa_10, ssa_8
355  *    // succs: block_1
356  *    loop {
357  *            block block_1:
358  *            // preds: block_0 block_4
359  *            vec1 32 ssa_11 = phi block_0: ssa_10, block_4: ssa_15
360  *            vec1 32 ssa_12 = phi block_0: ssa_1, block_4: ssa_15
361  *            vec1 32 ssa_13 = phi block_0: ssa_10, block_4: ssa_16
362  *            vec1 32 ssa_21 = phi block_0: ssa_22, block_4: ssa_20
363  *            vec1 32 ssa_15 = b32csel ssa_13, ssa_21, ssa_12
364  *            ...
365  *            vec1 32 ssa_20 = iadd ssa_15, ssa_8
366  *            // succs: block_1
367  *    }
368  */
static bool
opt_split_alu_of_phi(nir_builder *b, nir_loop *loop, nir_opt_if_options options)
{
   bool progress = false;
   nir_block *header_block = nir_loop_first_block(loop);
   nir_block *const prev_block =
      nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));

   /* It would be insane if this were not true */
   assert(_mesa_set_search(header_block->predecessors, prev_block));

   /* The loop must have exactly one continue block which could be a block
    * ending in a continue instruction or the "natural" continue from the
    * last block in the loop back to the top.
    */
   if (header_block->predecessors->entries != 2)
      return false;

   nir_block *continue_block = find_continue_block(loop);
   if (continue_block == header_block)
      return false;

   /* If the continue block is otherwise empty, leave it that way. This must match
    * opt_loop_peel_initial_break so that this optimization doesn't fight that one.
    */
   if (!nir_block_contains_work(continue_block))
      return false;

   nir_foreach_instr_safe(instr, header_block) {
      if (instr->type != nir_instr_type_alu)
         continue;

      nir_alu_instr *const alu = nir_instr_as_alu(instr);

      /* nir_op_vec{2,3,4} and nir_op_mov are excluded because they can easily
       * lead to infinite optimization loops. Splitting comparisons can lead
       * to loop unrolling not recognizing loop terminators, and type
       * conversions also lead to regressions.
       */
      if (nir_op_is_vec_or_mov(alu->op) ||
          nir_alu_instr_is_comparison(alu) ||
          alu_instr_is_type_conversion(alu) ||
          /* Avoid fighting with nir_lower_64bit_phis */
          (alu->def.bit_size == 64 && (options & nir_opt_if_avoid_64bit_phis)))
         continue;

      bool has_phi_src_from_prev_block = false;
      bool all_non_phi_exist_in_prev_block = true;
      bool is_prev_result_undef = true;
      bool is_prev_result_const = true;
      nir_def *prev_srcs[8];     // FINISHME: Array size?
      nir_def *continue_srcs[8]; // FINISHME: Array size?

      /* Build, per ALU source, the value to use for the first iteration
       * (prev_srcs) and for subsequent iterations (continue_srcs).
       */
      for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
         nir_instr *const src_instr = alu->src[i].src.ssa->parent_instr;

         /* If the source is a phi in the loop header block, then the
          * prev_srcs and continue_srcs will come from the different sources
          * of the phi.
          */
         if (src_instr->type == nir_instr_type_phi &&
             src_instr->block == header_block) {
            nir_phi_instr *const phi = nir_instr_as_phi(src_instr);

            /* Only strictly need to NULL out the pointers when the assertions
             * (below) are compiled in.  Debugging a NULL pointer deref in the
             * wild is easier than debugging a random pointer deref, so set
             * NULL unconditionally just to be safe.
             */
            prev_srcs[i] = NULL;
            continue_srcs[i] = NULL;

            nir_foreach_phi_src(src_of_phi, phi) {
               if (src_of_phi->pred == prev_block) {
                  if (src_of_phi->src.ssa->parent_instr->type !=
                      nir_instr_type_undef) {
                     is_prev_result_undef = false;
                  }

                  if (src_of_phi->src.ssa->parent_instr->type !=
                      nir_instr_type_load_const) {
                     is_prev_result_const = false;
                  }

                  prev_srcs[i] = src_of_phi->src.ssa;
                  has_phi_src_from_prev_block = true;
               } else
                  continue_srcs[i] = src_of_phi->src.ssa;
            }

            assert(prev_srcs[i] != NULL);
            assert(continue_srcs[i] != NULL);
         } else {
            /* If the source is not a phi (or a phi in a block other than the
             * loop header), then the value must exist in prev_block.
             */
            if (!nir_block_dominates(src_instr->block, prev_block)) {
               all_non_phi_exist_in_prev_block = false;
               break;
            }

            prev_srcs[i] = alu->src[i].src.ssa;
            continue_srcs[i] = alu->src[i].src.ssa;
         }
      }

      if (!has_phi_src_from_prev_block || !all_non_phi_exist_in_prev_block)
         continue;

      if (!is_prev_result_undef && !is_prev_result_const) {
         /* check if the only user is a trivial bcsel */
         if (!list_is_singular(&alu->def.uses))
            continue;

         nir_src *use = list_first_entry(&alu->def.uses, nir_src, use_link);
         if (nir_src_is_if(use) || !is_trivial_bcsel(nir_src_parent_instr(use), true))
            continue;
      }

      /* Split ALU of Phi: compute the first-iteration value before the loop. */
      b->cursor = nir_after_block(prev_block);
      nir_def *prev_value = clone_alu_and_replace_src_defs(b, alu, prev_srcs);

      /* Make a copy of the original ALU instruction.  Replace the sources
       * of the new instruction that read a phi with an undef source from
       * prev_block with the non-undef source of that phi.
       *
       * Insert the new instruction at the end of the continue block.
       */
      b->cursor = nir_after_block_before_jump(continue_block);

      nir_def *const alu_copy =
         clone_alu_and_replace_src_defs(b, alu, continue_srcs);

      /* Make a new phi node that selects a value from prev_block and the
       * result of the new instruction from continue_block.
       */
      nir_phi_instr *const phi = nir_phi_instr_create(b->shader);
      nir_phi_instr_add_src(phi, prev_block, prev_value);
      nir_phi_instr_add_src(phi, continue_block, alu_copy);

      nir_def_init(&phi->instr, &phi->def, alu_copy->num_components,
                   alu_copy->bit_size);

      b->cursor = nir_after_phis(header_block);
      nir_builder_instr_insert(b, &phi->instr);

      /* Modify all readers of the original ALU instruction to read the
       * result of the phi.
       */
      nir_def_rewrite_uses(&alu->def,
                           &phi->def);

      /* Since the original ALU instruction no longer has any readers, just
       * remove it.
       */
      nir_instr_remove_v(&alu->instr);
      nir_instr_free(&alu->instr);

      progress = true;
   }

   return progress;
}
533 
534 /**
535  * Simplify a bcsel whose sources are all phi nodes from the loop header block
536  *
537  * bcsel instructions in a loop that meet the following criteria can be
538  * converted to phi nodes:
539  *
540  * - The loop has no continue instructions other than the "natural" continue
541  *   at the bottom of the loop.
542  *
543  * - All of the sources of the bcsel are phi nodes in the header block of the
544  *   loop.
545  *
546  * - The phi node representing the condition of the bcsel instruction chooses
547  *   only constant values.
548  *
549  * The constant value from the condition will select one of the other sources
550  * when entered from outside the loop and the remaining source when entered
551  * from the continue block.  Since each of these sources is also a phi node in
552  * the header block, the value of the phi node can be "evaluated."  These
553  * evaluated phi nodes provide the sources for a new phi node.  All users of
554  * the bcsel result are updated to use the phi node result.
555  *
556  * The replacement transforms loops like:
557  *
558  *    vec1 32 ssa_7 = undefined
559  *    vec1 32 ssa_8 = load_const (0x00000001)
560  *    vec1 32 ssa_9 = load_const (0x000000c8)
561  *    vec1 32 ssa_10 = load_const (0x00000000)
562  *    // succs: block_1
563  *    loop {
564  *            block block_1:
565  *            // preds: block_0 block_4
566  *            vec1 32 ssa_11 = phi block_0: ssa_1, block_4: ssa_14
567  *            vec1 32 ssa_12 = phi block_0: ssa_10, block_4: ssa_15
568  *            vec1 32 ssa_13 = phi block_0: ssa_7, block_4: ssa_25
569  *            vec1 32 ssa_14 = b32csel ssa_12, ssa_13, ssa_11
570  *            vec1 32 ssa_16 = ige32 ssa_14, ssa_9
571  *            ...
572  *            vec1 32 ssa_15 = load_const (0xffffffff)
573  *            ...
574  *            vec1 32 ssa_25 = iadd ssa_14, ssa_8
575  *            // succs: block_1
576  *    }
577  *
578  * into:
579  *
580  *    vec1 32 ssa_7 = undefined
581  *    vec1 32 ssa_8 = load_const (0x00000001)
582  *    vec1 32 ssa_9 = load_const (0x000000c8)
583  *    vec1 32 ssa_10 = load_const (0x00000000)
584  *    // succs: block_1
585  *    loop {
586  *            block block_1:
587  *            // preds: block_0 block_4
588  *            vec1 32 ssa_11 = phi block_0: ssa_1, block_4: ssa_14
589  *            vec1 32 ssa_12 = phi block_0: ssa_10, block_4: ssa_15
590  *            vec1 32 ssa_13 = phi block_0: ssa_7, block_4: ssa_25
591  *            vec1 32 sss_26 = phi block_0: ssa_1, block_4: ssa_25
592  *            vec1 32 ssa_16 = ige32 ssa_26, ssa_9
593  *            ...
594  *            vec1 32 ssa_15 = load_const (0xffffffff)
595  *            ...
596  *            vec1 32 ssa_25 = iadd ssa_26, ssa_8
597  *            // succs: block_1
598  *    }
599  *
600  * \note
601  * It may be possible modify this function to not require a phi node as the
602  * source of the bcsel that is selected when entering from outside the loop.
603  * The only restriction is that the source must be generated outside the loop
604  * (since it will become the source of a phi node in the header block of the
605  * loop).
606  */
static bool
opt_simplify_bcsel_of_phi(nir_builder *b, nir_loop *loop)
{
   bool progress = false;
   nir_block *header_block = nir_loop_first_block(loop);
   nir_block *const prev_block =
      nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));

   /* It would be insane if this were not true */
   assert(_mesa_set_search(header_block->predecessors, prev_block));

   /* The loop must have exactly one continue block which could be a block
    * ending in a continue instruction or the "natural" continue from the
    * last block in the loop back to the top.
    */
   if (header_block->predecessors->entries != 2)
      return false;

   /* We can move any bcsel that can guaranteed to execute on every iteration
    * of a loop.  For now this is accomplished by only taking bcsels from the
    * header_block.  In the future, this could be expanded to include any
    * bcsel that must come before any break.
    *
    * For more details, see
    * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/170#note_110305
    */
   nir_foreach_instr_safe(instr, header_block) {
      if (!is_trivial_bcsel(instr, false))
         continue;

      nir_alu_instr *const bcsel = nir_instr_as_alu(instr);
      /* is_trivial_bcsel() guarantees src[0] is a phi in this block. */
      nir_phi_instr *const cond_phi =
         nir_instr_as_phi(bcsel->src[0].src.ssa->parent_instr);

      bool entry_val = false, continue_val = false;
      if (!phi_has_constant_from_outside_and_one_from_inside_loop(cond_phi,
                                                                  prev_block,
                                                                  &entry_val,
                                                                  &continue_val))
         continue;

      /* If they both execute or both don't execute, this is a job for
       * nir_dead_cf, not this pass.
       */
      if ((entry_val && continue_val) || (!entry_val && !continue_val))
         continue;

      /* bcsel src 1 is the then-value, src 2 the else-value; pick which one
       * is taken on entry and which on continue.
       */
      const unsigned entry_src = entry_val ? 1 : 2;
      const unsigned continue_src = entry_val ? 2 : 1;

      /* Create a new phi node that selects the value for prev_block from
       * the bcsel source that is selected by entry_val and the value for
       * continue_block from the other bcsel source.  Both sources have
       * already been verified to be phi nodes.
       */
      nir_block *continue_block = find_continue_block(loop);
      nir_phi_instr *const phi = nir_phi_instr_create(b->shader);
      nir_phi_instr_add_src(phi, prev_block,
                            nir_phi_get_src_from_block(nir_instr_as_phi(bcsel->src[entry_src].src.ssa->parent_instr),
                                                       prev_block)
                               ->src.ssa);

      nir_phi_instr_add_src(phi, continue_block,
                            nir_phi_get_src_from_block(nir_instr_as_phi(bcsel->src[continue_src].src.ssa->parent_instr),
                                                       continue_block)
                               ->src.ssa);

      nir_def_init(&phi->instr, &phi->def,
                   bcsel->def.num_components,
                   bcsel->def.bit_size);

      b->cursor = nir_after_phis(header_block);
      nir_builder_instr_insert(b, &phi->instr);

      /* Modify all readers of the bcsel instruction to read the result of
       * the phi.
       */
      nir_def_rewrite_uses(&bcsel->def,
                           &phi->def);

      /* Since the original bcsel instruction no longer has any readers,
       * just remove it.
       */
      nir_instr_remove_v(&bcsel->instr);
      nir_instr_free(&bcsel->instr);

      progress = true;
   }

   return progress;
}
698 
699 static bool
is_block_empty(nir_block * block)700 is_block_empty(nir_block *block)
701 {
702    return nir_cf_node_is_last(&block->cf_node) &&
703           exec_list_is_empty(&block->instr_list);
704 }
705 
706 /* Walk all the phis in the block immediately following the if statement and
707  * swap the blocks.
708  */
709 static void
rewrite_phi_predecessor_blocks(nir_if * nif,nir_block * old_then_block,nir_block * old_else_block,nir_block * new_then_block,nir_block * new_else_block)710 rewrite_phi_predecessor_blocks(nir_if *nif,
711                                nir_block *old_then_block,
712                                nir_block *old_else_block,
713                                nir_block *new_then_block,
714                                nir_block *new_else_block)
715 {
716    nir_block *after_if_block =
717       nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node));
718 
719    nir_foreach_phi(phi, after_if_block) {
720       nir_foreach_phi_src(src, phi) {
721          if (src->pred == old_then_block) {
722             src->pred = new_then_block;
723          } else if (src->pred == old_else_block) {
724             src->pred = new_else_block;
725          }
726       }
727    }
728 }
729 
730 /**
731  * This optimization turns:
732  *
733  *     if (cond) {
734  *     } else {
735  *         do_work();
736  *     }
737  *
738  * into:
739  *
740  *     if (!cond) {
741  *         do_work();
742  *     } else {
743  *     }
744  */
static bool
opt_if_simplification(nir_builder *b, nir_if *nif)
{
   /* Only simplify if the then block is empty and the else block is not. */
   if (!is_block_empty(nir_if_first_then_block(nif)) ||
       is_block_empty(nir_if_first_else_block(nif)))
      return false;

   /* Insert the inverted instruction and rewrite the condition. */
   b->cursor = nir_before_src(&nif->condition);
   nir_src_rewrite(&nif->condition, nir_inot(b, nif->condition.ssa));

   /* Grab pointers to the last then/else blocks for fixing up the phis. */
   nir_block *then_block = nir_if_last_then_block(nif);
   nir_block *else_block = nir_if_last_else_block(nif);

   if (nir_block_ends_in_jump(else_block)) {
      /* Even though this if statement has a jump on one side, we may still have
       * phis afterwards.  Single-source phis can be produced by loop unrolling
       * or dead control-flow passes and are perfectly legal.  Run a quick phi
       * removal on the block after the if to clean up any such phis.
       */
      nir_block *const next_block =
         nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node));
      nir_opt_remove_phis_block(next_block);
   }

   /* The legs are about to be swapped, so phi sources that pointed at the
    * old then-block must point at the new else-block and vice versa.
    */
   rewrite_phi_predecessor_blocks(nif, then_block, else_block, else_block,
                                  then_block);

   /* Finally, move the else block to the then block. */
   nir_cf_list tmp;
   nir_cf_extract(&tmp, nir_before_cf_list(&nif->else_list),
                  nir_after_cf_list(&nif->else_list));
   nir_cf_reinsert(&tmp, nir_before_cf_list(&nif->then_list));

   return true;
}
783 
784 /* Find phi statements after an if that choose between true and false, and
785  * replace them with the if statement's condition (or an inot of it).
786  */
static bool
opt_if_phi_is_condition(nir_builder *b, nir_if *nif)
{
   /* Grab pointers to the last then/else blocks for looking in the phis. */
   nir_block *then_block = nir_if_last_then_block(nif);
   ASSERTED nir_block *else_block = nir_if_last_else_block(nif);
   nir_def *cond = nif->condition.ssa;
   bool progress = false;

   nir_block *after_if_block = nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node));
   nir_foreach_phi_safe(phi, after_if_block) {
      /* Only a scalar phi of the condition's bit size can be replaced by the
       * condition def directly.
       */
      if (phi->def.bit_size != cond->bit_size ||
          phi->def.num_components != 1)
         continue;

      enum opt_bool {
         T,
         F,
         UNKNOWN
      } then_val = UNKNOWN,
        else_val = UNKNOWN;

      nir_foreach_phi_src(src, phi) {
         assert(src->pred == then_block || src->pred == else_block);
         enum opt_bool *pred_val = src->pred == then_block ? &then_val : &else_val;

         nir_scalar val = nir_scalar_resolved(src->src.ssa, 0);
         if (!nir_scalar_is_const(val))
            break;

         /* NIR booleans: ~0 (all bits set, i.e. -1) is true, 0 is false.
          * Any other constant means this phi is not a plain true/false
          * select, so leave it alone.
          */
         if (nir_scalar_as_int(val) == -1)
            *pred_val = T;
         else if (nir_scalar_as_uint(val) == 0)
            *pred_val = F;
         else
            break;
      }
      if (then_val == T && else_val == F) {
         /* phi(true, false) is exactly the condition itself. */
         nir_def_rewrite_uses(&phi->def, cond);
         progress = true;
      } else if (then_val == F && else_val == T) {
         /* phi(false, true) is the inverted condition; emit the inot right
          * before the if so it dominates all phi uses.
          */
         b->cursor = nir_before_cf_node(&nif->cf_node);
         nir_def_rewrite_uses(&phi->def, nir_inot(b, cond));
         progress = true;
      }
   }

   return progress;
}
836 
837 static bool
evaluate_if_condition(nir_if * nif,nir_cursor cursor,bool * value)838 evaluate_if_condition(nir_if *nif, nir_cursor cursor, bool *value)
839 {
840    nir_block *use_block = nir_cursor_current_block(cursor);
841    if (nir_block_dominates(nir_if_first_then_block(nif), use_block)) {
842       *value = true;
843       return true;
844    } else if (nir_block_dominates(nir_if_first_else_block(nif), use_block)) {
845       *value = false;
846       return true;
847    } else {
848       return false;
849    }
850 }
851 
852 static nir_def *
clone_alu_and_replace_src_defs(nir_builder * b,const nir_alu_instr * alu,nir_def ** src_defs)853 clone_alu_and_replace_src_defs(nir_builder *b, const nir_alu_instr *alu,
854                                nir_def **src_defs)
855 {
856    nir_alu_instr *nalu = nir_alu_instr_create(b->shader, alu->op);
857    nalu->exact = alu->exact;
858    nalu->fp_fast_math = alu->fp_fast_math;
859 
860    nir_def_init(&nalu->instr, &nalu->def,
861                 alu->def.num_components,
862                 alu->def.bit_size);
863 
864    for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
865       nalu->src[i].src = nir_src_for_ssa(src_defs[i]);
866       memcpy(nalu->src[i].swizzle, alu->src[i].swizzle,
867              sizeof(nalu->src[i].swizzle));
868    }
869 
870    nir_builder_instr_insert(b, &nalu->instr);
871 
872    return &nalu->def;
873    ;
874 }
875 
876 /*
877  * This propagates if condition evaluation down the chain of some alu
878  * instructions. For example by checking the use of some of the following alu
879  * instruction we can eventually replace ssa_107 with NIR_TRUE.
880  *
881  *   loop {
882  *      block block_1:
883  *      vec1 32 ssa_85 = load_const (0x00000002)
884  *      vec1 32 ssa_86 = ieq ssa_48, ssa_85
885  *      vec1 32 ssa_87 = load_const (0x00000001)
886  *      vec1 32 ssa_88 = ieq ssa_48, ssa_87
887  *      vec1 32 ssa_89 = ior ssa_86, ssa_88
888  *      vec1 32 ssa_90 = ieq ssa_48, ssa_0
889  *      vec1 32 ssa_91 = ior ssa_89, ssa_90
890  *      if ssa_86 {
891  *         block block_2:
892  *             ...
893  *            break
894  *      } else {
895  *            block block_3:
896  *      }
897  *      block block_4:
898  *      if ssa_88 {
899  *            block block_5:
900  *             ...
901  *            break
902  *      } else {
903  *            block block_6:
904  *      }
905  *      block block_7:
906  *      if ssa_90 {
907  *            block block_8:
908  *             ...
909  *            break
910  *      } else {
911  *            block block_9:
912  *      }
913  *      block block_10:
914  *      vec1 32 ssa_107 = inot ssa_91
915  *      if ssa_107 {
916  *            block block_11:
917  *            break
918  *      } else {
919  *            block block_12:
920  *      }
921  *   }
922  */
static bool
propagate_condition_eval(nir_builder *b, nir_if *nif, nir_src *use_src,
                         nir_src *alu_use, nir_alu_instr *alu)
{
   bool bool_value;
   b->cursor = nir_before_src(alu_use);
   /* Only possible when the ALU's user is dominated by one leg of the if,
    * which fixes the condition's value at that point.
    */
   if (!evaluate_if_condition(nif, b->cursor, &bool_value))
      return false;

   /* Build the source list for a clone of the ALU, substituting the known
    * boolean constant for every operand that is the if-condition.
    */
   nir_def *def[NIR_MAX_VEC_COMPONENTS] = { 0 };
   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
      if (alu->src[i].src.ssa == use_src->ssa) {
         def[i] = nir_imm_bool(b, bool_value);
      } else {
         def[i] = alu->src[i].src.ssa;
      }
   }

   /* Insert the specialized clone at the use and redirect that one use to
    * it; other users of the original ALU are unaffected.
    */
   nir_def *nalu = clone_alu_and_replace_src_defs(b, alu, def);
   nir_src_rewrite(alu_use, nalu);

   return true;
}
946 
947 static bool
can_propagate_through_alu(nir_src * src)948 can_propagate_through_alu(nir_src *src)
949 {
950    if (nir_src_parent_instr(src)->type != nir_instr_type_alu)
951       return false;
952 
953    nir_alu_instr *alu = nir_instr_as_alu(nir_src_parent_instr(src));
954    switch (alu->op) {
955    case nir_op_ior:
956    case nir_op_iand:
957    case nir_op_inot:
958    case nir_op_b2i32:
959       return true;
960    case nir_op_bcsel:
961       return src == &alu->src[0].src;
962    default:
963       return false;
964    }
965 }
966 
/* Try to replace one use of nif's condition with a constant (when the use is
 * dominated by a branch leg) and, if the user is a suitable ALU, propagate
 * the known value through that ALU's users as well.
 */
static bool
evaluate_condition_use(nir_builder *b, nir_if *nif, nir_src *use_src)
{
   bool progress = false;

   b->cursor = nir_before_src(use_src);

   bool bool_value;
   if (evaluate_if_condition(nif, b->cursor, &bool_value)) {
      /* Rewrite use to use const */
      nir_src_rewrite(use_src, nir_imm_bool(b, bool_value));
      progress = true;
   }

   /* Even if this use itself is not inside a leg, users of its ALU consumer
    * may be; try those too (see propagate_condition_eval()).
    */
   if (!nir_src_is_if(use_src) && can_propagate_through_alu(use_src)) {
      nir_alu_instr *alu = nir_instr_as_alu(nir_src_parent_instr(use_src));

      nir_foreach_use_including_if_safe(alu_use, &alu->def)
         progress |= propagate_condition_eval(b, nif, use_src, alu_use, alu);
   }

   return progress;
}
990 
991 static bool
opt_if_evaluate_condition_use(nir_builder * b,nir_if * nif)992 opt_if_evaluate_condition_use(nir_builder *b, nir_if *nif)
993 {
994    bool progress = false;
995 
996    /* Evaluate any uses of the if condition inside the if branches */
997    nir_foreach_use_including_if_safe(use_src, nif->condition.ssa) {
998       if (!(nir_src_is_if(use_src) && nir_src_parent_if(use_src) == nif))
999          progress |= evaluate_condition_use(b, nif, use_src);
1000    }
1001 
1002    return progress;
1003 }
1004 
/* Rewrite uses of component scalar.comp of scalar.def that lie inside one
 * branch leg of \p nif (the else leg when \p invert) to read new_scalar
 * instead.  Returns true if any use was rewritten.
 */
static bool
rewrite_comp_uses_within_if(nir_builder *b, nir_if *nif, bool invert,
                            nir_scalar scalar, nir_scalar new_scalar)
{
   bool progress = false;

   nir_block *first = invert ? nir_if_first_else_block(nif) : nir_if_first_then_block(nif);
   nir_block *last = invert ? nir_if_last_else_block(nif) : nir_if_last_then_block(nif);

   nir_def *new_ssa = NULL;
   nir_foreach_use_safe(use, scalar.def) {
      /* Block indices are contiguous across a branch leg, so a simple index
       * range check tells us whether the use is inside it.  Requires valid
       * nir_metadata_block_index.
       */
      if (nir_src_parent_instr(use)->block->index < first->index ||
          nir_src_parent_instr(use)->block->index > last->index)
         continue;

      /* Only rewrite users which use only the new component. This is to avoid a
       * situation where copy propagation will undo the rewrite and we risk an infinite
       * loop.
       *
       * We could rewrite users which use a mix of the old and new components, but if
       * nir_src_components_read() is incomplete, then we risk the new component actually being
       * unused and some optimization later undoing the rewrite.
       */
      if (nir_src_components_read(use) != BITFIELD64_BIT(scalar.comp))
         continue;

      /* Lazily materialize the replacement right before the if, padding it
       * with an undef vector so swizzles on the users keep working.
       */
      if (!new_ssa) {
         b->cursor = nir_before_cf_node(&nif->cf_node);
         new_ssa = nir_channel(b, new_scalar.def, new_scalar.comp);
         if (scalar.def->num_components > 1) {
            nir_def *vec = nir_undef(b, scalar.def->num_components, scalar.def->bit_size);
            new_ssa = nir_vector_insert_imm(b, vec, new_ssa, scalar.comp);
         }
      }

      nir_src_rewrite(use, new_ssa);
      progress = true;
   }

   return progress;
}
1046 
1047 /*
1048  * This optimization turns:
1049  *
1050  *     if (a == (b=readfirstlane(a)))
1051  *        use(a)
1052  *     if (c == (d=load_const))
1053  *        use(c)
1054  *
1055  * into:
1056  *
1057  *     if (a == (b=readfirstlane(a)))
1058  *        use(b)
1059  *     if (c == (d=load_const))
1060  *        use(d)
1061  */
static bool
opt_if_rewrite_uniform_uses(nir_builder *b, nir_if *nif, nir_scalar cond, bool accept_ine)
{
   bool progress = false;

   if (!nir_scalar_is_alu(cond))
      return false;

   nir_op op = nir_scalar_alu_op(cond);
   if (op == nir_op_iand) {
      /* For (a && b) both operands hold in the then leg, so recurse into
       * each.  ine is no longer acceptable below an iand: the else leg of
       * (a != x && b != y) does not pin either value.
       */
      progress |= opt_if_rewrite_uniform_uses(b, nif, nir_scalar_chase_alu_src(cond, 0), false);
      progress |= opt_if_rewrite_uniform_uses(b, nif, nir_scalar_chase_alu_src(cond, 1), false);
      return progress;
   }

   if (op != nir_op_ieq && (op != nir_op_ine || !accept_ine))
      return false;

   /* Try both operand orders: one side uniform (constant or subgroup
    * broadcast), the other arbitrary.
    */
   for (unsigned i = 0; i < 2; i++) {
      nir_scalar src_uni = nir_scalar_chase_alu_src(cond, i);
      nir_scalar src_div = nir_scalar_chase_alu_src(cond, !i);

      /* Inside the leg where the comparison holds (else leg for ine), the
       * other operand is known to equal the constant.
       */
      if (nir_scalar_is_const(src_uni) && src_div.def != src_uni.def)
         return rewrite_comp_uses_within_if(b, nif, op == nir_op_ine, src_div, src_uni);

      if (!nir_scalar_is_intrinsic(src_uni))
         continue;
      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(src_uni.def->parent_instr);
      /* Only subgroup-uniform producers qualify.  A reduce with a non-zero
       * cluster size is only uniform within each cluster, so it's excluded.
       */
      if (intrin->intrinsic != nir_intrinsic_read_first_invocation &&
          intrin->intrinsic != nir_intrinsic_read_invocation &&
          (intrin->intrinsic != nir_intrinsic_reduce || nir_intrinsic_cluster_size(intrin)))
         continue;

      /* Check that the intrinsic broadcasts the very value being compared
       * (possibly through resolvable moves), i.e. the pattern is
       * a == readfirstlane(a).
       */
      nir_scalar intrin_src = { intrin->src[0].ssa, src_uni.comp };
      nir_scalar resolved_intrin_src = nir_scalar_resolved(intrin_src.def, intrin_src.comp);

      if (!nir_scalar_equal(resolved_intrin_src, src_div))
         continue;

      progress |= rewrite_comp_uses_within_if(b, nif, op == nir_op_ine, resolved_intrin_src, src_uni);
      if (!nir_scalar_equal(intrin_src, resolved_intrin_src))
         progress |= rewrite_comp_uses_within_if(b, nif, op == nir_op_ine, intrin_src, src_uni);

      return progress;
   }

   return false;
}
1110 
1111 static void
simple_merge_if(nir_if * dest_if,nir_if * src_if,bool dest_if_then,bool src_if_then)1112 simple_merge_if(nir_if *dest_if, nir_if *src_if, bool dest_if_then,
1113                 bool src_if_then)
1114 {
1115    /* Now merge the if branch */
1116    nir_block *dest_blk = dest_if_then ? nir_if_last_then_block(dest_if)
1117                                       : nir_if_last_else_block(dest_if);
1118 
1119    struct exec_list *list = src_if_then ? &src_if->then_list
1120                                         : &src_if->else_list;
1121 
1122    nir_cf_list if_cf_list;
1123    nir_cf_extract(&if_cf_list, nir_before_cf_list(list),
1124                   nir_after_cf_list(list));
1125    nir_cf_reinsert(&if_cf_list, nir_after_block(dest_blk));
1126 }
1127 
/* Helper for opt_if_phi_src_unused(): \p phi sits between \p prev_if and
 * \p next_if, which share the same condition.  If all uses of the phi are
 * confined to a single branch leg of next_if, the phi source coming from
 * the opposite leg of prev_if can never be observed, so it is replaced
 * with undef.  Returns true on progress.
 */
static bool
opt_phi_src_unused(nir_builder *b, nir_phi_instr *phi,
                   nir_if *prev_if, nir_if *next_if)
{
   /* Return early, if either of the sources is already undef. */
   nir_foreach_phi_src(phi_src, phi) {
      if (phi_src->src.ssa->parent_instr->type == nir_instr_type_undef)
         return false;
   }

   nir_block *first_then = nir_if_first_then_block(next_if);
   nir_block *first_else = nir_if_first_else_block(next_if);
   bool then_used = false;
   bool else_used = false;

   nir_foreach_use_including_if(use, &phi->def) {
      nir_block *use_block = nir_src_get_block(use);

      /* Check whether the if_use is on the then- or else- side. */
      if (nir_block_dominates(first_then, use_block))
         then_used = true;
      else if (nir_block_dominates(first_else, use_block))
         else_used = true;
      else
         return false;
      /* Uses on both sides: neither phi source is dead. */
      if (then_used && else_used)
         return false;
   }

   /* Both ifs take the same leg, so the source from prev_if's opposite leg
    * is unreachable from any use.
    */
   nir_block *unused_blk = then_used ? nir_if_last_else_block(prev_if)
                                     : nir_if_last_then_block(prev_if);
   nir_src *unused_src = &nir_phi_get_src_from_block(phi, unused_blk)->src;

   /* Create undef and replace phi-src. */
   b->cursor = nir_before_cf_node(&prev_if->cf_node);
   nir_def *undef = nir_undef(b, phi->def.num_components, phi->def.bit_size);
   nir_src_rewrite(unused_src, undef);

   return true;
}
1168 
1169 /*
1170  * This small optimization targets phis between two IF statements with
1171  * the same condition.  If the phi dst is only used in one branch leg,
1172  * the 'unused' phi source gets replaced with undef.
1173  */
1174 static bool
opt_if_phi_src_unused(nir_builder * b,nir_if * nif)1175 opt_if_phi_src_unused(nir_builder *b, nir_if *nif)
1176 {
1177    bool progress = false;
1178 
1179    nir_block *next_blk = nir_cf_node_cf_tree_next(&nif->cf_node);
1180    nir_if *next_if = nir_block_get_following_if(next_blk);
1181    if (!next_if || !nir_srcs_equal(nif->condition, next_if->condition))
1182       return false;
1183 
1184    nir_foreach_phi(phi, next_blk) {
1185       progress |= opt_phi_src_unused(b, phi, nif, next_if);
1186    }
1187 
1188    return progress;
1189 }
1190 
/* Helper for opt_if_merge(): \p phi sits between \p prev_if and \p next_if,
 * which share the same condition.  Any use of the phi inside next_if is
 * statically on the then- or else-side, so it can read the matching phi
 * source from prev_if directly.
 */
static void
rewrite_phi_uses(nir_phi_instr *phi, nir_if *prev_if, nir_if *next_if)
{
   nir_def *then_src =
      nir_phi_get_src_from_block(phi, nir_if_last_then_block(prev_if))->src.ssa;
   nir_def *else_src =
      nir_phi_get_src_from_block(phi, nir_if_last_else_block(prev_if))->src.ssa;

   /* Rewrite all uses inside the next IF with either then_src or else_src. */
   nir_foreach_use_including_if_safe(use, &phi->def) {
      nir_cf_node *node = &nir_src_get_block(use)->cf_node;

      /* Check if the node is inside the if-stmt. */
      while (node) {
         if (node->parent == &next_if->cf_node)
            break;

         node = node->parent;
         /* Walked up to the ifs' common parent: the use is outside. */
         if (node == prev_if->cf_node.parent)
            node = NULL;
      }

      /* The node is outside of the IF. */
      if (!node)
         continue;

      /* Get the first block. */
      while (!nir_cf_node_is_first(node))
         node = nir_cf_node_prev(node);

      /* The first sibling block identifies which branch leg the use is in. */
      nir_block *block = nir_cf_node_as_block(node);
      nir_src_rewrite(use, block == nir_if_first_then_block(next_if) ? then_src : else_src);
   }
}
1225 
/* Merge two consecutive if-statements that test the same condition into one.
 * Phis in the block between them are handled by rewrite_phi_uses() and then
 * kept alive across the removal of the second if.  Returns true on progress.
 */
static bool
opt_if_merge(nir_if *nif)
{
   nir_block *next_blk = nir_cf_node_cf_tree_next(&nif->cf_node);
   if (!next_blk)
      return false;

   nir_if *next_if = nir_block_get_following_if(next_blk);
   if (!next_if || !nir_srcs_equal(nif->condition, next_if->condition))
      return false;

   /* This optimization isn't made to work in this case and
    * opt_if_evaluate_condition_use will optimize it later.
    */
   if (nir_block_ends_in_jump(nir_if_last_then_block(nif)) ||
       nir_block_ends_in_jump(nir_if_last_else_block(nif)))
      return false;

   /* This optimization is not prepared to handle updating phis other than
    * immediately after the second if-statement.
    */
   if (nir_block_ends_in_jump(nir_if_last_then_block(next_if)) ||
       nir_block_ends_in_jump(nir_if_last_else_block(next_if)))
      return false;

   /* Ensure that there is nothing but phis between the IFs */
   if (!exec_list_is_empty(&next_blk->instr_list)) {
      if (nir_block_last_instr(next_blk)->type != nir_instr_type_phi)
         return false;

      /* If there are phis, then the next IF must not contain any jump. */
      nir_foreach_block_in_cf_node(block, &next_if->cf_node) {
         if (nir_block_ends_in_jump(block))
            return false;
      }
   }

   nir_foreach_phi(phi, next_blk) {
      /* Rewrite the phi uses in each branch leg of the following IF
       * with the phi source from the respective branch leg of the
       * previous IF.
       */
      rewrite_phi_uses(phi, nif, next_if);
   }

   /* Here we merge two consecutive ifs that have the same condition e.g:
    *
    *   if ssa_12 {
    *      ...
    *   } else {
    *      ...
    *   }
    *   if ssa_12 {
    *      ...
    *   } else {
    *      ...
    *   }
    *
    * Note: This only merges if-statements when the block between them is
    * empty except for phis.
    */
   simple_merge_if(nif, next_if, true, true);
   simple_merge_if(nif, next_if, false, false);

   nir_block *new_then_block = nir_if_last_then_block(nif);
   nir_block *new_else_block = nir_if_last_else_block(nif);

   nir_block *old_then_block = nir_if_last_then_block(next_if);
   nir_block *old_else_block = nir_if_last_else_block(next_if);

   /* Rewrite the predecessor block for any phis following the second
    * if-statement.
    */
   rewrite_phi_predecessor_blocks(next_if, old_then_block,
                                  old_else_block,
                                  new_then_block,
                                  new_else_block);

   /* Move phis after merged if to avoid them being deleted when we remove
    * the merged if-statement.
    */
   nir_block *after_next_if_block =
      nir_cf_node_as_block(nir_cf_node_next(&next_if->cf_node));

   nir_foreach_phi_safe(phi, after_next_if_block) {
      exec_node_remove(&phi->instr.node);
      exec_list_push_tail(&next_blk->instr_list, &phi->instr.node);
      phi->instr.block = next_blk;
   }

   nir_cf_node_remove(&next_if->cf_node);

   return true;
}
1320 
1321 static bool
opt_if_cf_list(nir_builder * b,struct exec_list * cf_list,nir_opt_if_options options)1322 opt_if_cf_list(nir_builder *b, struct exec_list *cf_list,
1323                nir_opt_if_options options)
1324 {
1325    bool progress = false;
1326    foreach_list_typed(nir_cf_node, cf_node, node, cf_list) {
1327       switch (cf_node->type) {
1328       case nir_cf_node_block:
1329          break;
1330 
1331       case nir_cf_node_if: {
1332          nir_if *nif = nir_cf_node_as_if(cf_node);
1333          progress |= opt_if_cf_list(b, &nif->then_list,
1334                                     options);
1335          progress |= opt_if_cf_list(b, &nif->else_list,
1336                                     options);
1337          progress |= opt_if_merge(nif);
1338          progress |= opt_if_simplification(b, nif);
1339          if (options & nir_opt_if_optimize_phi_true_false)
1340             progress |= opt_if_phi_is_condition(b, nif);
1341          break;
1342       }
1343 
1344       case nir_cf_node_loop: {
1345          nir_loop *loop = nir_cf_node_as_loop(cf_node);
1346          assert(!nir_loop_has_continue_construct(loop));
1347          progress |= opt_if_cf_list(b, &loop->body,
1348                                     options);
1349          progress |= opt_simplify_bcsel_of_phi(b, loop);
1350          break;
1351       }
1352 
1353       case nir_cf_node_function:
1354          unreachable("Invalid cf type");
1355       }
1356    }
1357 
1358    return progress;
1359 }
1360 
1361 /**
1362  * Optimizations which can create registers are done after other optimizations
1363  * which require SSA.
1364  */
1365 static bool
opt_if_regs_cf_list(struct exec_list * cf_list)1366 opt_if_regs_cf_list(struct exec_list *cf_list)
1367 {
1368    bool progress = false;
1369    foreach_list_typed(nir_cf_node, cf_node, node, cf_list) {
1370       switch (cf_node->type) {
1371       case nir_cf_node_block:
1372          break;
1373 
1374       case nir_cf_node_if: {
1375          nir_if *nif = nir_cf_node_as_if(cf_node);
1376          progress |= opt_if_regs_cf_list(&nif->then_list);
1377          progress |= opt_if_regs_cf_list(&nif->else_list);
1378          break;
1379       }
1380 
1381       case nir_cf_node_loop: {
1382          nir_loop *loop = nir_cf_node_as_loop(cf_node);
1383          assert(!nir_loop_has_continue_construct(loop));
1384          progress |= opt_if_regs_cf_list(&loop->body);
1385          progress |= opt_peel_loop_initial_if(loop);
1386          break;
1387       }
1388 
1389       case nir_cf_node_function:
1390          unreachable("Invalid cf type");
1391       }
1392    }
1393 
1394    return progress;
1395 }
1396 
/**
 * These optimisations depend on nir_metadata_block_index and therefore must
 * not do anything to cause the metadata to become invalid.
 */
static bool
opt_if_safe_cf_list(nir_builder *b, struct exec_list *cf_list, nir_opt_if_options options)
{
   bool progress = false;
   foreach_list_typed(nir_cf_node, cf_node, node, cf_list) {
      switch (cf_node->type) {
      case nir_cf_node_block:
         break;

      case nir_cf_node_if: {
         nir_if *nif = nir_cf_node_as_if(cf_node);
         /* Recurse into both legs before optimizing this if itself. */
         progress |= opt_if_safe_cf_list(b, &nif->then_list, options);
         progress |= opt_if_safe_cf_list(b, &nif->else_list, options);
         progress |= opt_if_evaluate_condition_use(b, nif);
         /* Chase moves so the condition's defining ALU is analyzed. */
         nir_scalar cond = nir_scalar_resolved(nif->condition.ssa, 0);
         progress |= opt_if_rewrite_uniform_uses(b, nif, cond, true);
         progress |= opt_if_phi_src_unused(b, nif);
         break;
      }

      case nir_cf_node_loop: {
         nir_loop *loop = nir_cf_node_as_loop(cf_node);
         assert(!nir_loop_has_continue_construct(loop));
         progress |= opt_if_safe_cf_list(b, &loop->body, options);
         progress |= opt_split_alu_of_phi(b, loop, options);
         break;
      }

      case nir_cf_node_function:
         unreachable("Invalid cf type");
      }
   }

   return progress;
}
1436 
1437 bool
nir_opt_if(nir_shader * shader,nir_opt_if_options options)1438 nir_opt_if(nir_shader *shader, nir_opt_if_options options)
1439 {
1440    bool progress = false;
1441 
1442    nir_foreach_function_impl(impl, shader) {
1443       nir_builder b = nir_builder_create(impl);
1444 
1445       nir_metadata_require(impl, nir_metadata_control_flow);
1446       progress = opt_if_safe_cf_list(&b, &impl->body, options);
1447       nir_metadata_preserve(impl, nir_metadata_control_flow);
1448 
1449       bool preserve = true;
1450 
1451       if (opt_if_cf_list(&b, &impl->body, options)) {
1452          preserve = false;
1453          progress = true;
1454       }
1455 
1456       if (opt_if_regs_cf_list(&impl->body)) {
1457          preserve = false;
1458          progress = true;
1459 
1460          /* If that made progress, we're no longer really in SSA form.  We
1461           * need to convert registers back into SSA defs and clean up SSA defs
1462           * that don't dominate their uses.
1463           */
1464          nir_lower_reg_intrinsics_to_ssa_impl(impl);
1465       }
1466 
1467       if (preserve) {
1468          nir_metadata_preserve(impl, nir_metadata_none);
1469       } else {
1470          nir_metadata_preserve(impl, nir_metadata_all);
1471       }
1472    }
1473 
1474    return progress;
1475 }
1476