/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"

/*
 * Implements a pass that tries to move uses of vecN sources to their
 * destinations.  This is kind of like an inverse copy-propagation pass.
 * For instance, if you have
 *
 * ssa_1 = vec4(a, b, c, d)
 * ssa_2 = fadd(a, b)
 *
 * this will be turned into
 *
 * ssa_1 = vec4(a, b, c, d)
 * ssa_2 = fadd(ssa_1.x, ssa_1.y)
 *
 * While this is "worse" because it adds a bunch of unneeded dependencies, it
 * actually makes it much easier for vec4-based backends to coalesce the MOVs
 * that result from the vec4 operation because they don't have to worry about
 * quite as many reads.
 */

/* Returns true if the given SSA def dominates the instruction.  An SSA def is
 * considered to *not* dominate the instruction that defines it.
 */
static bool
ssa_def_dominates_instr(nir_def *def, nir_instr *instr)
{
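   /* nir_index_instrs() numbers instructions in block order, so a def can
    * only dominate instructions with strictly greater indices; this check
    * also rejects the instruction that defines the def itself.
    */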
   if (instr->index <= def->parent_instr->index) {
      return false;
   } else if (def->parent_instr->block == instr->block) {
      return def->parent_instr->index < instr->index;
   } else {
      return nir_block_dominates(def->parent_instr->block, instr->block);
   }
}

static bool
move_vec_src_uses_to_dest_block(nir_block *block, bool skip_const_srcs)
{
   bool progress = false;

   nir_foreach_instr(instr, block) {
      if (instr->type != nir_instr_type_alu)
         continue;

      nir_alu_instr *vec = nir_instr_as_alu(instr);

      switch (vec->op) {
      case nir_op_vec2:
      case nir_op_vec3:
      case nir_op_vec4:
         break;
      default:
         continue; /* The loop */
      }

      /* If the vec is used only by a single store_output, then by reusing
       * its destination we would lose the ability to write the vec to the
       * output directly.
       */
      if (list_is_singular(&vec->def.uses)) {
         nir_src *src = list_first_entry(&vec->def.uses, nir_src, use_link);
         nir_instr *use_instr = nir_src_parent_instr(src);
         if (use_instr->type == nir_instr_type_intrinsic) {
            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(use_instr);
            if (intr->intrinsic == nir_intrinsic_store_output)
               return false;
         }
      }
92 
93       /* First, mark all of the sources we are going to consider for rewriting
94        * to the destination
95        */
96       int srcs_remaining = 0;
97       for (unsigned i = 0; i < nir_op_infos[vec->op].num_inputs; i++) {
98          if (skip_const_srcs && nir_src_is_const(vec->src[i].src))
99             continue;
100 
101          srcs_remaining |= 1 << i;
102       }
103 
104       /* We can't actually do anything with this instruction */
105       if (srcs_remaining == 0)
106          continue;
107 
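      /* Walk the distinct SSA defs feeding the vec: each pass picks the
       * lowest remaining source with ffs() and clears every source bit
       * that reads the same def.
       */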
      for (unsigned i; i = ffs(srcs_remaining) - 1, srcs_remaining;) {
         int8_t swizzle[NIR_MAX_VEC_COMPONENTS];
         memset(swizzle, -1, sizeof(swizzle));

         for (unsigned j = i; j < nir_op_infos[vec->op].num_inputs; j++) {
            if (vec->src[j].src.ssa != vec->src[i].src.ssa)
               continue;

            /* Mark the given channel as having been handled */
            srcs_remaining &= ~(1 << j);

            /* Mark the appropriate channel as coming from src j */
            swizzle[vec->src[j].swizzle[0]] = j;
         }

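         /* At this point swizzle[] maps each channel of the shared source
          * def to the vec destination channel that holds it; -1 marks
          * channels that never made it into the vec.  For example, with
          * ssa_1 = vec4(a.y, a.x, b.z, a.w) and source a this yields
          * swizzle = { 1, 0, -1, 3 }.
          */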
         nir_foreach_use_safe(use, vec->src[i].src.ssa) {
            if (nir_src_parent_instr(use) == &vec->instr)
               continue;

            /* We need to dominate the use if we are going to rewrite it */
            if (!ssa_def_dominates_instr(&vec->def, nir_src_parent_instr(use)))
               continue;

            /* For now, we'll just rewrite ALU instructions */
            if (nir_src_parent_instr(use)->type != nir_instr_type_alu)
               continue;

            nir_alu_instr *use_alu = nir_instr_as_alu(nir_src_parent_instr(use));

            /* Figure out which source we're actually looking at */
            nir_alu_src *use_alu_src = exec_node_data(nir_alu_src, use, src);
            unsigned src_idx = use_alu_src - use_alu->src;
            assert(src_idx < nir_op_infos[use_alu->op].num_inputs);

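            /* Every channel the use reads must be present somewhere in the
             * vec destination, or we can't redirect the read through it.
             */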
            bool can_reswizzle = true;
            for (unsigned j = 0; j < 4; j++) {
               if (!nir_alu_instr_channel_used(use_alu, src_idx, j))
                  continue;

               if (swizzle[use_alu_src->swizzle[j]] == -1) {
                  can_reswizzle = false;
                  break;
               }
            }

            if (!can_reswizzle)
               continue;

            /* At this point, we have determined that the given use can be
             * reswizzled to actually use the destination of the vecN operation.
             * Go ahead and rewrite it as needed.
             */
            nir_src_rewrite(use, &vec->def);
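            /* Translate each used channel from the source def's channel
             * space into the vec destination's channel space.
             */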
            for (unsigned j = 0; j < 4; j++) {
               if (!nir_alu_instr_channel_used(use_alu, src_idx, j))
                  continue;

               use_alu_src->swizzle[j] = swizzle[use_alu_src->swizzle[j]];
               progress = true;
            }
         }
      }
   }

   return progress;
}

static bool
nir_move_vec_src_uses_to_dest_impl(nir_shader *shader, nir_function_impl *impl,
                                   bool skip_const_srcs)
{
   bool progress = false;

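   /* ssa_def_dominates_instr() relies on both up-to-date dominance metadata
    * and the per-instruction indices assigned by nir_index_instrs().
    */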
   nir_metadata_require(impl, nir_metadata_dominance);

   nir_index_instrs(impl);

   nir_foreach_block(block, impl) {
      progress |= move_vec_src_uses_to_dest_block(block, skip_const_srcs);
   }

   nir_metadata_preserve(impl, nir_metadata_control_flow);

   return progress;
}

bool
nir_move_vec_src_uses_to_dest(nir_shader *shader, bool skip_const_srcs)
{
   bool progress = false;

   nir_foreach_function_impl(impl, shader) {
      progress |= nir_move_vec_src_uses_to_dest_impl(shader, impl, skip_const_srcs);
   }

   return progress;
}
205