/*
 * Copyright 2023 Valve Corporation
 * Copyright 2020 Collabora, Ltd.
 * SPDX-License-Identifier: MIT
 */

#include "nir_legacy.h"
#include "nir.h"
#include "nir_builder.h"

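/* Returns true if a float source modifier (fabs/fneg) can be folded into all
 * of its users, i.e. if every use is a float-typed source of an ALU
 * instruction. For example, an fneg feeding an fmul folds, but an fneg
 * feeding an iadd does not.
 */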
bool
nir_legacy_float_mod_folds(nir_alu_instr *mod)
{
   assert(mod->op == nir_op_fabs || mod->op == nir_op_fneg);

   /* No legacy user supports fp64 modifiers */
   if (mod->def.bit_size == 64)
      return false;

   nir_foreach_use_including_if(src, &mod->def) {
      if (nir_src_is_if(src))
         return false;

      nir_instr *parent = nir_src_parent_instr(src);
      if (parent->type != nir_instr_type_alu)
         return false;

      nir_alu_instr *alu = nir_instr_as_alu(parent);
      nir_alu_src *alu_src = list_entry(src, nir_alu_src, src);
      unsigned src_index = alu_src - alu->src;

      assert(src_index < nir_op_infos[alu->op].num_inputs);
      nir_alu_type src_type = nir_op_infos[alu->op].input_types[src_index];

      if (nir_alu_type_get_base_type(src_type) != nir_type_float)
         return false;
   }

   return true;
}

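/* Translates a source to the legacy form: if it comes from a load_reg
 * intrinsic, return a register source (handle, base offset, optional
 * indirect) along with any fabs/fneg already folded into the load;
 * otherwise, return the SSA def unchanged.
 */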
static nir_legacy_alu_src
chase_alu_src_helper(const nir_src *src)
{
   nir_intrinsic_instr *load = nir_load_reg_for_def(src->ssa);

   if (load) {
      bool indirect = (load->intrinsic == nir_intrinsic_load_reg_indirect);

      return (nir_legacy_alu_src){
         .src.is_ssa = false,
         .src.reg = {
            .handle = load->src[0].ssa,
            .base_offset = nir_intrinsic_base(load),
            .indirect = indirect ? load->src[1].ssa : NULL },
         .fabs = nir_intrinsic_legacy_fabs(load),
         .fneg = nir_intrinsic_legacy_fneg(load),
      };
   } else {
      return (nir_legacy_alu_src){
         .src.is_ssa = true,
         .src.ssa = src->ssa,
      };
   }
}

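/* If *ssa is produced by a foldable instance of the given unary modifier op
 * (fneg or fabs), chase through it: rewrite *ssa to the modifier's source,
 * compose the swizzles, and return true.
 */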
static inline bool
chase_source_mod(nir_def **ssa, nir_op op, uint8_t *swizzle)
{
   if ((*ssa)->parent_instr->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *alu = nir_instr_as_alu((*ssa)->parent_instr);
   if (alu->op != op)
      return false;

   /* If there are other uses of the modifier that don't fold, we can't fold
    * it here either, in case it's reading from a load_reg that won't be
    * emitted.
    */
   if (!nir_legacy_float_mod_folds(alu))
      return false;

   /* This only works for unary ops */
   assert(nir_op_infos[op].num_inputs == 1);

   /* To fuse the source mod in, we need to compose the swizzles and chase
    * through the source.
    */
   for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; ++i)
      swizzle[i] = alu->src[0].swizzle[swizzle[i]];

   *ssa = alu->src[0].src.ssa;
   return true;
}

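/* Chases an ALU source through SSA fneg/fabs (bottom-up), composing
 * swizzles, so that e.g. fadd(fneg(fabs(x)), y) reads x with both .fneg and
 * .fabs set. Sources not produced by ALU instructions are instead chased
 * through load_reg.
 */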
nir_legacy_alu_src
nir_legacy_chase_alu_src(const nir_alu_src *src, bool fuse_fabs)
{
   if (src->src.ssa->parent_instr->type == nir_instr_type_alu) {
      nir_legacy_alu_src out = {
         .src.is_ssa = true,
         .src.ssa = src->src.ssa,
      };
      STATIC_ASSERT(sizeof(src->swizzle) == sizeof(out.swizzle));
      memcpy(out.swizzle, src->swizzle, sizeof(src->swizzle));

      /* To properly handle foo(fneg(fabs(x))), we first chase fneg and then
       * fabs, since we chase from bottom-up. We don't handle fabs(fneg(x))
       * since nir_opt_algebraic should have eliminated that.
       */
      out.fneg = chase_source_mod(&out.src.ssa, nir_op_fneg, out.swizzle);
      if (fuse_fabs)
         out.fabs = chase_source_mod(&out.src.ssa, nir_op_fabs, out.swizzle);

      return out;
   } else {
      nir_legacy_alu_src out = chase_alu_src_helper(&src->src);
      memcpy(out.swizzle, src->swizzle, sizeof(src->swizzle));
      return out;
   }
}

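/* Translates a destination to the legacy form: if it feeds a store_reg
 * intrinsic, return a register destination (handle, base offset, optional
 * indirect, write mask) along with any fsat already folded into the store;
 * otherwise, return the SSA def with a full write mask.
 */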
static nir_legacy_alu_dest
chase_alu_dest_helper(nir_def *def)
{
   nir_intrinsic_instr *store = nir_store_reg_for_def(def);

   if (store) {
      bool indirect = (store->intrinsic == nir_intrinsic_store_reg_indirect);

      return (nir_legacy_alu_dest){
         .dest.is_ssa = false,
         .dest.reg = {
            .handle = store->src[1].ssa,
            .base_offset = nir_intrinsic_base(store),
            .indirect = indirect ? store->src[2].ssa : NULL },
         .fsat = nir_intrinsic_legacy_fsat(store),
         .write_mask = nir_intrinsic_write_mask(store),
      };
   } else {
      return (nir_legacy_alu_dest){
         .dest.is_ssa = true,
         .dest.ssa = def,
         .write_mask = nir_component_mask(def->num_components),
      };
   }
}

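/* Returns true if an fsat can be folded into the instruction producing its
 * source as a destination modifier: the producer must be a float ALU
 * instruction whose only use is the fsat, with no swizzle and no change in
 * component count.
 */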
bool
nir_legacy_fsat_folds(nir_alu_instr *fsat)
{
   assert(fsat->op == nir_op_fsat);
   nir_def *def = fsat->src[0].src.ssa;

   /* No legacy user supports fp64 modifiers */
   if (def->bit_size == 64)
      return false;

   /* Must be the only use */
   if (!list_is_singular(&def->uses))
      return false;

   assert(&fsat->src[0].src ==
          list_first_entry(&def->uses, nir_src, use_link));

   nir_instr *generate = def->parent_instr;
   if (generate->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *generate_alu = nir_instr_as_alu(generate);
   nir_alu_type dest_type = nir_op_infos[generate_alu->op].output_type;
   if (dest_type != nir_type_float)
      return false;

   /* If we are saturating a source modifier fsat(fabs(x)), we need to emit
    * either the fsat or the modifier or else the sequence disappears.
    */
   if (generate_alu->op == nir_op_fabs || generate_alu->op == nir_op_fneg)
      return false;

   /* We can't do expansions without a move in the middle */
   unsigned nr_components = generate_alu->def.num_components;
   if (fsat->def.num_components != nr_components)
      return false;

   /* We don't handle swizzles here, so check for the identity */
   for (unsigned i = 0; i < nr_components; ++i) {
      if (fsat->src[0].swizzle[i] != i)
         return false;
   }

   return true;
}

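/* If *def's only use is a foldable fsat, chase forward: rewrite *def to the
 * fsat's own def and return true, so saturate is applied at the producer's
 * destination.
 */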
static inline bool
chase_fsat(nir_def **def)
{
   /* No legacy user supports fp64 modifiers */
   if ((*def)->bit_size == 64)
      return false;

   if (!list_is_singular(&(*def)->uses))
      return false;

   nir_src *use = list_first_entry(&(*def)->uses, nir_src, use_link);
   if (nir_src_is_if(use) || nir_src_parent_instr(use)->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *fsat = nir_instr_as_alu(nir_src_parent_instr(use));
   if (fsat->op != nir_op_fsat || !nir_legacy_fsat_folds(fsat))
      return false;

   /* Otherwise, we're good */
   *def = &fsat->def;
   return true;
}

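/* Chases an ALU destination to the legacy form, folding in a subsequent
 * fsat when possible before falling back to chasing store_reg.
 */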
nir_legacy_alu_dest
nir_legacy_chase_alu_dest(nir_def *def)
{
   /* Try SSA fsat. No users support 64-bit modifiers. */
   if (chase_fsat(&def)) {
      return (nir_legacy_alu_dest){
         .dest.is_ssa = true,
         .dest.ssa = def,
         .fsat = true,
         .write_mask = nir_component_mask(def->num_components),
      };
   } else {
      return chase_alu_dest_helper(def);
   }
}

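/* Like nir_legacy_chase_alu_src, but for non-ALU instructions, which cannot
 * have float source modifiers folded in.
 */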
nir_legacy_src
nir_legacy_chase_src(const nir_src *src)
{
   nir_legacy_alu_src alu_src = chase_alu_src_helper(src);
   assert(!alu_src.fabs && !alu_src.fneg);
   return alu_src.src;
}

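/* Like nir_legacy_chase_alu_dest, but for non-ALU instructions, which cannot
 * have saturate folded in and must write all components.
 */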
nir_legacy_dest
nir_legacy_chase_dest(nir_def *def)
{
   nir_legacy_alu_dest alu_dest = chase_alu_dest_helper(def);
   assert(!alu_dest.fsat);
   assert(alu_dest.write_mask == nir_component_mask(def->num_components));

   return alu_dest.dest;
}

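/* Instruction pass callback: fold fneg (and fabs, if requested) into the
 * load_reg feeding it by cloning the load with the legacy modifier bits set
 * and rewriting all users, and fold a trailing fsat into the store_reg
 * consuming the instruction's result.
 */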
static bool
fuse_mods_with_registers(nir_builder *b, nir_instr *instr, void *fuse_fabs_)
{
   bool *fuse_fabs = fuse_fabs_;
   if (instr->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *alu = nir_instr_as_alu(instr);
   if ((alu->op == nir_op_fneg || (*fuse_fabs && alu->op == nir_op_fabs)) &&
       nir_legacy_float_mod_folds(alu)) {
      /* Try to fold this instruction into the load, if possible.  We only do
       * this for loads in the same block as the use because uses of loads
       * which cross block boundaries aren't trivial anyway.
       */
      nir_intrinsic_instr *load = nir_load_reg_for_def(alu->src[0].src.ssa);
      if (load != NULL) {
         /* Duplicate the load before changing it in case there are other
          * users. We assume someone has run CSE so there should be at most
          * four load instructions generated (one for each possible modifier
          * combination), but likely only one or two.
          */
         b->cursor = nir_before_instr(&load->instr);
         load = nir_instr_as_intrinsic(nir_instr_clone(b->shader, &load->instr));
         nir_builder_instr_insert(b, &load->instr);

         if (alu->op == nir_op_fabs) {
            nir_intrinsic_set_legacy_fabs(load, true);
            nir_intrinsic_set_legacy_fneg(load, false);
         } else {
            assert(alu->op == nir_op_fneg);
            bool old_fneg = nir_intrinsic_legacy_fneg(load);
            nir_intrinsic_set_legacy_fneg(load, !old_fneg);
         }

         /* Rewrite all the users to use the modified load instruction.  We
          * already know that they're all float ALU instructions because
          * nir_legacy_float_mod_folds() returned true.
          */
         nir_foreach_use_including_if_safe(use, &alu->def) {
            assert(!nir_src_is_if(use));
            assert(nir_src_parent_instr(use)->type == nir_instr_type_alu);
            nir_alu_src *alu_use = list_entry(use, nir_alu_src, src);
            nir_src_rewrite(&alu_use->src, &load->def);
            for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; ++i)
               alu_use->swizzle[i] = alu->src[0].swizzle[alu_use->swizzle[i]];
         }

         nir_instr_remove(&alu->instr);

         return true;
      } else {
         /* We don't want to attempt to add saturate to foldable mod ops */
         return false;
      }
   }

   nir_legacy_alu_dest dest = nir_legacy_chase_alu_dest(&alu->def);
   if (dest.fsat) {
      nir_intrinsic_instr *store = nir_store_reg_for_def(dest.dest.ssa);

      if (store) {
         nir_intrinsic_set_legacy_fsat(store, true);
         nir_src_rewrite(&store->src[0], &alu->def);
         return true;
      }
   }

   return false;
}

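/* Prepares a shader for backends consuming the legacy register-based form:
 * fuses float modifiers into load_reg/store_reg, cleans up the dead loads
 * that fusing leaves behind, then trivializes registers.
 */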
void
nir_legacy_trivialize(nir_shader *s, bool fuse_fabs)
{
   /* First, fuse modifiers with registers. This ensures that the helpers do
    * not chase registers recursively, allowing registers to be trivialized
    * more easily.
    */
   if (nir_shader_instructions_pass(s, fuse_mods_with_registers,
                                    nir_metadata_control_flow,
                                    &fuse_fabs)) {
      /* If we made progress, we likely left dead loads. Clean them up. */
      NIR_PASS_V(s, nir_opt_dce);
   }

   /* Now that modifiers are dealt with, we can trivialize the regular way. */
   NIR_PASS_V(s, nir_trivialize_registers);
}
340