/*
 * Copyright 2023 Valve Corporation
 * Copyright 2020 Collabora, Ltd.
 * SPDX-License-Identifier: MIT
 */

#include "nir_legacy.h"
#include "nir.h"
#include "nir_builder.h"

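/* Check whether a floating-point source modifier (fabs/fneg) can be folded
 * into all of its users as a legacy source modifier. Every use must be a
 * float-typed source of an ALU instruction, and fp64 is excluded because no
 * legacy user supports fp64 modifiers.
 */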
bool
nir_legacy_float_mod_folds(nir_alu_instr *mod)
{
   assert(mod->op == nir_op_fabs || mod->op == nir_op_fneg);

   /* No legacy user supports fp64 modifiers */
   if (mod->def.bit_size == 64)
      return false;

   nir_foreach_use_including_if(src, &mod->def) {
      if (nir_src_is_if(src))
         return false;

      nir_instr *parent = nir_src_parent_instr(src);
      if (parent->type != nir_instr_type_alu)
         return false;

      nir_alu_instr *alu = nir_instr_as_alu(parent);
      nir_alu_src *alu_src = list_entry(src, nir_alu_src, src);
      unsigned src_index = alu_src - alu->src;

      assert(src_index < nir_op_infos[alu->op].num_inputs);
      nir_alu_type src_type = nir_op_infos[alu->op].input_types[src_index];

      if (nir_alu_type_get_base_type(src_type) != nir_type_float)
         return false;
   }

   return true;
}

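/* Translate a source into its legacy form. If the SSA value comes from a
 * load_reg/load_reg_indirect intrinsic, return a register source carrying any
 * legacy fabs/fneg already fused into the load; otherwise return the SSA value
 * unchanged.
 */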
static nir_legacy_alu_src
chase_alu_src_helper(const nir_src *src)
{
   nir_intrinsic_instr *load = nir_load_reg_for_def(src->ssa);

   if (load) {
      bool indirect = (load->intrinsic == nir_intrinsic_load_reg_indirect);

      return (nir_legacy_alu_src){
         .src.is_ssa = false,
         .src.reg = {
            .handle = load->src[0].ssa,
            .base_offset = nir_intrinsic_base(load),
            .indirect = indirect ? load->src[1].ssa : NULL },
         .fabs = nir_intrinsic_legacy_fabs(load),
         .fneg = nir_intrinsic_legacy_fneg(load),
      };
   } else {
      return (nir_legacy_alu_src){
         .src.is_ssa = true,
         .src.ssa = src->ssa,
      };
   }
}

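/* If *ssa is produced by a foldable unary modifier of the given op (fabs or
 * fneg), fuse it into the use: compose the swizzles, step *ssa back to the
 * modifier's source, and report that the modifier applies.
 */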
static inline bool
chase_source_mod(nir_def **ssa, nir_op op, uint8_t *swizzle)
{
   if ((*ssa)->parent_instr->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *alu = nir_instr_as_alu((*ssa)->parent_instr);
   if (alu->op != op)
      return false;

   /* If there are other uses of the modifier that don't fold, we can't fold it
    * here either, in case it's reading from a load_reg that won't be emitted.
    */
   if (!nir_legacy_float_mod_folds(alu))
      return false;

   /* This only works for unary ops */
   assert(nir_op_infos[op].num_inputs == 1);

   /* To fuse the source mod in, we need to compose the swizzles and string
    * through the source.
    */
   for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; ++i)
      swizzle[i] = alu->src[0].swizzle[swizzle[i]];

   *ssa = alu->src[0].src.ssa;
   return true;
}

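/* Chase an ALU source back through foldable fneg (and optionally fabs)
 * instructions, or through a register load, returning the legacy view of the
 * source with modifiers and swizzle composed.
 */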
nir_legacy_alu_src
nir_legacy_chase_alu_src(const nir_alu_src *src, bool fuse_fabs)
{
   if (src->src.ssa->parent_instr->type == nir_instr_type_alu) {
      nir_legacy_alu_src out = {
         .src.is_ssa = true,
         .src.ssa = src->src.ssa,
      };
      STATIC_ASSERT(sizeof(src->swizzle) == sizeof(out.swizzle));
      memcpy(out.swizzle, src->swizzle, sizeof(src->swizzle));

      /* To properly handle foo(fneg(fabs(x))), we first chase fneg and then
       * fabs, since we chase from bottom-up. We don't handle fabs(fneg(x))
       * since nir_opt_algebraic should have eliminated that.
       */
      out.fneg = chase_source_mod(&out.src.ssa, nir_op_fneg, out.swizzle);
      if (fuse_fabs)
         out.fabs = chase_source_mod(&out.src.ssa, nir_op_fabs, out.swizzle);

      return out;
   } else {
      nir_legacy_alu_src out = chase_alu_src_helper(&src->src);
      memcpy(out.swizzle, src->swizzle, sizeof(src->swizzle));
      return out;
   }
}

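/* Translate a destination into its legacy form. If the SSA def feeds a
 * store_reg/store_reg_indirect intrinsic, return a register destination with
 * the store's write mask and legacy fsat flag; otherwise return the SSA def
 * with a full write mask.
 */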
static nir_legacy_alu_dest
chase_alu_dest_helper(nir_def *def)
{
   nir_intrinsic_instr *store = nir_store_reg_for_def(def);

   if (store) {
      bool indirect = (store->intrinsic == nir_intrinsic_store_reg_indirect);

      return (nir_legacy_alu_dest){
         .dest.is_ssa = false,
         .dest.reg = {
            .handle = store->src[1].ssa,
            .base_offset = nir_intrinsic_base(store),
            .indirect = indirect ? store->src[2].ssa : NULL },
         .fsat = nir_intrinsic_legacy_fsat(store),
         .write_mask = nir_intrinsic_write_mask(store),
      };
   } else {
      return (nir_legacy_alu_dest){
         .dest.is_ssa = true,
         .dest.ssa = def,
         .write_mask = nir_component_mask(def->num_components),
      };
   }
}

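/* Check whether an fsat instruction can be folded into the instruction that
 * produces its source as a legacy saturate output modifier. The producer must
 * be a float ALU instruction whose only use is this fsat, with matching
 * component counts and an identity swizzle, and must not itself be a foldable
 * fabs/fneg.
 */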
bool
nir_legacy_fsat_folds(nir_alu_instr *fsat)
{
   assert(fsat->op == nir_op_fsat);
   nir_def *def = fsat->src[0].src.ssa;

   /* No legacy user supports fp64 modifiers */
   if (def->bit_size == 64)
      return false;

   /* Must be the only use */
   if (!list_is_singular(&def->uses))
      return false;

   assert(&fsat->src[0].src ==
          list_first_entry(&def->uses, nir_src, use_link));

   nir_instr *generate = def->parent_instr;
   if (generate->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *generate_alu = nir_instr_as_alu(generate);
   nir_alu_type dest_type = nir_op_infos[generate_alu->op].output_type;
   if (dest_type != nir_type_float)
      return false;

   /* If we are saturating a source modifier fsat(fabs(x)), we need to emit
    * either the fsat or the modifier or else the sequence disappears.
    */
   if (generate_alu->op == nir_op_fabs || generate_alu->op == nir_op_fneg)
      return false;

   /* We can't do expansions without a move in the middle */
   unsigned nr_components = generate_alu->def.num_components;
   if (fsat->def.num_components != nr_components)
      return false;

   /* We don't handle swizzles here, so check for the identity */
   for (unsigned i = 0; i < nr_components; ++i) {
      if (fsat->src[0].swizzle[i] != i)
         return false;
   }

   return true;
}

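/* If the sole use of *def is an fsat that folds as a legacy saturate
 * modifier, advance *def to the fsat's destination and report that saturate
 * applies.
 */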
static inline bool
chase_fsat(nir_def **def)
{
   /* No legacy user supports fp64 modifiers */
   if ((*def)->bit_size == 64)
      return false;

   if (!list_is_singular(&(*def)->uses))
      return false;

   nir_src *use = list_first_entry(&(*def)->uses, nir_src, use_link);
   if (nir_src_is_if(use) || nir_src_parent_instr(use)->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *fsat = nir_instr_as_alu(nir_src_parent_instr(use));
   if (fsat->op != nir_op_fsat || !nir_legacy_fsat_folds(fsat))
      return false;

   /* Otherwise, we're good */
   nir_alu_instr *alu = nir_instr_as_alu(nir_src_parent_instr(use));
   *def = &alu->def;
   return true;
}

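/* Chase an ALU destination forward through a foldable fsat and through
 * register stores, returning the legacy view of the destination including the
 * saturate flag and write mask.
 */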
nir_legacy_alu_dest
nir_legacy_chase_alu_dest(nir_def *def)
{
   /* Try SSA fsat. No users support 64-bit modifiers. */
   if (chase_fsat(&def)) {
      return (nir_legacy_alu_dest){
         .dest.is_ssa = true,
         .dest.ssa = def,
         .fsat = true,
         .write_mask = nir_component_mask(def->num_components),
      };
   } else {
      return chase_alu_dest_helper(def);
   }
}

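/* Chase a non-ALU source through register loads. Such sources cannot carry
 * float modifiers, so only the register/SSA part of the legacy source is
 * returned.
 */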
nir_legacy_src
nir_legacy_chase_src(const nir_src *src)
{
   nir_legacy_alu_src alu_src = chase_alu_src_helper(src);
   assert(!alu_src.fabs && !alu_src.fneg);
   return alu_src.src;
}

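/* Chase a non-ALU destination through register stores. Such destinations
 * cannot carry saturate or a partial write mask, so only the register/SSA
 * part of the legacy destination is returned.
 */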
nir_legacy_dest
nir_legacy_chase_dest(nir_def *def)
{
   nir_legacy_alu_dest alu_dest = chase_alu_dest_helper(def);
   assert(!alu_dest.fsat);
   assert(alu_dest.write_mask == nir_component_mask(def->num_components));

   return alu_dest.dest;
}

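/* Per-instruction pass callback: fold foldable fabs/fneg instructions into
 * the load_reg intrinsics that feed them (cloning the load so other users are
 * unaffected), and fold foldable fsat instructions into the store_reg
 * intrinsics they feed, using the legacy_fabs/legacy_fneg/legacy_fsat
 * indices.
 */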
static bool
fuse_mods_with_registers(nir_builder *b, nir_instr *instr, void *fuse_fabs_)
{
   bool *fuse_fabs = fuse_fabs_;
   if (instr->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *alu = nir_instr_as_alu(instr);
   if ((alu->op == nir_op_fneg || (*fuse_fabs && alu->op == nir_op_fabs)) &&
       nir_legacy_float_mod_folds(alu)) {
      /* Try to fold this instruction into the load, if possible. We only do
       * this for loads in the same block as the use because uses of loads
       * which cross block boundaries aren't trivial anyway.
       */
      nir_intrinsic_instr *load = nir_load_reg_for_def(alu->src[0].src.ssa);
      if (load != NULL) {
         /* Duplicate the load before changing it in case there are other
          * users. We assume someone has run CSE so there should be at most
          * four load instructions generated (one for each possible modifier
          * combination), but likely only one or two.
          */
         b->cursor = nir_before_instr(&load->instr);
         load = nir_instr_as_intrinsic(nir_instr_clone(b->shader, &load->instr));
         nir_builder_instr_insert(b, &load->instr);

         if (alu->op == nir_op_fabs) {
            nir_intrinsic_set_legacy_fabs(load, true);
            nir_intrinsic_set_legacy_fneg(load, false);
         } else {
            assert(alu->op == nir_op_fneg);
            bool old_fneg = nir_intrinsic_legacy_fneg(load);
            nir_intrinsic_set_legacy_fneg(load, !old_fneg);
         }

         /* Rewrite all the users to use the modified load instruction. We
          * already know that they're all float ALU instructions because
          * nir_legacy_float_mod_folds() returned true.
          */
         nir_foreach_use_including_if_safe(use, &alu->def) {
            assert(!nir_src_is_if(use));
            assert(nir_src_parent_instr(use)->type == nir_instr_type_alu);
            nir_alu_src *alu_use = list_entry(use, nir_alu_src, src);
            nir_src_rewrite(&alu_use->src, &load->def);
            for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; ++i)
               alu_use->swizzle[i] = alu->src[0].swizzle[alu_use->swizzle[i]];
         }

         nir_instr_remove(&alu->instr);

         return true;
      } else {
         /* We don't want to attempt to add saturate to foldable mod ops */
         return false;
      }
   }

   nir_legacy_alu_dest dest = nir_legacy_chase_alu_dest(&alu->def);
   if (dest.fsat) {
      nir_intrinsic_instr *store = nir_store_reg_for_def(dest.dest.ssa);

      if (store) {
         nir_intrinsic_set_legacy_fsat(store, true);
         nir_src_rewrite(&store->src[0], &alu->def);
         return true;
      }
   }

   return false;
}

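/* Prepare a shader that has been taken out of SSA (using load_reg/store_reg
 * intrinsics) for backends that consume legacy source/destination modifiers:
 * fuse fneg (and optionally fabs) into register loads and fsat into register
 * stores, then trivialize the remaining registers.
 *
 * As a rough sketch only (the exact backend flow is an assumption, not part
 * of this file), a backend might use this API along these lines:
 *
 *    nir_legacy_trivialize(nir, fuse_fabs);
 *
 *    // later, during instruction selection, for each ALU instruction:
 *    nir_legacy_alu_src  src  = nir_legacy_chase_alu_src(&alu->src[i], fuse_fabs);
 *    nir_legacy_alu_dest dest = nir_legacy_chase_alu_dest(&alu->def);
 *    // emit hardware instructions using src.fabs/src.fneg, dest.fsat and
 *    // dest.write_mask
 */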
void
nir_legacy_trivialize(nir_shader *s, bool fuse_fabs)
{
   /* First, fuse modifiers with registers. This ensures that the helpers do
    * not chase registers recursively, allowing registers to be trivialized
    * more easily.
    */
   if (nir_shader_instructions_pass(s, fuse_mods_with_registers,
                                    nir_metadata_control_flow,
                                    &fuse_fabs)) {
      /* If we made progress, we likely left dead loads. Clean them up. */
      NIR_PASS_V(s, nir_opt_dce);
   }

   /* Now that modifiers are dealt with, we can trivialize the regular way. */
   NIR_PASS_V(s, nir_trivialize_registers);
}