1 /*
2 * Copyright © 2015 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "util/mesa-blake3.h"
27 #include <math.h>
28
29 /** @file nir_opt_undef.c
30 *
31 * Handles optimization of operations involving ssa_undef.
32 */
33
/* Pass-wide settings handed to the per-instruction callback. */
struct undef_options {
   /* When set, replace_ssa_undef() never materializes NaN for an undef and
    * falls back to the integer constant 0 instead.
    */
   bool disallow_undef_to_nan;
};
37
38 /**
39 * Turn conditional selects between an undef and some other value into a move
40 * of that other value (on the assumption that the condition's going to be
41 * choosing the defined value). This reduces work after if flattening when
42 * each side of the if is defining a variable.
43 */
44 static bool
opt_undef_csel(nir_builder * b,nir_alu_instr * instr)45 opt_undef_csel(nir_builder *b, nir_alu_instr *instr)
46 {
47 if (!nir_op_is_selection(instr->op))
48 return false;
49
50 for (int i = 1; i <= 2; i++) {
51 nir_instr *parent = instr->src[i].src.ssa->parent_instr;
52 if (parent->type != nir_instr_type_undef)
53 continue;
54
55 b->cursor = nir_instr_remove(&instr->instr);
56 nir_def *mov = nir_mov_alu(b, instr->src[i == 1 ? 2 : 1],
57 instr->def.num_components);
58 nir_def_rewrite_uses(&instr->def, mov);
59
60 return true;
61 }
62
63 return false;
64 }
65
66 /**
67 * Replace vecN(undef, undef, ...) with a single undef.
68 */
69 static bool
opt_undef_vecN(nir_builder * b,nir_alu_instr * alu)70 opt_undef_vecN(nir_builder *b, nir_alu_instr *alu)
71 {
72 if (!nir_op_is_vec_or_mov(alu->op))
73 return false;
74
75 for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
76 if (alu->src[i].src.ssa->parent_instr->type != nir_instr_type_undef)
77 return false;
78 }
79
80 b->cursor = nir_before_instr(&alu->instr);
81 nir_def *undef = nir_undef(b, alu->def.num_components,
82 alu->def.bit_size);
83 nir_def_rewrite_uses(&alu->def, undef);
84
85 return true;
86 }
87
88 static uint32_t
nir_get_undef_mask(nir_def * def)89 nir_get_undef_mask(nir_def *def)
90 {
91 nir_instr *instr = def->parent_instr;
92
93 if (instr->type == nir_instr_type_undef)
94 return BITSET_MASK(def->num_components);
95
96 if (instr->type != nir_instr_type_alu)
97 return 0;
98
99 nir_alu_instr *alu = nir_instr_as_alu(instr);
100 unsigned undef = 0;
101
102 /* nir_op_mov of undef is handled by opt_undef_vecN() */
103 if (nir_op_is_vec(alu->op)) {
104 for (int i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
105 if (alu->src[i].src.ssa->parent_instr->type ==
106 nir_instr_type_undef) {
107 undef |= BITSET_MASK(nir_ssa_alu_instr_src_components(alu, i)) << i;
108 }
109 }
110 }
111
112 return undef;
113 }
114
115 /**
116 * Remove any store intrinsic writemask channels whose value is undefined (the
117 * existing value is a fine representation of "undefined").
118 */
119 static bool
opt_undef_store(nir_intrinsic_instr * intrin)120 opt_undef_store(nir_intrinsic_instr *intrin)
121 {
122 int arg_index;
123 switch (intrin->intrinsic) {
124 case nir_intrinsic_store_deref:
125 arg_index = 1;
126 break;
127 case nir_intrinsic_store_output:
128 case nir_intrinsic_store_per_vertex_output:
129 case nir_intrinsic_store_per_primitive_output:
130 case nir_intrinsic_store_ssbo:
131 case nir_intrinsic_store_shared:
132 case nir_intrinsic_store_global:
133 case nir_intrinsic_store_scratch:
134 arg_index = 0;
135 break;
136 default:
137 return false;
138 }
139
140 nir_def *def = intrin->src[arg_index].ssa;
141
142 unsigned write_mask = nir_intrinsic_write_mask(intrin);
143 unsigned undef_mask = nir_get_undef_mask(def);
144
145 if (!(write_mask & undef_mask))
146 return false;
147
148 write_mask &= ~undef_mask;
149 if (!write_mask)
150 nir_instr_remove(&intrin->instr);
151 else
152 nir_intrinsic_set_write_mask(intrin, write_mask);
153
154 return true;
155 }
156
/* Facts gathered by visit_undef_use() over all uses of one undef. */
struct visit_info {
   /* Some ALU source would benefit from seeing a constant. */
   bool replace_undef_with_constant;
   /* At least one such source is a float input for which NaN is preferred. */
   bool prefer_nan;
   /* Some use is an "if" condition or a non-ALU instruction. */
   bool must_keep_undef;
};
162
163 /**
164 * Analyze an undef use to see if replacing undef with a constant is
165 * beneficial.
166 */
167 static void
visit_undef_use(nir_src * src,struct visit_info * info)168 visit_undef_use(nir_src *src, struct visit_info *info)
169 {
170 if (nir_src_is_if(src)) {
171 /* If the use is "if", keep undef because the branch will be eliminated
172 * by nir_opt_dead_cf.
173 */
174 info->must_keep_undef = true;
175 return;
176 }
177
178 nir_instr *instr = nir_src_parent_instr(src);
179
180 if (instr->type == nir_instr_type_alu) {
181 /* Replacing undef with a constant is only beneficial with ALU
182 * instructions because it can eliminate them or simplify them.
183 */
184 nir_alu_instr *alu = nir_instr_as_alu(instr);
185
186 /* Follow movs and vecs.
187 *
188 * Note that all vector component uses are followed and swizzles are
189 * ignored.
190 */
191 if (alu->op == nir_op_mov || nir_op_is_vec(alu->op)) {
192 nir_foreach_use_including_if(next_src, &alu->def) {
193 visit_undef_use(next_src, info);
194 }
195 return;
196 }
197
198 unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
199
200 for (unsigned i = 0; i < num_srcs; i++) {
201 if (&alu->src[i].src != src)
202 continue;
203
204 if (nir_op_is_selection(alu->op) && i != 0) {
205 /* nir_opt_algebraic can eliminate a select opcode only if src0 is
206 * a constant. If the undef use is src1 or src2, it will be
207 * handled by opt_undef_csel.
208 */
209 continue;
210 }
211
212 info->replace_undef_with_constant = true;
213 if (nir_op_infos[alu->op].input_types[i] & nir_type_float &&
214 alu->op != nir_op_fmulz &&
215 (alu->op != nir_op_ffmaz || i == 2))
216 info->prefer_nan = true;
217 }
218 } else {
219 /* If the use is not ALU, don't replace undef. We need to preserve
220 * undef for stores and phis because those are handled differently,
221 * and replacing undef with a constant would result in worse code.
222 */
223 info->must_keep_undef = true;
224 return;
225 }
226 }
227
228 /**
229 * Replace ssa_undef used by ALU opcodes with 0 or NaN, whichever eliminates
230 * more code.
231 *
232 * Replace it with NaN if an FP opcode uses undef, which will cause the opcode
233 * to be eliminated by nir_opt_algebraic. 0 would not eliminate the FP opcode.
234 */
235 static bool
replace_ssa_undef(nir_builder * b,nir_instr * instr,const struct undef_options * options)236 replace_ssa_undef(nir_builder *b, nir_instr *instr,
237 const struct undef_options *options)
238 {
239 nir_undef_instr *undef = nir_instr_as_undef(instr);
240 struct visit_info info = {0};
241
242 nir_foreach_use_including_if(src, &undef->def) {
243 visit_undef_use(src, &info);
244 }
245
246 if (info.must_keep_undef || !info.replace_undef_with_constant)
247 return false;
248
249 b->cursor = nir_before_instr(&undef->instr);
250 nir_def *replacement;
251
252 /* If undef is used as float, replace it with NaN, which will
253 * eliminate all FP instructions that consume it. Else, replace it
254 * with 0, which is more likely to eliminate non-FP instructions.
255 */
256 if (info.prefer_nan && !options->disallow_undef_to_nan)
257 replacement = nir_imm_floatN_t(b, NAN, undef->def.bit_size);
258 else
259 replacement = nir_imm_intN_t(b, 0, undef->def.bit_size);
260
261 if (undef->def.num_components > 1)
262 replacement = nir_replicate(b, replacement, undef->def.num_components);
263
264 nir_def_rewrite_uses_after(&undef->def, replacement, &undef->instr);
265 nir_instr_remove(&undef->instr);
266 return true;
267 }
268
269 static bool
nir_opt_undef_instr(nir_builder * b,nir_instr * instr,void * data)270 nir_opt_undef_instr(nir_builder *b, nir_instr *instr, void *data)
271 {
272 const struct undef_options *options = data;
273
274 if (instr->type == nir_instr_type_undef) {
275 return replace_ssa_undef(b, instr, options);
276 } else if (instr->type == nir_instr_type_alu) {
277 nir_alu_instr *alu = nir_instr_as_alu(instr);
278 return opt_undef_csel(b, alu) ||
279 opt_undef_vecN(b, alu);
280 } else if (instr->type == nir_instr_type_intrinsic) {
281 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
282 return opt_undef_store(intrin);
283 }
284
285 return false;
286 }
287
288 bool
nir_opt_undef(nir_shader * shader)289 nir_opt_undef(nir_shader *shader)
290 {
291 struct undef_options options = {0};
292
293 /* Disallow the undef->NaN transformation only for those shaders where
294 * it's known to break rendering. These are shader source BLAKE3s printed by
295 * nir_print_shader().
296 */
297 uint32_t shader_blake3s[][BLAKE3_OUT_LEN32] = {
298 /* gputest/gimark */
299 {0x582c214b, 0x25478275, 0xc9a835d2, 0x95c9b643, 0x69deae47, 0x213c7427, 0xa9da66a5, 0xac254ed2},
300
301 /* Viewperf13/CATIA_car_01 */
302 {0x880dfa0f, 0x60e32201, 0xe3a89f59, 0xb1cc6f07, 0xcdbebe66, 0x20122aec, 0x83450d4e, 0x8f42843d}, /* Taillights */
303 {0x624e53bb, 0x8eb635ba, 0xb1e4ed9b, 0x651b0fec, 0x86fcf79a, 0xde0863fb, 0x09ce80c1, 0xd972e40f}, /* Grill */
304 {0x01a8db39, 0xfa175175, 0x621f7302, 0xfcde9177, 0x72d873bf, 0x048d38c1, 0xe669d2de, 0xaa6584af}, /* Headlights */
305 {0x32029770, 0xab295b41, 0x3f1daf07, 0x9dd9153e, 0xd598be73, 0xe555b2f3, 0x6e087eaf, 0x084d329c}, /* Rims */
306
307 /* Viewperf13/CATIA_car_04 */
308 {0x55207b90, 0x08fa2f8f, 0x9db62464, 0xadba6570, 0xb6d5d962, 0xf434bff5, 0x46a34d64, 0x021bfb45}, /* Headlights */
309 {0x83fbdd6a, 0x231b027e, 0x6f142248, 0x2b3045de, 0xd2a4f460, 0x59dfb8d8, 0x6dbc00f9, 0xcca13143}, /* Rims */
310 {0x88ed3a0a, 0xf128d384, 0x8161fdac, 0xd10cb257, 0x5e63db2d, 0x56798b6f, 0x881e81ee, 0xa4e937d4}, /* Tires */
311 {0xbf84697c, 0x3bc75bb6, 0x9d012175, 0x2dd90bcf, 0x0562f0ed, 0x5aa80e62, 0xb5793ae3, 0x9127bcab}, /* Windows */
312 {0x47a3eb4b, 0x136f676d, 0x94045ed3, 0x57b00972, 0x8cda7550, 0x88327fda, 0x37f7cf37, 0x66db05e3}, /* Body */
313 };
314
315 for (unsigned i = 0; i < ARRAY_SIZE(shader_blake3s); i++) {
316 if (_mesa_printed_blake3_equal(shader->info.source_blake3, shader_blake3s[i])) {
317 options.disallow_undef_to_nan = true;
318 break;
319 }
320 }
321
322 if (shader->info.use_legacy_math_rules)
323 options.disallow_undef_to_nan = true;
324
325 return nir_shader_instructions_pass(shader,
326 nir_opt_undef_instr,
327 nir_metadata_control_flow,
328 &options);
329 }
330