1 /*
2 * Copyright © 2021 Google, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir_builder.h"
26
27 /*
28 * This pass tries to reduce the bitsize of phi instructions by either
29 * moving narrowing conversions from the phi's consumers to the phi's
30 * sources, if all the uses of the phi are equivalent narrowing
31 * instructions. In other words, convert:
32 *
33 * vec1 32 ssa_124 = load_const (0x00000000)
34 * ...
35 * loop {
36 * ...
37 * vec1 32 ssa_155 = phi block_0: ssa_124, block_4: ssa_53
38 * vec1 16 ssa_8 = i2imp ssa_155
39 * ...
40 * vec1 32 ssa_53 = i2i32 ssa_52
41 * }
42 *
43 * into:
44 *
45 * vec1 32 ssa_124 = load_const (0x00000000)
46 * vec1 16 ssa_156 = i2imp ssa_124
47 * ...
48 * loop {
49 * ...
50 * vec1 16 ssa_8 = phi block_0: ssa_156, block_4: ssa_157
51 * ...
52 * vec1 32 ssa_53 = i2i32 ssa_52
53 * vec1 16 ssa_157 = i2i16 ssa_53
54 * }
55 *
 * Or failing that, tries to push widening conversions of the phi srcs to
 * the phi def.  In this case, since load_const is frequently one
 * of the phi sources, this pass checks whether it can be narrowed
 * without a loss of precision:
60 *
61 * vec1 32 ssa_0 = load_const (0x00000000)
62 * ...
63 * loop {
64 * ...
65 * vec1 32 ssa_8 = phi block_0: ssa_0, block_4: ssa_19
66 * ...
67 * vec1 16 ssa_18 = iadd ssa_21, ssa_3
68 * vec1 32 ssa_19 = i2i32 ssa_18
69 * }
70 *
71 * into:
72 *
73 * vec1 32 ssa_0 = load_const (0x00000000)
74 * vec1 16 ssa_22 = i2i16 ssa_0
75 * ...
76 * loop {
77 * ...
78 * vec1 16 ssa_8 = phi block_0: ssa_22, block_4: ssa_18
79 * vec1 32 ssa_23 = i2i32 ssa_8
80 * ...
81 * vec1 16 ssa_18 = iadd ssa_21, ssa_3
82 * }
83 *
 * Note that either transformation can convert x2ymp into x2y16, which
85 * is normally done later in nir_opt_algebraic_late(), losing the option
86 * to fold away sequences like (i2i32 (i2imp (x))), but algebraic opts
87 * cannot see through phis.
88 */
89
90 #define INVALID_OP nir_num_opcodes
91
92 /**
93 * Get the corresponding exact conversion for a x2ymp conversion
94 */
95 static nir_op
concrete_conversion(nir_op op)96 concrete_conversion(nir_op op)
97 {
98 switch (op) {
99 case nir_op_i2imp:
100 return nir_op_i2i16;
101 case nir_op_i2fmp:
102 return nir_op_i2f16;
103 case nir_op_u2fmp:
104 return nir_op_u2f16;
105 case nir_op_f2fmp:
106 return nir_op_f2f16;
107 case nir_op_f2imp:
108 return nir_op_f2i16;
109 case nir_op_f2ump:
110 return nir_op_f2u16;
111 default:
112 return op;
113 }
114 }
115
116 static nir_op
narrowing_conversion_op(nir_instr * instr,nir_op current_op)117 narrowing_conversion_op(nir_instr *instr, nir_op current_op)
118 {
119 if (instr->type != nir_instr_type_alu)
120 return INVALID_OP;
121
122 nir_op op = nir_instr_as_alu(instr)->op;
123 switch (op) {
124 case nir_op_i2imp:
125 case nir_op_i2i16:
126 case nir_op_i2fmp:
127 case nir_op_i2f16:
128 case nir_op_u2fmp:
129 case nir_op_u2f16:
130 case nir_op_f2fmp:
131 case nir_op_f2f16:
132 case nir_op_f2imp:
133 case nir_op_f2i16:
134 case nir_op_f2ump:
135 case nir_op_f2u16:
136 case nir_op_f2f16_rtne:
137 case nir_op_f2f16_rtz:
138 break;
139 default:
140 return INVALID_OP;
141 }
142
143 /* If we've already picked a conversion op from a previous phi use,
144 * make sure it is compatible with the current use
145 */
146 if (current_op != INVALID_OP) {
147 if (current_op != op) {
148 /* If we have different conversions, but one can be converted
149 * to the other, then let's do that:
150 */
151 if (concrete_conversion(current_op) == concrete_conversion(op)) {
152 op = concrete_conversion(op);
153 } else {
154 return INVALID_OP;
155 }
156 }
157 }
158
159 return op;
160 }
161
162 static nir_op
widening_conversion_op(nir_instr * instr,unsigned * bit_size)163 widening_conversion_op(nir_instr *instr, unsigned *bit_size)
164 {
165 if (instr->type != nir_instr_type_alu)
166 return INVALID_OP;
167
168 nir_alu_instr *alu = nir_instr_as_alu(instr);
169 switch (alu->op) {
170 case nir_op_i2i32:
171 case nir_op_i2f32:
172 case nir_op_u2f32:
173 case nir_op_f2f32:
174 case nir_op_f2i32:
175 case nir_op_f2u32:
176 break;
177 default:
178 return INVALID_OP;
179 }
180
181 *bit_size = nir_src_bit_size(alu->src[0].src);
182
183 /* We also need to check that the conversion's dest was actually
184 * wider:
185 */
186 if (alu->def.bit_size <= *bit_size)
187 return INVALID_OP;
188
189 return alu->op;
190 }
191
192 static nir_alu_type
op_to_type(nir_op op)193 op_to_type(nir_op op)
194 {
195 return nir_alu_type_get_base_type(nir_op_infos[op].output_type);
196 }
197
/* Try to move narrowing instructions consuming the phi into the phi's
 * sources to reduce the phi's precision:
 *
 * Succeeds only when the 32b phi is consumed exclusively by mutually
 * compatible narrowing conversions (see narrowing_conversion_op()); the
 * conversions are then hoisted to each phi source and the consumers are
 * turned into movs of the new, narrower phi.
 */
static bool
try_move_narrowing_dst(nir_builder *b, nir_phi_instr *phi)
{
   nir_op op = INVALID_OP;

   /* If the phi has already been narrowed, nothing more to do: */
   if (phi->def.bit_size != 32)
      return false;

   /* Are the only uses of the phi conversion instructions, and
    * are they all the same conversion?
    */
   nir_foreach_use_including_if(use, &phi->def) {
      /* an if use means the phi is used directly in a conditional, ie.
       * without a conversion, so we cannot narrow it
       */
      if (nir_src_is_if(use))
         return false;

      /* Accumulate the conversion op across uses; incompatible uses
       * collapse "op" back to INVALID_OP:
       */
      op = narrowing_conversion_op(nir_src_parent_instr(use), op);

      /* Not a (compatible) narrowing conversion: */
      if (op == INVALID_OP)
         return false;
   }

   /* If the phi has no uses, then nothing to do: */
   if (op == INVALID_OP)
      return false;

   /* construct replacement phi instruction with the narrowed bit size
    * implied by the conversion's output type:
    */
   nir_phi_instr *new_phi = nir_phi_instr_create(b->shader);
   nir_def_init(&new_phi->instr, &new_phi->def,
                phi->def.num_components,
                nir_alu_type_get_type_size(nir_op_infos[op].output_type));

   /* Push the conversion into the new phi sources: */
   nir_foreach_phi_src(src, phi) {
      /* insert new conversion instr in block of original phi src,
       * after any phis (a conversion may not appear among a block's
       * leading phi group):
       */
      b->cursor = nir_after_instr_and_phis(src->src.ssa->parent_instr);
      nir_def *old_src = src->src.ssa;
      nir_def *new_src = nir_build_alu(b, op, old_src, NULL, NULL, NULL);

      /* and add corresponding phi_src to the new_phi: */
      nir_phi_instr_add_src(new_phi, src->pred, new_src);
   }

   /* And finally rewrite the original uses of the original phi uses to
    * directly use the new phi, skipping the conversion out of the orig
    * phi
    */
   nir_foreach_use(use, &phi->def) {
      /* We've previously established that all the uses were alu
       * conversion ops.  Turn them into movs instead; later copy
       * propagation will clean these up.
       */
      nir_alu_instr *alu = nir_instr_as_alu(nir_src_parent_instr(use));
      alu->op = nir_op_mov;
   }
   nir_def_rewrite_uses(&phi->def, &new_phi->def);

   /* And finally insert the new phi after all sources are in place: */
   b->cursor = nir_after_instr(&phi->instr);
   nir_builder_instr_insert(b, &new_phi->instr);

   return true;
}
267
268 static bool
can_convert_load_const(nir_load_const_instr * lc,nir_op op)269 can_convert_load_const(nir_load_const_instr *lc, nir_op op)
270 {
271 nir_alu_type type = op_to_type(op);
272
273 /* Note that we only handle phi's with bit_size == 32: */
274 assert(lc->def.bit_size == 32);
275
276 for (unsigned i = 0; i < lc->def.num_components; i++) {
277 switch (type) {
278 case nir_type_int:
279 if (lc->value[i].i32 != (int32_t)(int16_t)lc->value[i].i32)
280 return false;
281 break;
282 case nir_type_uint:
283 if (lc->value[i].u32 != (uint32_t)(uint16_t)lc->value[i].u32)
284 return false;
285 break;
286 case nir_type_float:
287 if (lc->value[i].f32 != _mesa_half_to_float(
288 _mesa_float_to_half(lc->value[i].f32)))
289 return false;
290 break;
291 default:
292 unreachable("bad type");
293 return false;
294 }
295 }
296
297 return true;
298 }
299
/* Check all the phi sources to see if they are the same widening op, in
 * which case we can push the widening op to the other side of the phi.
 *
 * Returns the common widening opcode (or INVALID_OP) and stores the
 * common pre-widening source bit size in *bit_size.  load_const sources
 * are exempt from the "same op" requirement but must be losslessly
 * narrowable (checked in the second pass below).
 */
static nir_op
find_widening_op(nir_phi_instr *phi, unsigned *bit_size)
{
   nir_op op = INVALID_OP;

   bool has_load_const = false;
   *bit_size = 0;

   /* First pass: every non-load_const source must be the same widening
    * conversion from the same source bit size:
    */
   nir_foreach_phi_src(src, phi) {
      nir_instr *instr = src->src.ssa->parent_instr;
      if (instr->type == nir_instr_type_load_const) {
         has_load_const = true;
         continue;
      }

      unsigned src_bit_size;
      nir_op src_op = widening_conversion_op(instr, &src_bit_size);

      /* Not a widening conversion: */
      if (src_op == INVALID_OP)
         return INVALID_OP;

      /* If it is a widening conversion, it needs to be the same op as
       * other phi sources:
       */
      if ((op != INVALID_OP) && (op != src_op))
         return INVALID_OP;

      /* and convert from the same narrower bit size: */
      if (*bit_size && (*bit_size != src_bit_size))
         return INVALID_OP;

      op = src_op;
      *bit_size = src_bit_size;
   }

   /* Note: op is still INVALID_OP if *all* sources were load_const: */
   if ((op == INVALID_OP) || !has_load_const)
      return op;

   /* If we could otherwise move widening sources, but load_const is
    * one of the phi sources (and does not have a widening conversion,
    * but could have a narrowing->widening sequence inserted without
    * loss of precision), then we could insert a narrowing->widening
    * sequence to make the rest of the transformation possible:
    */
   nir_foreach_phi_src(src, phi) {
      nir_instr *instr = src->src.ssa->parent_instr;
      if (instr->type != nir_instr_type_load_const)
         continue;

      if (!can_convert_load_const(nir_instr_as_load_const(instr), op))
         return INVALID_OP;
   }

   return op;
}
358
/* Try to move widening conversions of the phi srcs to the phi's output
 * to reduce the phi's precision:
 *
 * Succeeds only when find_widening_op() found a single common widening
 * conversion feeding every (non-load_const) phi source; the conversions
 * are stripped from the sources and a single widening conversion is
 * re-inserted after the new, narrower phi.
 */
static bool
try_move_widening_src(nir_builder *b, nir_phi_instr *phi)
{
   /* If the phi has already been narrowed, nothing more to do: */
   if (phi->def.bit_size != 32)
      return false;

   unsigned bit_size;
   nir_op op = find_widening_op(phi, &bit_size);

   if (op == INVALID_OP)
      return false;

   /* construct replacement phi instruction at the narrower source
    * bit size:
    */
   nir_phi_instr *new_phi = nir_phi_instr_create(b->shader);
   nir_def_init(&new_phi->instr, &new_phi->def,
                phi->def.num_components, bit_size);

   /* Remove the widening conversions from the phi sources: */
   nir_foreach_phi_src(src, phi) {
      nir_instr *instr = src->src.ssa->parent_instr;
      nir_def *new_src;

      b->cursor = nir_after_instr(instr);

      if (instr->type == nir_instr_type_load_const) {
         /* if the src is a load_const, we've already verified (in
          * find_widening_op() -> can_convert_load_const()) that it
          * is safe to insert a narrowing conversion to make the rest
          * of this transformation legal:
          */
         nir_load_const_instr *lc = nir_instr_as_load_const(instr);

         if (op_to_type(op) == nir_type_float) {
            new_src = nir_f2f16(b, &lc->def);
         } else {
            new_src = nir_i2i16(b, &lc->def);
         }
      } else {
         /* at this point we know the phi src's source instruction is a
          * conversion (find_widening_op() rejected everything else):
          */
         nir_alu_instr *alu = nir_instr_as_alu(instr);

         /* The conversion we are stripping off could have had a swizzle,
          * so replace it with a mov if necessary:
          */
         unsigned num_comp = alu->def.num_components;
         new_src = nir_mov_alu(b, alu->src[0], num_comp);
      }

      /* add corresponding phi_src to the new_phi: */
      nir_phi_instr_add_src(new_phi, src->pred, new_src);
   }

   /* And insert the new phi after all sources are in place: */
   b->cursor = nir_after_instr(&phi->instr);
   nir_builder_instr_insert(b, &new_phi->instr);

   /* And finally add back the widening conversion after the phi
    * (and after any sibling phis, since ALU instrs may not appear
    * among a block's leading phi group), and re-write the original
    * phi's uses
    */
   b->cursor = nir_after_instr_and_phis(&new_phi->instr);
   nir_def *def = nir_build_alu(b, op, &new_phi->def, NULL, NULL, NULL);

   nir_def_rewrite_uses(&phi->def, def);

   return true;
}
428
429 static bool
lower_phi(nir_builder * b,nir_phi_instr * phi)430 lower_phi(nir_builder *b, nir_phi_instr *phi)
431 {
432 bool progress = try_move_narrowing_dst(b, phi);
433 if (!progress)
434 progress = try_move_widening_src(b, phi);
435 return progress;
436 }
437
438 bool
nir_opt_phi_precision(nir_shader * shader)439 nir_opt_phi_precision(nir_shader *shader)
440 {
441 bool progress = false;
442
443 /* If 8b or 16b bit_sizes are not used, no point to run this pass: */
444 unsigned bit_sizes_used = shader->info.bit_sizes_float |
445 shader->info.bit_sizes_int;
446
447 /* Note: if the info is zeroed, we conservatively run to avoid gathering
448 * info, which doesn't work for libraries.
449 */
450 if (bit_sizes_used && !(bit_sizes_used & (8 | 16)))
451 return false;
452
453 nir_foreach_function_impl(impl, shader) {
454 nir_builder b = nir_builder_create(impl);
455
456 nir_foreach_block(block, impl) {
457 nir_foreach_phi_safe(phi, block)
458 progress |= lower_phi(&b, phi);
459 }
460
461 if (progress) {
462 nir_metadata_preserve(impl,
463 nir_metadata_control_flow);
464 } else {
465 nir_metadata_preserve(impl, nir_metadata_all);
466 }
467 }
468
469 return progress;
470 }
471