/*
 * Copyright © 2021 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"

/*
 * This pass tries to reduce the bitsize of phi instructions by moving
 * narrowing conversions from the phi's consumers to the phi's sources,
 * if all the uses of the phi are equivalent narrowing instructions.  In
 * other words, convert:
 *
 *    vec1 32 ssa_124 = load_const (0x00000000)
 *    ...
 *    loop {
 *        ...
 *        vec1 32 ssa_155 = phi block_0: ssa_124, block_4: ssa_53
 *        vec1 16 ssa_8 = i2imp ssa_155
 *        ...
 *        vec1 32 ssa_53 = i2i32 ssa_52
 *    }
 *
 * into:
 *
 *    vec1 32 ssa_124 = load_const (0x00000000)
 *    vec1 16 ssa_156 = i2imp ssa_124
 *    ...
 *    loop {
 *        ...
 *        vec1 16 ssa_8 = phi block_0: ssa_156, block_4: ssa_157
 *        ...
 *        vec1 32 ssa_53 = i2i32 ssa_52
 *        vec1 16 ssa_157 = i2i16 ssa_53
 *    }
 *
 * Or, failing that, it tries to push a widening conversion of the phi
 * srcs to the phi def.  In this case, since load_const is frequently one
 * of the phi sources, this pass checks whether the constant can be
 * narrowed without a loss of precision:
 *
 *    vec1 32 ssa_0 = load_const (0x00000000)
 *    ...
 *    loop {
 *        ...
 *        vec1 32 ssa_8 = phi block_0: ssa_0, block_4: ssa_19
 *        ...
 *        vec1 16 ssa_18 = iadd ssa_21, ssa_3
 *        vec1 32 ssa_19 = i2i32 ssa_18
 *    }
 *
 * into:
 *
 *    vec1 32 ssa_0 = load_const (0x00000000)
 *    vec1 16 ssa_22 = i2i16 ssa_0
 *    ...
 *    loop {
 *        ...
 *        vec1 16 ssa_8 = phi block_0: ssa_22, block_4: ssa_18
 *        vec1 32 ssa_23 = i2i32 ssa_8
 *        ...
 *        vec1 16 ssa_18 = iadd ssa_21, ssa_3
 *    }
 *
 * Note that either transformation can convert x2ymp into x2y16, which
 * is normally done later in nir_opt_algebraic_late(), losing the option
 * to fold away sequences like (i2i32 (i2imp (x))), but algebraic opts
 * cannot see through phis.
 */
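
/* (Roughly speaking, the x2ymp opcodes are the "mediump" conversions: they
 * produce 16-bit values like their exact x2y16 counterparts, but mark
 * conversions inserted by mediump lowering, which may legally be folded
 * away, e.g. (i2i32 (i2imp x)) -> x.)
 */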

#define INVALID_OP nir_num_opcodes

/**
 * Get the corresponding exact conversion for an x2ymp conversion
 */
static nir_op
concrete_conversion(nir_op op)
{
   switch (op) {
   case nir_op_i2imp:
      return nir_op_i2i16;
   case nir_op_i2fmp:
      return nir_op_i2f16;
   case nir_op_u2fmp:
      return nir_op_u2f16;
   case nir_op_f2fmp:
      return nir_op_f2f16;
   case nir_op_f2imp:
      return nir_op_f2i16;
   case nir_op_f2ump:
      return nir_op_f2u16;
   default:
      return op;
   }
}

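/* Return the narrowing conversion op performed by instr (or INVALID_OP),
 * unified with the op already seen on other uses of the same phi
 * (current_op).  For example, if one use is i2imp and another is i2i16,
 * the two are compatible and unify to the exact op, i2i16.
 */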
static nir_op
narrowing_conversion_op(nir_instr *instr, nir_op current_op)
{
   if (instr->type != nir_instr_type_alu)
      return INVALID_OP;

   nir_op op = nir_instr_as_alu(instr)->op;
   switch (op) {
   case nir_op_i2imp:
   case nir_op_i2i16:
   case nir_op_i2fmp:
   case nir_op_i2f16:
   case nir_op_u2fmp:
   case nir_op_u2f16:
   case nir_op_f2fmp:
   case nir_op_f2f16:
   case nir_op_f2imp:
   case nir_op_f2i16:
   case nir_op_f2ump:
   case nir_op_f2u16:
   case nir_op_f2f16_rtne:
   case nir_op_f2f16_rtz:
      break;
   default:
      return INVALID_OP;
   }

   /* If we've already picked a conversion op from a previous phi use,
    * make sure it is compatible with the current use:
    */
   if (current_op != INVALID_OP) {
      if (current_op != op) {
         /* If we have different conversions, but one can be converted
          * to the other, then let's do that:
          */
         if (concrete_conversion(current_op) == concrete_conversion(op)) {
            op = concrete_conversion(op);
         } else {
            return INVALID_OP;
         }
      }
   }

   return op;
}

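/* Return the widening conversion op performed by instr (or INVALID_OP if
 * instr is not a strictly widening conversion), storing the pre-widening
 * source bit size in *bit_size.  e.g. for "vec1 32 ssa_19 = i2i32 ssa_18"
 * with a 16-bit ssa_18, this returns i2i32 with *bit_size = 16.
 */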
static nir_op
widening_conversion_op(nir_instr *instr, unsigned *bit_size)
{
   if (instr->type != nir_instr_type_alu)
      return INVALID_OP;

   nir_alu_instr *alu = nir_instr_as_alu(instr);
   switch (alu->op) {
   case nir_op_i2i32:
   case nir_op_i2f32:
   case nir_op_u2f32:
   case nir_op_f2f32:
   case nir_op_f2i32:
   case nir_op_f2u32:
      break;
   default:
      return INVALID_OP;
   }

   *bit_size = nir_src_bit_size(alu->src[0].src);

   /* We also need to check that the conversion's dest was actually
    * wider:
    */
   if (alu->def.bit_size <= *bit_size)
      return INVALID_OP;

   return alu->op;
}

static nir_alu_type
op_to_type(nir_op op)
{
   return nir_alu_type_get_base_type(nir_op_infos[op].output_type);
}

/* Try to move narrowing instructions consuming the phi into the phi's
 * sources to reduce the phi's precision:
 */
static bool
try_move_narrowing_dst(nir_builder *b, nir_phi_instr *phi)
{
   nir_op op = INVALID_OP;

   /* If the phi has already been narrowed, nothing more to do: */
   if (phi->def.bit_size != 32)
      return false;

   /* Are the only uses of the phi conversion instructions, and
    * are they all the same conversion?
    */
   nir_foreach_use_including_if(use, &phi->def) {
      /* An if use means the phi is used directly in a conditional, i.e.
       * without a conversion.
       */
      if (nir_src_is_if(use))
         return false;

      op = narrowing_conversion_op(nir_src_parent_instr(use), op);

      /* Not a (compatible) narrowing conversion: */
      if (op == INVALID_OP)
         return false;
   }

   /* If the phi has no uses, then nothing to do: */
   if (op == INVALID_OP)
      return false;

   /* Construct the replacement phi instruction: */
   nir_phi_instr *new_phi = nir_phi_instr_create(b->shader);
   nir_def_init(&new_phi->instr, &new_phi->def,
                phi->def.num_components,
                nir_alu_type_get_type_size(nir_op_infos[op].output_type));

   /* Push the conversion into the new phi sources: */
   nir_foreach_phi_src(src, phi) {
      /* Insert the new conversion instr in the block of the original phi src: */
      b->cursor = nir_after_instr_and_phis(src->src.ssa->parent_instr);
      nir_def *old_src = src->src.ssa;
      nir_def *new_src = nir_build_alu(b, op, old_src, NULL, NULL, NULL);

      /* and add the corresponding phi_src to the new_phi: */
      nir_phi_instr_add_src(new_phi, src->pred, new_src);
   }

   /* And rewrite the uses of the original phi to use the new phi
    * directly, skipping the conversion out of the original phi:
    */
   nir_foreach_use(use, &phi->def) {
      /* We've previously established that all the uses were alu
       * conversion ops.  Turn them into movs instead.
       */
      nir_alu_instr *alu = nir_instr_as_alu(nir_src_parent_instr(use));
      alu->op = nir_op_mov;
   }
   nir_def_rewrite_uses(&phi->def, &new_phi->def);

   /* And finally insert the new phi after all sources are in place: */
   b->cursor = nir_after_instr(&phi->instr);
   nir_builder_instr_insert(b, &new_phi->instr);

   return true;
}

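/* Check whether a 32-bit load_const can round-trip through 16 bits without
 * changing value, for the given conversion type.  For example, 65 survives
 * an int16 round-trip while 65536 does not, and 1.0f is exactly
 * representable in fp16 while 0.1f is not.
 */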
static bool
can_convert_load_const(nir_load_const_instr *lc, nir_op op)
{
   nir_alu_type type = op_to_type(op);

   /* Note that we only handle phi's with bit_size == 32: */
   assert(lc->def.bit_size == 32);

   for (unsigned i = 0; i < lc->def.num_components; i++) {
      switch (type) {
      case nir_type_int:
         if (lc->value[i].i32 != (int32_t)(int16_t)lc->value[i].i32)
            return false;
         break;
      case nir_type_uint:
         if (lc->value[i].u32 != (uint32_t)(uint16_t)lc->value[i].u32)
            return false;
         break;
      case nir_type_float:
         if (lc->value[i].f32 != _mesa_half_to_float(
                                    _mesa_float_to_half(lc->value[i].f32)))
            return false;
         break;
      default:
         unreachable("bad type");
         return false;
      }
   }

   return true;
}

/* Check all the phi sources to see if they are the same widening op, in
 * which case we can push the widening op to the other side of the phi:
 */
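/* For example (hypothetical IR in the style of the examples above): with
 * phi sources { ssa_0 = load_const (0x00000000), ssa_19 = i2i32 ssa_18 },
 * this returns i2i32 with *bit_size = 16, since the constant can also
 * round-trip through 16 bits.
 */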
static nir_op
find_widening_op(nir_phi_instr *phi, unsigned *bit_size)
{
   nir_op op = INVALID_OP;

   bool has_load_const = false;
   *bit_size = 0;

   nir_foreach_phi_src(src, phi) {
      nir_instr *instr = src->src.ssa->parent_instr;
      if (instr->type == nir_instr_type_load_const) {
         has_load_const = true;
         continue;
      }

      unsigned src_bit_size;
      nir_op src_op = widening_conversion_op(instr, &src_bit_size);

      /* Not a widening conversion: */
      if (src_op == INVALID_OP)
         return INVALID_OP;

      /* If it is a widening conversion, it needs to be the same op as
       * the other phi sources:
       */
      if ((op != INVALID_OP) && (op != src_op))
         return INVALID_OP;

      if (*bit_size && (*bit_size != src_bit_size))
         return INVALID_OP;

      op = src_op;
      *bit_size = src_bit_size;
   }

   if ((op == INVALID_OP) || !has_load_const)
      return op;

   /* All the non-constant sources are the same widening conversion, but
    * load_const is also one of the phi sources.  If each constant could
    * have a narrowing->widening sequence inserted without loss of
    * precision, the rest of the transformation is still possible:
    */
   nir_foreach_phi_src(src, phi) {
      nir_instr *instr = src->src.ssa->parent_instr;
      if (instr->type != nir_instr_type_load_const)
         continue;

      if (!can_convert_load_const(nir_instr_as_load_const(instr), op))
         return INVALID_OP;
   }

   return op;
}

/* Try to move widening conversions feeding the phi's sources to the
 * phi's output, to reduce the phi's precision:
 */
static bool
try_move_widening_src(nir_builder *b, nir_phi_instr *phi)
{
   /* If the phi has already been narrowed, nothing more to do: */
   if (phi->def.bit_size != 32)
      return false;

   unsigned bit_size;
   nir_op op = find_widening_op(phi, &bit_size);

   if (op == INVALID_OP)
      return false;

   /* Construct the replacement phi instruction: */
   nir_phi_instr *new_phi = nir_phi_instr_create(b->shader);
   nir_def_init(&new_phi->instr, &new_phi->def,
                phi->def.num_components, bit_size);

   /* Remove the widening conversions from the phi sources: */
   nir_foreach_phi_src(src, phi) {
      nir_instr *instr = src->src.ssa->parent_instr;
      nir_def *new_src;

      b->cursor = nir_after_instr(instr);

      if (instr->type == nir_instr_type_load_const) {
         /* If the src is a load_const, we've already verified that it
          * is safe to insert a narrowing conversion to make the rest
          * of this transformation legal:
          */
         nir_load_const_instr *lc = nir_instr_as_load_const(instr);

         if (op_to_type(op) == nir_type_float) {
            new_src = nir_f2f16(b, &lc->def);
         } else {
            new_src = nir_i2i16(b, &lc->def);
         }
      } else {
         /* At this point we know the source is a conversion: */
         nir_alu_instr *alu = nir_instr_as_alu(instr);

         /* The conversion we are stripping off could have had a swizzle,
          * so replace it with a mov if necessary:
          */
         unsigned num_comp = alu->def.num_components;
         new_src = nir_mov_alu(b, alu->src[0], num_comp);
      }

      /* Add the corresponding phi_src to the new_phi: */
      nir_phi_instr_add_src(new_phi, src->pred, new_src);
   }

   /* And insert the new phi after all sources are in place: */
   b->cursor = nir_after_instr(&phi->instr);
   nir_builder_instr_insert(b, &new_phi->instr);

   /* And finally add back the widening conversion after the phi,
    * and rewrite the original phi's uses:
    */
   b->cursor = nir_after_instr_and_phis(&new_phi->instr);
   nir_def *def = nir_build_alu(b, op, &new_phi->def, NULL, NULL, NULL);

   nir_def_rewrite_uses(&phi->def, def);

   return true;
}

static bool
lower_phi(nir_builder *b, nir_phi_instr *phi)
{
   bool progress = try_move_narrowing_dst(b, phi);
   if (!progress)
      progress = try_move_widening_src(b, phi);
   return progress;
}

bool
nir_opt_phi_precision(nir_shader *shader)
{
   bool progress = false;

   /* If no 8b or 16b bit_sizes are used, there is no point in running
    * this pass.  (The bit_sizes_* fields are bitmasks of the bit sizes
    * in use, so masking with (8 | 16) tests for 8b and 16b values.)
    */
   unsigned bit_sizes_used = shader->info.bit_sizes_float |
                             shader->info.bit_sizes_int;

   /* Note: if the info is zeroed, we conservatively run to avoid gathering
    * info, which doesn't work for libraries.
    */
   if (bit_sizes_used && !(bit_sizes_used & (8 | 16)))
      return false;

   nir_foreach_function_impl(impl, shader) {
      nir_builder b = nir_builder_create(impl);

      nir_foreach_block(block, impl) {
         nir_foreach_phi_safe(phi, block)
            progress |= lower_phi(&b, phi);
      }

      if (progress) {
         nir_metadata_preserve(impl, nir_metadata_control_flow);
      } else {
         nir_metadata_preserve(impl, nir_metadata_all);
      }
   }

   return progress;
}
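
/* A minimal sketch of how a driver's optimization loop might invoke this
 * pass, using the usual NIR progress idiom (the surrounding loop and the
 * choice of companion passes here are hypothetical):
 *
 *    bool progress;
 *    do {
 *       progress = false;
 *       NIR_PASS(progress, shader, nir_opt_phi_precision);
 *       NIR_PASS(progress, shader, nir_opt_algebraic);
 *    } while (progress);
 *
 * Running it in a loop with algebraic opts lets conversions exposed by
 * this pass (e.g. newly adjacent narrowing/widening pairs) be folded on
 * the next iteration.
 */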