xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/r300/compiler/r500_nir_lower_fcsel.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright Pavel Ondračka <[email protected]>
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include <stdbool.h>
7 #include "r300_nir.h"
8 #include "nir_builder.h"
9 
10 static int
follow_modifiers(nir_instr * instr)11 follow_modifiers(nir_instr *instr)
12 {
13    /* We don't have texturing so the only other options besides alus are
14     * just load input, load ubo or phi. We can copy propagate the first two
15     * in most cases. The cases when the copy propagate is not guaranteed
16     * to work is with indirect ubo load and in the presence of control flow.
17     * So just be safe and count this as a separate tmp.
18     */
19    if (instr->type == nir_instr_type_intrinsic) {
20       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
21       /* It should be enough to check if any of the uses is in phi. */
22       if (intrin->intrinsic == nir_intrinsic_load_ubo_vec4 ||
23           intrin->intrinsic == nir_intrinsic_load_constant ||
24           intrin->intrinsic == nir_intrinsic_load_input) {
25           nir_foreach_use(use, &intrin->def) {
26               if (nir_src_parent_instr(use)->type == nir_instr_type_phi)
27                  return intrin->def.index;
28           }
29       }
30       if (intrin->intrinsic == nir_intrinsic_load_ubo_vec4 &&
31           !nir_src_is_const(intrin->src[1]))
32       return intrin->def.index;
33    }
34    /* Assume the worst when we see a phi. */
35    if (instr->type == nir_instr_type_phi)
36       return nir_instr_as_phi(instr)->def.index;
37 
38    if (instr->type != nir_instr_type_alu)
39       return -1;
40 
41    nir_alu_instr *alu = nir_instr_as_alu(instr);
42 
43    if (alu->op == nir_op_fneg || alu->op == nir_op_fabs) {
44       return follow_modifiers(alu->src[0].src.ssa->parent_instr);
45    }
46    return alu->def.index;
47 }
48 
49 static bool
has_three_different_tmp_sources(nir_alu_instr * fcsel)50 has_three_different_tmp_sources(nir_alu_instr *fcsel)
51 {
52    unsigned src_def_index[3];
53    for (unsigned i = 0; i < 3; i++) {
54       int index = follow_modifiers(fcsel->src[i].src.ssa->parent_instr);
55       if (index == -1)
56          return false;
57       else
58 	 src_def_index[i] = index;
59    }
60    return src_def_index[0] != src_def_index[1] &&
61           src_def_index[0] != src_def_index[2] &&
62           src_def_index[1] != src_def_index[2];
63 }
64 
65 static bool
is_comparison(nir_instr * instr)66 is_comparison(nir_instr *instr)
67 {
68    if (instr->type != nir_instr_type_alu)
69       return false;
70 
71    nir_alu_instr *alu = nir_instr_as_alu(instr);
72 
73    switch (alu->op) {
74    case nir_op_sge:
75    case nir_op_slt:
76    case nir_op_seq:
77    case nir_op_sne:
78       return true;
79    default:
80       return false;
81    }
82 }
83 
84 static bool
r300_nir_lower_fcsel_instr(nir_builder * b,nir_alu_instr * alu,void * data)85 r300_nir_lower_fcsel_instr(nir_builder *b, nir_alu_instr *alu, void *data)
86 {
87    if (alu->op != nir_op_fcsel && alu->op != nir_op_fcsel_ge && alu->op != nir_op_fcsel_gt)
88       return false;
89 
90    if (has_three_different_tmp_sources(alu)) {
91       nir_def *lrp;
92       b->cursor = nir_before_instr(&alu->instr);
93       /* Lower to LRP.
94        * At this point there are no fcsels as all bcsels were converted to
95        * fcsel_gt by nir_lower_bool_to_float, however we can save on the slt
96        * even for nir_op_fcsel_gt if the source is 0 or 1 anyway.
97        */
98       nir_instr *src0_instr = alu->src[0].src.ssa->parent_instr;
99       if (alu->op == nir_op_fcsel ||
100           (alu->op == nir_op_fcsel_gt && is_comparison(src0_instr))) {
101          lrp = nir_flrp(b, nir_ssa_for_alu_src(b, alu, 2),
102                         nir_ssa_for_alu_src(b, alu, 1),
103                         nir_ssa_for_alu_src(b, alu, 0));
104       } else if (alu->op == nir_op_fcsel_ge) {
105          nir_def *sge = nir_sge(b, nir_ssa_for_alu_src(b, alu, 0), nir_imm_float(b, 0.0));
106          lrp = nir_flrp(b, nir_ssa_for_alu_src(b, alu, 2),
107                         nir_ssa_for_alu_src(b, alu, 1), sge);
108       } else {
109          nir_def *slt = nir_slt(b, nir_fneg(b, nir_ssa_for_alu_src(b, alu, 0)),
110                                 nir_imm_float(b, 0.0));
111          lrp = nir_flrp(b, nir_ssa_for_alu_src(b, alu, 2),
112                         nir_ssa_for_alu_src(b, alu, 1), slt);
113       }
114 
115       nir_def_replace(&alu->def, lrp);
116       return true;
117    }
118    return false;
119 }
120 
121 bool
r300_nir_lower_fcsel_r500(nir_shader * shader)122 r300_nir_lower_fcsel_r500(nir_shader *shader)
123 {
124    return nir_shader_alu_pass(shader, r300_nir_lower_fcsel_instr,
125                               nir_metadata_control_flow, NULL);
126 }
127