1 /*
2 * Copyright Pavel Ondračka <[email protected]>
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include <stdbool.h>
7 #include "r300_nir.h"
8 #include "nir_builder.h"
9
10 static int
follow_modifiers(nir_instr * instr)11 follow_modifiers(nir_instr *instr)
12 {
13 /* We don't have texturing so the only other options besides alus are
14 * just load input, load ubo or phi. We can copy propagate the first two
15 * in most cases. The cases when the copy propagate is not guaranteed
16 * to work is with indirect ubo load and in the presence of control flow.
17 * So just be safe and count this as a separate tmp.
18 */
19 if (instr->type == nir_instr_type_intrinsic) {
20 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
21 /* It should be enough to check if any of the uses is in phi. */
22 if (intrin->intrinsic == nir_intrinsic_load_ubo_vec4 ||
23 intrin->intrinsic == nir_intrinsic_load_constant ||
24 intrin->intrinsic == nir_intrinsic_load_input) {
25 nir_foreach_use(use, &intrin->def) {
26 if (nir_src_parent_instr(use)->type == nir_instr_type_phi)
27 return intrin->def.index;
28 }
29 }
30 if (intrin->intrinsic == nir_intrinsic_load_ubo_vec4 &&
31 !nir_src_is_const(intrin->src[1]))
32 return intrin->def.index;
33 }
34 /* Assume the worst when we see a phi. */
35 if (instr->type == nir_instr_type_phi)
36 return nir_instr_as_phi(instr)->def.index;
37
38 if (instr->type != nir_instr_type_alu)
39 return -1;
40
41 nir_alu_instr *alu = nir_instr_as_alu(instr);
42
43 if (alu->op == nir_op_fneg || alu->op == nir_op_fabs) {
44 return follow_modifiers(alu->src[0].src.ssa->parent_instr);
45 }
46 return alu->def.index;
47 }
48
49 static bool
has_three_different_tmp_sources(nir_alu_instr * fcsel)50 has_three_different_tmp_sources(nir_alu_instr *fcsel)
51 {
52 unsigned src_def_index[3];
53 for (unsigned i = 0; i < 3; i++) {
54 int index = follow_modifiers(fcsel->src[i].src.ssa->parent_instr);
55 if (index == -1)
56 return false;
57 else
58 src_def_index[i] = index;
59 }
60 return src_def_index[0] != src_def_index[1] &&
61 src_def_index[0] != src_def_index[2] &&
62 src_def_index[1] != src_def_index[2];
63 }
64
65 static bool
is_comparison(nir_instr * instr)66 is_comparison(nir_instr *instr)
67 {
68 if (instr->type != nir_instr_type_alu)
69 return false;
70
71 nir_alu_instr *alu = nir_instr_as_alu(instr);
72
73 switch (alu->op) {
74 case nir_op_sge:
75 case nir_op_slt:
76 case nir_op_seq:
77 case nir_op_sne:
78 return true;
79 default:
80 return false;
81 }
82 }
83
84 static bool
r300_nir_lower_fcsel_instr(nir_builder * b,nir_alu_instr * alu,void * data)85 r300_nir_lower_fcsel_instr(nir_builder *b, nir_alu_instr *alu, void *data)
86 {
87 if (alu->op != nir_op_fcsel && alu->op != nir_op_fcsel_ge && alu->op != nir_op_fcsel_gt)
88 return false;
89
90 if (has_three_different_tmp_sources(alu)) {
91 nir_def *lrp;
92 b->cursor = nir_before_instr(&alu->instr);
93 /* Lower to LRP.
94 * At this point there are no fcsels as all bcsels were converted to
95 * fcsel_gt by nir_lower_bool_to_float, however we can save on the slt
96 * even for nir_op_fcsel_gt if the source is 0 or 1 anyway.
97 */
98 nir_instr *src0_instr = alu->src[0].src.ssa->parent_instr;
99 if (alu->op == nir_op_fcsel ||
100 (alu->op == nir_op_fcsel_gt && is_comparison(src0_instr))) {
101 lrp = nir_flrp(b, nir_ssa_for_alu_src(b, alu, 2),
102 nir_ssa_for_alu_src(b, alu, 1),
103 nir_ssa_for_alu_src(b, alu, 0));
104 } else if (alu->op == nir_op_fcsel_ge) {
105 nir_def *sge = nir_sge(b, nir_ssa_for_alu_src(b, alu, 0), nir_imm_float(b, 0.0));
106 lrp = nir_flrp(b, nir_ssa_for_alu_src(b, alu, 2),
107 nir_ssa_for_alu_src(b, alu, 1), sge);
108 } else {
109 nir_def *slt = nir_slt(b, nir_fneg(b, nir_ssa_for_alu_src(b, alu, 0)),
110 nir_imm_float(b, 0.0));
111 lrp = nir_flrp(b, nir_ssa_for_alu_src(b, alu, 2),
112 nir_ssa_for_alu_src(b, alu, 1), slt);
113 }
114
115 nir_def_replace(&alu->def, lrp);
116 return true;
117 }
118 return false;
119 }
120
121 bool
r300_nir_lower_fcsel_r500(nir_shader * shader)122 r300_nir_lower_fcsel_r500(nir_shader *shader)
123 {
124 return nir_shader_alu_pass(shader, r300_nir_lower_fcsel_instr,
125 nir_metadata_control_flow, NULL);
126 }
127