1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir_builder.h"
26
27 static bool
assert_ssa_def_is_not_1bit(nir_def * def,UNUSED void * unused)28 assert_ssa_def_is_not_1bit(nir_def *def, UNUSED void *unused)
29 {
30 assert(def->bit_size > 1);
31 return true;
32 }
33
34 static bool
rewrite_1bit_ssa_def_to_32bit(nir_def * def,void * _progress)35 rewrite_1bit_ssa_def_to_32bit(nir_def *def, void *_progress)
36 {
37 bool *progress = _progress;
38 if (def->bit_size == 1) {
39 def->bit_size = 32;
40 *progress = true;
41 }
42 return true;
43 }
44
45 static bool
lower_alu_instr(nir_builder * b,nir_alu_instr * alu,bool has_fcsel_ne,bool has_fcsel_gt)46 lower_alu_instr(nir_builder *b, nir_alu_instr *alu, bool has_fcsel_ne,
47 bool has_fcsel_gt)
48 {
49 const nir_op_info *op_info = &nir_op_infos[alu->op];
50
51 b->cursor = nir_before_instr(&alu->instr);
52
53 /* Replacement SSA value */
54 nir_def *rep = NULL;
55 switch (alu->op) {
56 case nir_op_mov:
57 case nir_op_vec2:
58 case nir_op_vec3:
59 case nir_op_vec4:
60 case nir_op_vec5:
61 case nir_op_vec8:
62 case nir_op_vec16:
63 if (alu->def.bit_size != 1)
64 return false;
65 /* These we expect to have booleans but the opcode doesn't change */
66 break;
67
68 case nir_op_b2f32:
69 alu->op = nir_op_mov;
70 break;
71 case nir_op_b2i32:
72 alu->op = nir_op_mov;
73 break;
74 case nir_op_b2b1:
75 alu->op = nir_op_mov;
76 break;
77
78 case nir_op_flt:
79 alu->op = nir_op_slt;
80 break;
81 case nir_op_fge:
82 alu->op = nir_op_sge;
83 break;
84 case nir_op_feq:
85 alu->op = nir_op_seq;
86 break;
87 case nir_op_fneu:
88 alu->op = nir_op_sne;
89 break;
90 case nir_op_ilt:
91 alu->op = nir_op_slt;
92 break;
93 case nir_op_ige:
94 alu->op = nir_op_sge;
95 break;
96 case nir_op_ieq:
97 alu->op = nir_op_seq;
98 break;
99 case nir_op_ine:
100 alu->op = nir_op_sne;
101 break;
102 case nir_op_ult:
103 alu->op = nir_op_slt;
104 break;
105 case nir_op_uge:
106 alu->op = nir_op_sge;
107 break;
108
109 case nir_op_ball_fequal2:
110 alu->op = nir_op_fall_equal2;
111 break;
112 case nir_op_ball_fequal3:
113 alu->op = nir_op_fall_equal3;
114 break;
115 case nir_op_ball_fequal4:
116 alu->op = nir_op_fall_equal4;
117 break;
118 case nir_op_bany_fnequal2:
119 alu->op = nir_op_fany_nequal2;
120 break;
121 case nir_op_bany_fnequal3:
122 alu->op = nir_op_fany_nequal3;
123 break;
124 case nir_op_bany_fnequal4:
125 alu->op = nir_op_fany_nequal4;
126 break;
127 case nir_op_ball_iequal2:
128 alu->op = nir_op_fall_equal2;
129 break;
130 case nir_op_ball_iequal3:
131 alu->op = nir_op_fall_equal3;
132 break;
133 case nir_op_ball_iequal4:
134 alu->op = nir_op_fall_equal4;
135 break;
136 case nir_op_bany_inequal2:
137 alu->op = nir_op_fany_nequal2;
138 break;
139 case nir_op_bany_inequal3:
140 alu->op = nir_op_fany_nequal3;
141 break;
142 case nir_op_bany_inequal4:
143 alu->op = nir_op_fany_nequal4;
144 break;
145
146 case nir_op_bcsel:
147 if (has_fcsel_gt)
148 alu->op = nir_op_fcsel_gt;
149 else if (has_fcsel_ne)
150 alu->op = nir_op_fcsel;
151 else {
152 /* Only a few pre-VS 4.0 platforms (e.g., r300 vertex shaders) should
153 * hit this path.
154 */
155 rep = nir_flrp(b,
156 nir_ssa_for_alu_src(b, alu, 2),
157 nir_ssa_for_alu_src(b, alu, 1),
158 nir_ssa_for_alu_src(b, alu, 0));
159 }
160
161 break;
162
163 case nir_op_iand:
164 alu->op = nir_op_fmul;
165 break;
166 case nir_op_ixor:
167 alu->op = nir_op_sne;
168 break;
169 case nir_op_ior:
170 alu->op = nir_op_fmax;
171 break;
172
173 case nir_op_inot:
174 rep = nir_seq(b, nir_ssa_for_alu_src(b, alu, 0),
175 nir_imm_float(b, 0));
176 break;
177
178 default:
179 assert(alu->def.bit_size > 1);
180 for (unsigned i = 0; i < op_info->num_inputs; i++)
181 assert(alu->src[i].src.ssa->bit_size > 1);
182 return false;
183 }
184
185 if (rep) {
186 /* We've emitted a replacement instruction */
187 nir_def_replace(&alu->def, rep);
188 } else {
189 if (alu->def.bit_size == 1)
190 alu->def.bit_size = 32;
191 }
192
193 return true;
194 }
195
196 static bool
lower_tex_instr(nir_tex_instr * tex)197 lower_tex_instr(nir_tex_instr *tex)
198 {
199 bool progress = false;
200 rewrite_1bit_ssa_def_to_32bit(&tex->def, &progress);
201 if (tex->dest_type == nir_type_bool1) {
202 tex->dest_type = nir_type_bool32;
203 progress = true;
204 }
205 return progress;
206 }
207
/* Options handed to nir_lower_bool_to_float_instr() through its cb_data
 * pointer.  They decide how nir_op_bcsel is lowered; has_fcsel_gt takes
 * priority over has_fcsel_ne when both are set.
 */
struct lower_bool_to_float_data {
   /* When set, bcsel is rewritten to nir_op_fcsel. */
   bool has_fcsel_ne;

   /* When set, bcsel is rewritten to nir_op_fcsel_gt. */
   bool has_fcsel_gt;
};
212
213 static bool
nir_lower_bool_to_float_instr(nir_builder * b,nir_instr * instr,void * cb_data)214 nir_lower_bool_to_float_instr(nir_builder *b,
215 nir_instr *instr,
216 void *cb_data)
217 {
218 struct lower_bool_to_float_data *data = cb_data;
219
220 switch (instr->type) {
221 case nir_instr_type_alu:
222 return lower_alu_instr(b, nir_instr_as_alu(instr),
223 data->has_fcsel_ne, data->has_fcsel_gt);
224
225 case nir_instr_type_load_const: {
226 nir_load_const_instr *load = nir_instr_as_load_const(instr);
227 if (load->def.bit_size == 1) {
228 nir_const_value *value = load->value;
229 for (unsigned i = 0; i < load->def.num_components; i++)
230 load->value[i].f32 = value[i].b ? 1.0 : 0.0;
231 load->def.bit_size = 32;
232 return true;
233 }
234 return false;
235 }
236
237 case nir_instr_type_intrinsic:
238 case nir_instr_type_undef:
239 case nir_instr_type_phi: {
240 bool progress = false;
241 nir_foreach_def(instr, rewrite_1bit_ssa_def_to_32bit, &progress);
242 return progress;
243 }
244
245 case nir_instr_type_tex:
246 return lower_tex_instr(nir_instr_as_tex(instr));
247
248 default:
249 nir_foreach_def(instr, assert_ssa_def_is_not_1bit, NULL);
250 return false;
251 }
252 }
253
254 bool
nir_lower_bool_to_float(nir_shader * shader,bool has_fcsel_ne)255 nir_lower_bool_to_float(nir_shader *shader, bool has_fcsel_ne)
256 {
257 struct lower_bool_to_float_data data = {
258 .has_fcsel_ne = has_fcsel_ne,
259 .has_fcsel_gt = shader->options->has_fused_comp_and_csel
260 };
261
262 return nir_shader_instructions_pass(shader, nir_lower_bool_to_float_instr,
263 nir_metadata_control_flow,
264 &data);
265 }
266