1 /*
2 * Copyright © 2019 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir/nir_builder.h"
25 #include "nir.h"
26 #include "nir_constant_expressions.h"
27 #include "nir_control_flow.h"
28 #include "nir_loop_analyze.h"
29
30 static bool
is_two_src_comparison(const nir_alu_instr * instr)31 is_two_src_comparison(const nir_alu_instr *instr)
32 {
33 switch (instr->op) {
34 case nir_op_flt:
35 case nir_op_flt32:
36 case nir_op_fge:
37 case nir_op_fge32:
38 case nir_op_feq:
39 case nir_op_feq32:
40 case nir_op_fneu:
41 case nir_op_fneu32:
42 case nir_op_ilt:
43 case nir_op_ilt32:
44 case nir_op_ult:
45 case nir_op_ult32:
46 case nir_op_ige:
47 case nir_op_ige32:
48 case nir_op_uge:
49 case nir_op_uge32:
50 case nir_op_ieq:
51 case nir_op_ieq32:
52 case nir_op_ine:
53 case nir_op_ine32:
54 return true;
55 default:
56 return false;
57 }
58 }
59
60 static inline bool
is_zero(const nir_alu_instr * instr,unsigned src,unsigned num_components,const uint8_t * swizzle)61 is_zero(const nir_alu_instr *instr, unsigned src, unsigned num_components,
62 const uint8_t *swizzle)
63 {
64 /* only constant srcs: */
65 if (!nir_src_is_const(instr->src[src].src))
66 return false;
67
68 for (unsigned i = 0; i < num_components; i++) {
69 nir_alu_type type = nir_op_infos[instr->op].input_types[src];
70 switch (nir_alu_type_get_base_type(type)) {
71 case nir_type_int:
72 case nir_type_uint: {
73 if (nir_src_comp_as_int(instr->src[src].src, swizzle[i]) != 0)
74 return false;
75 break;
76 }
77 case nir_type_float: {
78 if (nir_src_comp_as_float(instr->src[src].src, swizzle[i]) != 0)
79 return false;
80 break;
81 }
82 default:
83 return false;
84 }
85 }
86
87 return true;
88 }
89
90 static bool
all_uses_are_bcsel(const nir_alu_instr * instr)91 all_uses_are_bcsel(const nir_alu_instr *instr)
92 {
93 nir_foreach_use(use, &instr->def) {
94 if (nir_src_parent_instr(use)->type != nir_instr_type_alu)
95 return false;
96
97 nir_alu_instr *const alu = nir_instr_as_alu(nir_src_parent_instr(use));
98 if (alu->op != nir_op_bcsel &&
99 alu->op != nir_op_b32csel)
100 return false;
101
102 /* Not only must the result be used by a bcsel, but it must be used as
103 * the first source (the condition).
104 */
105 if (alu->src[0].src.ssa != &instr->def)
106 return false;
107 }
108
109 return true;
110 }
111
112 static bool
all_uses_are_compare_with_zero(const nir_alu_instr * instr)113 all_uses_are_compare_with_zero(const nir_alu_instr *instr)
114 {
115 nir_foreach_use(use, &instr->def) {
116 if (nir_src_parent_instr(use)->type != nir_instr_type_alu)
117 return false;
118
119 nir_alu_instr *const alu = nir_instr_as_alu(nir_src_parent_instr(use));
120 if (!is_two_src_comparison(alu))
121 return false;
122
123 if (!is_zero(alu, 0, 1, alu->src[0].swizzle) &&
124 !is_zero(alu, 1, 1, alu->src[1].swizzle))
125 return false;
126
127 if (!all_uses_are_bcsel(alu))
128 return false;
129 }
130
131 return true;
132 }
133
/* Rematerialize comparisons into the blocks that use them.
 *
 * For each comparison whose result is consumed only by bcsel conditions
 * (and possibly by if-statement conditions), emit a clone of the
 * comparison next to every out-of-block consumer and redirect that
 * consumer to the clone.  The original instruction is left in place for
 * any same-block uses; unused originals are presumably cleaned up by a
 * later DCE pass — TODO confirm.
 *
 * Returns true if any clone was emitted.
 */
static bool
nir_opt_rematerialize_compares_impl(nir_shader *shader, nir_function_impl *impl)
{
   bool progress = false;

   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_alu)
            continue;

         nir_alu_instr *const alu = nir_instr_as_alu(instr);
         if (!is_two_src_comparison(alu))
            continue;

         if (!all_uses_are_bcsel(alu))
            continue;

         /* At this point it is known that alu is a comparison instruction
          * that is only used by nir_op_bcsel and possibly by if-statements
          * (though the latter has not been explicitly checked).
          *
          * Iterate through each use of the comparison. For every use (or use
          * by an if-statement) that is in a different block, emit a copy of
          * the comparison. Care must be taken here. The original
          * instruction must be duplicated only once in each block because CSE
          * cannot be run after this pass.
          */
         /* The _safe iterator is required: rewriting a use while walking
          * the use list would otherwise invalidate the iteration.
          */
         nir_foreach_use_including_if_safe(use, &alu->def) {
            if (nir_src_is_if(use)) {
               nir_if *const if_stmt = nir_src_parent_if(use);

               /* An if-condition has no instruction to insert before, so
                * the clone goes at the end of the block preceding the if.
                */
               nir_block *const prev_block =
                  nir_cf_node_as_block(nir_cf_node_prev(&if_stmt->cf_node));

               /* If the compare is from the previous block, don't
                * rematerialize.
                */
               if (prev_block == alu->instr.block)
                  continue;

               nir_alu_instr *clone = nir_alu_instr_clone(shader, alu);

               nir_instr_insert_after_block(prev_block, &clone->instr);

               nir_src_rewrite(&if_stmt->condition, &clone->def);
               progress = true;
            } else {
               nir_instr *const use_instr = nir_src_parent_instr(use);

               /* If the use is in the same block as the def, don't
                * rematerialize.
                */
               if (use_instr->block == alu->instr.block)
                  continue;

               nir_alu_instr *clone = nir_alu_instr_clone(shader, alu);

               nir_instr_insert_before(use_instr, &clone->instr);

               /* Redirect every source of the user that referenced the
                * original comparison to the freshly inserted clone.
                */
               nir_alu_instr *const use_alu = nir_instr_as_alu(use_instr);
               for (unsigned i = 0; i < nir_op_infos[use_alu->op].num_inputs; i++) {
                  if (use_alu->src[i].src.ssa == &alu->def) {
                     nir_src_rewrite(&use_alu->src[i].src, &clone->def);
                     progress = true;
                  }
               }
            }
         }
      }
   }

   /* Only instructions were added/rewired; the CFG itself is untouched,
    * so block/dominance metadata can be preserved on progress.
    */
   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_control_flow);
   } else {
      nir_metadata_preserve(impl, nir_metadata_all);
   }

   return progress;
}
213
/* Rematerialize selected ALU instructions into the blocks that use them.
 *
 * Like nir_opt_rematerialize_compares_impl, but for a curated set of ALU
 * opcodes whose only uses are comparisons with zero feeding bcsel
 * conditions.  Cloning such an ALU op next to its comparison keeps the
 * pair in one block so a backend can fuse them (the comment below cites
 * conditional-modifier propagation on Intel GPUs as the motivation).
 *
 * Returns true if any clone was emitted.
 */
static bool
nir_opt_rematerialize_alu_impl(nir_shader *shader, nir_function_impl *impl)
{
   bool progress = false;

   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_alu)
            continue;

         nir_alu_instr *const alu = nir_instr_as_alu(instr);

         /* This list only include ALU ops that are likely to be able to have
          * cmod propagation on Intel GPUs.
          */
         switch (alu->op) {
         case nir_op_ineg:
         case nir_op_iabs:
         case nir_op_fneg:
         case nir_op_fabs:
         case nir_op_fadd:
         case nir_op_iadd:
         case nir_op_iadd_sat:
         case nir_op_uadd_sat:
         case nir_op_isub_sat:
         case nir_op_usub_sat:
         case nir_op_irhadd:
         case nir_op_urhadd:
         case nir_op_fmul:
         case nir_op_inot:
         case nir_op_iand:
         case nir_op_ior:
         case nir_op_ixor:
         case nir_op_ffloor:
         case nir_op_ffract:
         case nir_op_uclz:
         case nir_op_ishl:
         case nir_op_ishr:
         case nir_op_ushr:
         case nir_op_urol:
         case nir_op_uror:
            break; /* ... from switch. */
         default:
            continue; /* ... with loop. */
         }

         /* To help prevent increasing live ranges, require that one of the
          * sources be a constant.
          */
         if (nir_op_infos[alu->op].num_inputs == 2 &&
             !nir_src_is_const(alu->src[0].src) &&
             !nir_src_is_const(alu->src[1].src))
            continue;

         if (!all_uses_are_compare_with_zero(alu))
            continue;

         /* At this point it is known that the alu is only used by a
          * comparison with zero that is used by nir_op_bcsel and possibly by
          * if-statements (though the latter has not been explicitly checked).
          *
          * Iterate through each use of the ALU. For every use that is in a
          * different block, emit a copy of the ALU. Care must be taken here.
          * The original instruction must be duplicated only once in each
          * block because CSE cannot be run after this pass.
          */
         /* No if-statement uses are possible here (all uses are bcsel
          * conditions via comparisons), so the plain _safe iterator
          * suffices; it is required because uses are rewritten mid-walk.
          */
         nir_foreach_use_safe(use, &alu->def) {
            nir_instr *const use_instr = nir_src_parent_instr(use);

            /* If the use is in the same block as the def, don't
             * rematerialize.
             */
            if (use_instr->block == alu->instr.block)
               continue;

            nir_alu_instr *clone = nir_alu_instr_clone(shader, alu);

            nir_instr_insert_before(use_instr, &clone->instr);

            /* Redirect every source of the user that referenced the
             * original ALU instruction to the freshly inserted clone.
             */
            nir_alu_instr *const use_alu = nir_instr_as_alu(use_instr);
            for (unsigned i = 0; i < nir_op_infos[use_alu->op].num_inputs; i++) {
               if (use_alu->src[i].src.ssa == &alu->def) {
                  nir_src_rewrite(&use_alu->src[i].src, &clone->def);
                  progress = true;
               }
            }
         }
      }
   }

   /* Only instructions were added/rewired; the CFG itself is untouched,
    * so block/dominance metadata can be preserved on progress.
    */
   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_control_flow);
   } else {
      nir_metadata_preserve(impl, nir_metadata_all);
   }

   return progress;
}
312
313 bool
nir_opt_rematerialize_compares(nir_shader * shader)314 nir_opt_rematerialize_compares(nir_shader *shader)
315 {
316 bool progress = false;
317
318 nir_foreach_function_impl(impl, shader) {
319 progress = nir_opt_rematerialize_compares_impl(shader, impl) || progress;
320
321 progress = nir_opt_rematerialize_alu_impl(shader, impl) || progress;
322 }
323
324 return progress;
325 }
326