/*
 * Copyright 2023 Pavel Ondračka <[email protected]>
 * SPDX-License-Identifier: MIT
 */

#include "r300_nir.h"

#include "compiler/nir/nir_builder.h"
#include "r300_screen.h"

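/* Check, transitively through movs, vecN and csels, that every user of this
 * ALU instruction consumes the result through a float-typed source. Meant
 * as a condition helper, so that float-only optimizations don't fire when
 * the result also feeds an integer- or bool-typed source.
 */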
bool
r300_is_only_used_as_float(const nir_alu_instr *instr)
{
   nir_foreach_use(src, &instr->def) {
      if (nir_src_is_if(src))
         return false;

      nir_instr *user_instr = nir_src_parent_instr(src);
      if (user_instr->type == nir_instr_type_alu) {
         nir_alu_instr *alu = nir_instr_as_alu(user_instr);
         switch (alu->op) {
         case nir_op_mov:
         case nir_op_vec2:
         case nir_op_vec3:
         case nir_op_vec4:
         case nir_op_bcsel:
         case nir_op_b32csel:
            if (!r300_is_only_used_as_float(alu))
               return false;
            break;
         default:
            break;
         }

         const nir_op_info *info = &nir_op_infos[alu->op];
         nir_alu_src *alu_src = exec_node_data(nir_alu_src, src, src);
         int src_idx = alu_src - &alu->src[0];
         if ((info->input_types[src_idx] & nir_type_int) ||
             (info->input_types[src_idx] & nir_type_bool))
            return false;
      }
   }
   return true;
}

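/* Callback for nir_opt_vectorize: returns the vector width we want for the
 * given instruction (4, since the ALU is vec4), or 0 to leave it alone.
 */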
static unsigned char
r300_should_vectorize_instr(const nir_instr *instr, const void *data)
{
   bool *too_many_ubos = (bool *) data;

   if (instr->type != nir_instr_type_alu)
      return 0;

   /* Vectorization can make the constant layout worse and increase
    * the constant register usage. The worst scenario is vectorization
    * of lowered indirect register access, where we access the i-th element
    * and later we access the (i-1)-th or (i+1)-th (most notably glamor and
    * gsk shaders). In this case we have already added constants 1..n, where
    * n is the array size, and we can keep reusing them, unless the lowered
    * ladder gets vectorized later and new vector constants are created.
    *
    * Thus prevent vectorization of the specific patterns from lowered
    * indirect access.
    *
    * This is quite a heavy hammer: we could in theory estimate how many
    * slots the current UBOs and constants will need and only disable
    * vectorization when we are close to the limit. However, that would
    * likely need a global shader analysis each time
    * r300_should_vectorize_instr is called, which we want to avoid.
    *
    * So for now just don't vectorize anything that loads constants.
    */
   if (*too_many_ubos) {
      nir_alu_instr *alu = nir_instr_as_alu(instr);
      unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
      for (unsigned i = 0; i < num_srcs; i++) {
         if (nir_src_is_const(alu->src[i].src)) {
            return 0;
         }
      }
   }

   return 4;
}

/* R300 and R400 have just 32 vec4 constant register slots in fs.
 * Therefore, while it's possible we will be able to compact some of
 * the constants later, we need to be extra careful with adding
 * new constants anyway.
 */
static bool have_too_many_ubos(nir_shader *s, bool is_r500)
{
   if (s->info.stage != MESA_SHADER_FRAGMENT)
      return false;

   if (is_r500)
      return false;

   nir_foreach_variable_with_modes(var, s, nir_var_mem_ubo) {
      int ubo = var->data.driver_location;
      assert(ubo == 0);

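      /* One constant slot holds one vec4 (16 bytes), so e.g. a 520-byte
       * UBO needs DIV_ROUND_UP(520, 16) = 33 slots, already one more than
       * the 32 the hardware provides.
       */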
      unsigned size = glsl_get_explicit_size(var->interface_type, false);
      if (DIV_ROUND_UP(size, 16) > 32)
         return true;
   }

   return false;
}

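/* nir_shader_intrinsics_pass callback: mark UBO loads as speculatable.
 * This should allow the later nir_opt_peephole_select call to flatten
 * branches that contain UBO loads.
 */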
static bool
set_speculate(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *_)
{
   if (intr->intrinsic == nir_intrinsic_load_ubo_vec4) {
      nir_intrinsic_set_access(intr,
                               nir_intrinsic_access(intr) | ACCESS_CAN_SPECULATE);
      return true;
   }
   return false;
}

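/* Run the r300 NIR lowering and optimization passes to a fixed point. */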
static void
r300_optimize_nir(struct nir_shader *s, struct pipe_screen *screen)
{
   bool is_r500 = r300_screen(screen)->caps.is_r500;

   bool progress;
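   /* The hardware trig instructions only work on a limited input range, so
    * unless the API already guarantees a proper range (as D3D9 does), the
    * inputs of sin/cos have to be reduced first.
    */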
   if (s->info.stage == MESA_SHADER_FRAGMENT) {
      if (is_r500) {
         NIR_PASS_V(s, r300_transform_fs_trig_input);
      }
   } else {
      if (r300_screen(screen)->caps.has_tcl) {
         if (r300_screen(screen)->caps.is_r500) {
            /* Only nine (the D3D9 state tracker) should set both the TTN
             * shader name and use_legacy_math_rules, and D3D9 already
             * mandates the proper range for the trigonometric inputs.
             */
            if (!s->info.use_legacy_math_rules ||
                !(s->info.name && !strcmp("TTN", s->info.name))) {
               NIR_PASS_V(s, r300_transform_vs_trig_input);
            }
         } else {
            if (r300_screen(screen)->caps.is_r400) {
               NIR_PASS_V(s, r300_transform_vs_trig_input);
            }
         }
      }
   }

   do {
      progress = false;

      NIR_PASS_V(s, nir_lower_vars_to_ssa);

      NIR_PASS(progress, s, nir_copy_prop);
      NIR_PASS(progress, s, r300_nir_lower_flrp);
      NIR_PASS(progress, s, nir_opt_algebraic);
      if (s->info.stage == MESA_SHADER_VERTEX) {
         if (!is_r500)
            NIR_PASS(progress, s, r300_nir_lower_bool_to_float);
         NIR_PASS(progress, s, r300_nir_fuse_fround_d3d9);
      }
      NIR_PASS(progress, s, nir_opt_constant_folding);
      NIR_PASS(progress, s, nir_opt_remove_phis);
      NIR_PASS(progress, s, nir_opt_conditional_discard);
      NIR_PASS(progress, s, nir_opt_dce);
      NIR_PASS(progress, s, nir_opt_dead_cf);
      NIR_PASS(progress, s, nir_opt_cse);
      NIR_PASS(progress, s, nir_opt_find_array_copies);
      NIR_PASS(progress, s, nir_opt_copy_prop_vars);
      NIR_PASS(progress, s, nir_opt_dead_write_vars);

      NIR_PASS(progress, s, nir_opt_if, nir_opt_if_optimize_phi_true_false);
      if (is_r500)
         nir_shader_intrinsics_pass(s, set_speculate,
                                    nir_metadata_control_flow, NULL);
      NIR_PASS(progress, s, nir_opt_peephole_select, is_r500 ? 8 : ~0, true, true);
      if (s->info.stage == MESA_SHADER_FRAGMENT) {
         NIR_PASS(progress, s, r300_nir_lower_bool_to_float_fs);
      }
      NIR_PASS(progress, s, nir_opt_algebraic);
      NIR_PASS(progress, s, nir_opt_constant_folding);
      NIR_PASS(progress, s, nir_opt_shrink_stores, true);
      NIR_PASS(progress, s, nir_opt_shrink_vectors, false);
      NIR_PASS(progress, s, nir_opt_loop);

      bool too_many_ubos = have_too_many_ubos(s, is_r500);
      NIR_PASS(progress, s, nir_opt_vectorize, r300_should_vectorize_instr,
               &too_many_ubos);
      NIR_PASS(progress, s, nir_opt_undef);
      if (!progress)
         NIR_PASS(progress, s, nir_lower_undef_to_zero);
      NIR_PASS(progress, s, nir_opt_loop_unroll);

      /* Try to fold addressing math into ubo_vec4's base to avoid
       * load_consts and ALU ops for it.
       */
      nir_opt_offsets_options offset_options = {
         .ubo_vec4_max = 255,

         /* No const offset in TGSI for shared accesses. */
         .shared_max = 0,

         /* unused intrinsics */
         .uniform_max = 0,
         .buffer_max = 0,
      };

      NIR_PASS(progress, s, nir_opt_offsets, &offset_options);
   } while (progress);

   NIR_PASS_V(s, nir_lower_var_copies);
   NIR_PASS(progress, s, nir_remove_dead_variables, nir_var_function_temp,
            NULL);
}

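/* R300 and R400 can't handle control flow in these shader stages, so by
 * this point all ifs must have been flattened and all loops unrolled. The
 * check is cheap: a flat shader is a single block, so any cf_node following
 * the start block means that flattening or unrolling failed. Returns a
 * human-readable error message, or NULL on success.
 */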
static char *r300_check_control_flow(nir_shader *s)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(s);
   nir_block *first = nir_start_block(impl);
   nir_cf_node *next = nir_cf_node_next(&first->cf_node);

   if (next) {
      switch (next->type) {
      case nir_cf_node_if:
         return "If/then statements not supported by R300/R400 shaders, "
                "should have been flattened by peephole_select.";
      case nir_cf_node_loop:
         return "Looping not supported by R300/R400 shaders, "
                "all loops must be statically unrollable.";
      default:
         return "Unknown control flow type";
      }
   }

   return NULL;
}

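/* The screen's finalize_nir hook: run the optimization loop, strip
 * storage-occupying uniforms and check that the result is something the
 * hardware can actually run. Returns a malloc'ed error string (owned by
 * the caller) or NULL on success.
 */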
char *
r300_finalize_nir(struct pipe_screen *pscreen, void *nir)
{
   nir_shader *s = nir;

   r300_optimize_nir(s, pscreen);

   /* st_program.c's parameter list optimization requires that future nir
    * variants don't reallocate the uniform storage, so we have to remove
    * uniforms that occupy storage. But we don't want to remove samplers,
    * because they're needed for YUV variant lowering.
    */
   nir_remove_dead_derefs(s);
   nir_foreach_uniform_variable_safe(var, s) {
      if (var->data.mode == nir_var_uniform &&
          (glsl_type_get_image_count(var->type) ||
           glsl_type_get_sampler_count(var->type)))
         continue;

      exec_node_remove(&var->node);
   }
   nir_validate_shader(s, "after uniform var removal");

   nir_sweep(s);

   if (!r300_screen(pscreen)->caps.is_r500 &&
       (r300_screen(pscreen)->caps.has_tcl ||
        s->info.stage == MESA_SHADER_FRAGMENT)) {
      char *msg = r300_check_control_flow(s);
      if (msg)
         return strdup(msg);
   }

   return NULL;
}