1 /*
2 * Copyright 2018 Timothy Arceri
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "nir.h"
7 #include "nir_builder.h"
8 #include "nir_deref.h"
9 #include "util/u_dynarray.h"
10 #include "util/u_math.h"
11 #define XXH_INLINE_ALL
12 #include "util/xxhash.h"
13
/** @file r600_nir_vectorize_vs_inputs.c
 *
 * Merges scalar or partial-vector vertex-shader attribute inputs
 * (nir_intrinsic_load_deref of nir_var_shader_in generic attributes) that
 * share a location into loads of a single, wider vector variable.
 */
/* Public entry point of this pass; defined at the bottom of this file. */
bool
r600_vectorize_vs_inputs(nir_shader *shader);
21
22 static nir_deref_instr *
r600_clone_deref_array(nir_builder * b,nir_deref_instr * dst_tail,const nir_deref_instr * src_head)23 r600_clone_deref_array(nir_builder *b,
24 nir_deref_instr *dst_tail,
25 const nir_deref_instr *src_head)
26 {
27 const nir_deref_instr *parent = nir_deref_instr_parent(src_head);
28
29 if (!parent)
30 return dst_tail;
31
32 assert(src_head->deref_type == nir_deref_type_array);
33
34 dst_tail = r600_clone_deref_array(b, dst_tail, parent);
35
36 return nir_build_deref_array(b, dst_tail, src_head->arr.index.ssa);
37 }
38
39 static bool
r600_variable_can_rewrite(nir_variable * var)40 r600_variable_can_rewrite(nir_variable *var)
41 {
42
43 /* Skip complex types we don't split in the first place */
44 if (!glsl_type_is_vector_or_scalar(glsl_without_array(var->type)))
45 return false;
46
47 /* TODO: add 64/16bit support ? */
48 if (glsl_get_bit_size(glsl_without_array(var->type)) != 32)
49 return false;
50
51 /* We only check VSand attribute inputs */
52 return (var->data.location >= VERT_ATTRIB_GENERIC0 &&
53 var->data.location <= VERT_ATTRIB_GENERIC15);
54 }
55
56 static bool
r600_instr_can_rewrite(nir_instr * instr)57 r600_instr_can_rewrite(nir_instr *instr)
58 {
59 if (instr->type != nir_instr_type_intrinsic)
60 return false;
61
62 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
63
64 if (intr->num_components > 3)
65 return false;
66
67 if (intr->intrinsic != nir_intrinsic_load_deref)
68 return false;
69
70 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
71 if (!nir_deref_mode_is(deref, nir_var_shader_in))
72 return false;
73
74 return r600_variable_can_rewrite(nir_deref_instr_get_variable(deref));
75 }
76
77 static bool
r600_io_access_same_var(const nir_instr * instr1,const nir_instr * instr2)78 r600_io_access_same_var(const nir_instr *instr1, const nir_instr *instr2)
79 {
80 assert(instr1->type == nir_instr_type_intrinsic &&
81 instr2->type == nir_instr_type_intrinsic);
82
83 nir_intrinsic_instr *intr1 = nir_instr_as_intrinsic(instr1);
84 nir_intrinsic_instr *intr2 = nir_instr_as_intrinsic(instr2);
85
86 nir_variable *var1 = nir_intrinsic_get_var(intr1, 0);
87 nir_variable *var2 = nir_intrinsic_get_var(intr2, 0);
88
89 /* We don't handle combining vars of different base types, so skip those */
90 if (glsl_get_base_type(var1->type) != glsl_get_base_type(var2->type))
91 return false;
92
93 if (var1->data.location != var2->data.location)
94 return false;
95
96 return true;
97 }
98
99 static struct util_dynarray *
r600_vec_instr_stack_create(void * mem_ctx)100 r600_vec_instr_stack_create(void *mem_ctx)
101 {
102 struct util_dynarray *stack = ralloc(mem_ctx, struct util_dynarray);
103 util_dynarray_init(stack, mem_ctx);
104 return stack;
105 }
106
107 static void
r600_vec_instr_stack_push(struct util_dynarray * stack,nir_instr * instr)108 r600_vec_instr_stack_push(struct util_dynarray *stack, nir_instr *instr)
109 {
110 util_dynarray_append(stack, nir_instr *, instr);
111 }
112
113 static unsigned
r600_correct_location(nir_variable * var)114 r600_correct_location(nir_variable *var)
115 {
116 return var->data.location - VERT_ATTRIB_GENERIC0;
117 }
118
/* Replace the scalar/partial load `intr` with a load of the merged,
 * wider variable `var`.
 *
 * A new `num_comps`-wide load of `var` is inserted before `intr`, cloning
 * the original array-deref chain onto the new variable.  All uses of the
 * original def are rewritten to a swizzle that picks the `old_num_comps`
 * components the original variable occupied; `comp` is the ORIGINAL
 * variable's location_frac, so `comp - var->data.location_frac` is the
 * offset of the old data within the merged vector.
 */
static void
r600_create_new_load(nir_builder *b,
                     nir_intrinsic_instr *intr,
                     nir_variable *var,
                     unsigned comp,
                     unsigned num_comps,
                     unsigned old_num_comps)
{
   unsigned channels[4];

   b->cursor = nir_before_instr(&intr->instr);

   nir_intrinsic_instr *new_intr = nir_intrinsic_instr_create(b->shader, intr->intrinsic);
   nir_def_init(&new_intr->instr, &new_intr->def, num_comps,
                intr->def.bit_size);
   new_intr->num_components = num_comps;

   nir_deref_instr *deref = nir_build_deref_var(b, var);
   deref = r600_clone_deref_array(b, deref, nir_src_as_deref(intr->src[0]));

   new_intr->src[0] = nir_src_for_ssa(&deref->def);

   /* Interpolation intrinsics carry an extra offset/sample source. */
   if (intr->intrinsic == nir_intrinsic_interp_deref_at_offset ||
       intr->intrinsic == nir_intrinsic_interp_deref_at_sample)
      new_intr->src[1] = nir_src_for_ssa(intr->src[1].ssa);

   nir_builder_instr_insert(b, &new_intr->instr);

   for (unsigned i = 0; i < old_num_comps; ++i)
      channels[i] = comp - var->data.location_frac + i;
   nir_def *load = nir_swizzle(b, &new_intr->def, channels, old_num_comps);
   nir_def_replace(&intr->def, load);
}
152
153 static bool
r600_vec_instr_stack_pop(nir_builder * b,struct util_dynarray * stack,nir_instr * instr,nir_variable * updated_vars[16][4])154 r600_vec_instr_stack_pop(nir_builder *b,
155 struct util_dynarray *stack,
156 nir_instr *instr,
157 nir_variable *updated_vars[16][4])
158 {
159 nir_instr *last = util_dynarray_pop(stack, nir_instr *);
160
161 assert(last == instr);
162 assert(last->type == nir_instr_type_intrinsic);
163
164 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(last);
165 nir_variable *var = nir_intrinsic_get_var(intr, 0);
166 unsigned loc = r600_correct_location(var);
167
168 nir_variable *new_var;
169 new_var = updated_vars[loc][var->data.location_frac];
170
171 unsigned num_comps = glsl_get_vector_elements(glsl_without_array(new_var->type));
172
173 unsigned old_num_comps = glsl_get_vector_elements(glsl_without_array(var->type));
174
175 /* Don't bother walking the stack if this component can't be vectorised. */
176 if (old_num_comps > 3) {
177 return false;
178 }
179
180 if (new_var == var) {
181 return false;
182 }
183
184 r600_create_new_load(
185 b, intr, new_var, var->data.location_frac, num_comps, old_num_comps);
186 return true;
187 }
188
189 static bool
r600_cmp_func(const void * data1,const void * data2)190 r600_cmp_func(const void *data1, const void *data2)
191 {
192 const struct util_dynarray *arr1 = data1;
193 const struct util_dynarray *arr2 = data2;
194
195 const nir_instr *instr1 = *(nir_instr **)util_dynarray_begin(arr1);
196 const nir_instr *instr2 = *(nir_instr **)util_dynarray_begin(arr2);
197
198 return r600_io_access_same_var(instr1, instr2);
199 }
200
/* Fold `data` (the object itself, not its address) into an xxHash seed. */
#define HASH(hash, data) XXH32(&(data), sizeof(data), (hash))
202
203 static uint32_t
r600_hash_instr(const nir_instr * instr)204 r600_hash_instr(const nir_instr *instr)
205 {
206 assert(instr->type == nir_instr_type_intrinsic);
207
208 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
209 nir_variable *var = nir_intrinsic_get_var(intr, 0);
210
211 uint32_t hash = 0;
212
213 hash = HASH(hash, var->type);
214 return HASH(hash, var->data.location);
215 }
216
217 static uint32_t
r600_hash_stack(const void * data)218 r600_hash_stack(const void *data)
219 {
220 const struct util_dynarray *stack = data;
221 const nir_instr *first = *(nir_instr **)util_dynarray_begin(stack);
222 return r600_hash_instr(first);
223 }
224
225 static struct set *
r600_vec_instr_set_create(void)226 r600_vec_instr_set_create(void)
227 {
228 return _mesa_set_create(NULL, r600_hash_stack, r600_cmp_func);
229 }
230
231 static void
r600_vec_instr_set_destroy(struct set * instr_set)232 r600_vec_instr_set_destroy(struct set *instr_set)
233 {
234 _mesa_set_destroy(instr_set, NULL);
235 }
236
237 static void
r600_vec_instr_set_add(struct set * instr_set,nir_instr * instr)238 r600_vec_instr_set_add(struct set *instr_set, nir_instr *instr)
239 {
240 if (!r600_instr_can_rewrite(instr)) {
241 return;
242 }
243
244 struct util_dynarray *new_stack = r600_vec_instr_stack_create(instr_set);
245 r600_vec_instr_stack_push(new_stack, instr);
246
247 struct set_entry *entry = _mesa_set_search(instr_set, new_stack);
248
249 if (entry) {
250 ralloc_free(new_stack);
251 struct util_dynarray *stack = (struct util_dynarray *)entry->key;
252 r600_vec_instr_stack_push(stack, instr);
253 return;
254 }
255
256 _mesa_set_add(instr_set, new_stack);
257
258 return;
259 }
260
/* Remove `instr` from its stack in the set and try to rewrite it via
 * r600_vec_instr_stack_pop().  Empty stacks are dropped from the set.
 * Returns true if the shader was modified.
 */
static bool
r600_vec_instr_set_remove(nir_builder *b,
                          struct set *instr_set,
                          nir_instr *instr,
                          nir_variable *updated_vars[16][4])
{
   if (!r600_instr_can_rewrite(instr)) {
      return false;
   }
   /*
    * It's pretty unfortunate that we have to do this, but it's a side effect
    * of the hash set interfaces. The hash set assumes that we're only
    * interested in storing one equivalent element at a time, and if we try to
    * insert a duplicate element it will remove the original. We could hack up
    * the comparison function to "know" which input is an instruction we
    * passed in and which is an array that's part of the entry, but that
    * wouldn't work because we need to pass an array to _mesa_set_add() in
    * vec_instr_add() above, and _mesa_set_add() will call our comparison
    * function as well.
    */
   struct util_dynarray *temp = r600_vec_instr_stack_create(instr_set);
   r600_vec_instr_stack_push(temp, instr);
   struct set_entry *entry = _mesa_set_search(instr_set, temp);
   ralloc_free(temp);

   if (entry) {
      struct util_dynarray *stack = (struct util_dynarray *)entry->key;
      bool progress = r600_vec_instr_stack_pop(b, stack, instr, updated_vars);

      /* Once all of a location's loads have been popped, drop its stack. */
      if (!util_dynarray_num_elements(stack, nir_instr *))
         _mesa_set_remove(instr_set, entry);

      return progress;
   }

   return false;
}
298
/* Vectorize one block and, recursively, its dominator-tree children.
 *
 * All rewritable loads of the block are first registered in program order
 * into instr_set, then the dominated children are processed, and finally
 * this block's instructions are popped and rewritten in reverse order —
 * guaranteeing each rewrite happens at a point dominating all grouped uses.
 * Returns true if any load was rewritten.
 */
static bool
r600_vectorize_block(nir_builder *b,
                     nir_block *block,
                     struct set *instr_set,
                     nir_variable *updated_vars[16][4])
{
   bool progress = false;

   nir_foreach_instr_safe(instr, block) { r600_vec_instr_set_add(instr_set, instr); }

   for (unsigned i = 0; i < block->num_dom_children; i++) {
      nir_block *child = block->dom_children[i];
      progress |= r600_vectorize_block(b, child, instr_set, updated_vars);
   }

   nir_foreach_instr_reverse_safe(instr, block)
   {
      progress |= r600_vec_instr_set_remove(b, instr_set, instr, updated_vars);
   }

   return progress;
}
321
322 static void
r600_create_new_io_var(nir_shader * shader,nir_variable * vars[16][4],unsigned location,unsigned comps)323 r600_create_new_io_var(nir_shader *shader,
324 nir_variable *vars[16][4],
325 unsigned location,
326 unsigned comps)
327 {
328 unsigned num_comps = util_bitcount(comps);
329 assert(num_comps > 1);
330
331 /* Note: u_bit_scan() strips a component of the comps bitfield here */
332 unsigned first_comp = u_bit_scan(&comps);
333
334 nir_variable *var = nir_variable_clone(vars[location][first_comp], shader);
335 var->data.location_frac = first_comp;
336 var->type = glsl_replace_vector_type(var->type, num_comps);
337
338 nir_shader_add_variable(shader, var);
339
340 vars[location][first_comp] = var;
341
342 while (comps) {
343 const int comp = u_bit_scan(&comps);
344 if (vars[location][comp]) {
345 vars[location][comp] = var;
346 }
347 }
348 }
349
350 static inline bool
r600_variables_can_merge(const nir_variable * lhs,const nir_variable * rhs)351 r600_variables_can_merge(const nir_variable *lhs, const nir_variable *rhs)
352 {
353 return (glsl_get_base_type(lhs->type) == glsl_get_base_type(rhs->type));
354 }
355
/* Record every rewritable `mode` variable into vars[slot][location_frac]
 * and create a merged wider variable for each attribute slot where two or
 * more compatible variables can be combined.
 */
static void
r600_create_new_io_vars(nir_shader *shader,
                        nir_variable_mode mode,
                        nir_variable *vars[16][4])
{
   bool can_rewrite_vars = false;
   nir_foreach_variable_with_modes(var, shader, mode)
   {
      if (r600_variable_can_rewrite(var)) {
         can_rewrite_vars = true;
         unsigned loc = r600_correct_location(var);
         vars[loc][var->data.location_frac] = var;
      }
   }

   if (!can_rewrite_vars)
      return;

   /* We don't handle combining vars of different type e.g. different array
    * lengths.
    */
   for (unsigned i = 0; i < 16; i++) {
      unsigned comps = 0;

      /* j stops at 2: a merge needs a partner at a higher component k. */
      for (unsigned j = 0; j < 3; j++) {

         if (!vars[i][j])
            continue;

         for (unsigned k = j + 1; k < 4; k++) {
            if (!vars[i][k])
               continue;

            if (!r600_variables_can_merge(vars[i][j], vars[i][k]))
               continue;

            /* Set comps */
            for (unsigned n = 0; n < glsl_get_components(vars[i][j]->type); ++n)
               comps |= 1 << (vars[i][j]->data.location_frac + n);

            for (unsigned n = 0; n < glsl_get_components(vars[i][k]->type); ++n)
               comps |= 1 << (vars[i][k]->data.location_frac + n);
         }
      }
      /* Only slots with a mergeable pair (comps != 0) get a new variable. */
      if (comps)
         r600_create_new_io_var(shader, vars, i, comps);
   }
}
404
405 static bool
r600_vectorize_io_impl(nir_function_impl * impl)406 r600_vectorize_io_impl(nir_function_impl *impl)
407 {
408 nir_builder b = nir_builder_create(impl);
409
410 nir_metadata_require(impl, nir_metadata_dominance);
411
412 nir_shader *shader = impl->function->shader;
413 nir_variable *updated_vars[16][4] = {0};
414
415 r600_create_new_io_vars(shader, nir_var_shader_in, updated_vars);
416
417 struct set *instr_set = r600_vec_instr_set_create();
418 bool progress =
419 r600_vectorize_block(&b, nir_start_block(impl), instr_set, updated_vars);
420
421 if (progress) {
422 nir_metadata_preserve(impl, nir_metadata_control_flow);
423 } else {
424 nir_metadata_preserve(impl, nir_metadata_all);
425 }
426
427 r600_vec_instr_set_destroy(instr_set);
428 return false;
429 }
430
431 bool
r600_vectorize_vs_inputs(nir_shader * shader)432 r600_vectorize_vs_inputs(nir_shader *shader)
433 {
434 bool progress = false;
435
436 if (shader->info.stage != MESA_SHADER_VERTEX)
437 return false;
438
439 nir_foreach_function_impl(impl, shader)
440 {
441 progress |= r600_vectorize_io_impl(impl);
442 }
443
444 return progress;
445 }
446