/*
 * Copyright 2018 Timothy Arceri
 * SPDX-License-Identifier: MIT
 */

#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"
#define XXH_INLINE_ALL
#include "util/xxhash.h"

/** @file sfn_nir_vectorize_vs_inputs.c
 *
 * Replaces scalar load_deref operations on vertex shader generic
 * attribute inputs with vectorized loads of merged input variables.
 */
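/*
 * Illustrative sketch (not part of the original sources): given two scalar
 * inputs sharing a generic attribute location, e.g.
 *
 *    in float a;   // VERT_ATTRIB_GENERIC0, component 0
 *    in float b;   // VERT_ATTRIB_GENERIC0, component 1
 *
 * the pass creates one merged two-component variable for that location and
 * rewrites each original load_deref into a load of the merged variable
 * followed by a swizzle selecting the component(s) the old load produced.
 */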
bool
r600_vectorize_vs_inputs(nir_shader *shader);

/* Recursively rebuild the array portion of src_head's deref chain on top of
 * dst_tail.
 */
static nir_deref_instr *
r600_clone_deref_array(nir_builder *b,
                       nir_deref_instr *dst_tail,
                       const nir_deref_instr *src_head)
{
   const nir_deref_instr *parent = nir_deref_instr_parent(src_head);

   if (!parent)
      return dst_tail;

   assert(src_head->deref_type == nir_deref_type_array);

   dst_tail = r600_clone_deref_array(b, dst_tail, parent);

   return nir_build_deref_array(b, dst_tail, src_head->arr.index.ssa);
}

static bool
r600_variable_can_rewrite(nir_variable *var)
{

   /* Skip complex types we don't split in the first place */
   if (!glsl_type_is_vector_or_scalar(glsl_without_array(var->type)))
      return false;

   /* TODO: add 64/16bit support ? */
   if (glsl_get_bit_size(glsl_without_array(var->type)) != 32)
      return false;

   /* We only check VS generic attribute inputs */
   return (var->data.location >= VERT_ATTRIB_GENERIC0 &&
           var->data.location <= VERT_ATTRIB_GENERIC15);
}

static bool
r600_instr_can_rewrite(nir_instr *instr)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

   if (intr->num_components > 3)
      return false;

   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;

   nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
   if (!nir_deref_mode_is(deref, nir_var_shader_in))
      return false;

   return r600_variable_can_rewrite(nir_deref_instr_get_variable(deref));
}

static bool
r600_io_access_same_var(const nir_instr *instr1, const nir_instr *instr2)
{
   assert(instr1->type == nir_instr_type_intrinsic &&
          instr2->type == nir_instr_type_intrinsic);

   nir_intrinsic_instr *intr1 = nir_instr_as_intrinsic(instr1);
   nir_intrinsic_instr *intr2 = nir_instr_as_intrinsic(instr2);

   nir_variable *var1 = nir_intrinsic_get_var(intr1, 0);
   nir_variable *var2 = nir_intrinsic_get_var(intr2, 0);

   /* We don't handle combining vars of different base types, so skip those */
   if (glsl_get_base_type(var1->type) != glsl_get_base_type(var2->type))
      return false;

   if (var1->data.location != var2->data.location)
      return false;

   return true;
}

static struct util_dynarray *
r600_vec_instr_stack_create(void *mem_ctx)
{
   struct util_dynarray *stack = ralloc(mem_ctx, struct util_dynarray);
   util_dynarray_init(stack, mem_ctx);
   return stack;
}

static void
r600_vec_instr_stack_push(struct util_dynarray *stack, nir_instr *instr)
{
   util_dynarray_append(stack, nir_instr *, instr);
}

static unsigned
r600_correct_location(nir_variable *var)
{
   return var->data.location - VERT_ATTRIB_GENERIC0;
}

/* Emit a load of the merged, wider variable "var" in front of the original
 * load "intr" and replace intr's result with a swizzle selecting the
 * components the original load produced.
 */
static void
r600_create_new_load(nir_builder *b,
                     nir_intrinsic_instr *intr,
                     nir_variable *var,
                     unsigned comp,
                     unsigned num_comps,
                     unsigned old_num_comps)
{
   unsigned channels[4];

   b->cursor = nir_before_instr(&intr->instr);

   nir_intrinsic_instr *new_intr =
      nir_intrinsic_instr_create(b->shader, intr->intrinsic);
   nir_def_init(&new_intr->instr, &new_intr->def, num_comps,
                intr->def.bit_size);
   new_intr->num_components = num_comps;

   nir_deref_instr *deref = nir_build_deref_var(b, var);
   deref = r600_clone_deref_array(b, deref, nir_src_as_deref(intr->src[0]));

   new_intr->src[0] = nir_src_for_ssa(&deref->def);

   if (intr->intrinsic == nir_intrinsic_interp_deref_at_offset ||
       intr->intrinsic == nir_intrinsic_interp_deref_at_sample)
      new_intr->src[1] = nir_src_for_ssa(intr->src[1].ssa);

   nir_builder_instr_insert(b, &new_intr->instr);

   for (unsigned i = 0; i < old_num_comps; ++i)
      channels[i] = comp - var->data.location_frac + i;
   nir_def *load = nir_swizzle(b, &new_intr->def, channels, old_num_comps);
   nir_def_replace(&intr->def, load);
}

static bool
r600_vec_instr_stack_pop(nir_builder *b,
                         struct util_dynarray *stack,
                         nir_instr *instr,
                         nir_variable *updated_vars[16][4])
{
   nir_instr *last = util_dynarray_pop(stack, nir_instr *);

   assert(last == instr);
   assert(last->type == nir_instr_type_intrinsic);

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(last);
   nir_variable *var = nir_intrinsic_get_var(intr, 0);
   unsigned loc = r600_correct_location(var);

   nir_variable *new_var;
   new_var = updated_vars[loc][var->data.location_frac];

   unsigned num_comps = glsl_get_vector_elements(glsl_without_array(new_var->type));

   unsigned old_num_comps = glsl_get_vector_elements(glsl_without_array(var->type));

   /* Don't bother walking the stack if this component can't be vectorised. */
   if (old_num_comps > 3) {
      return false;
   }

   if (new_var == var) {
      return false;
   }

   r600_create_new_load(
      b, intr, new_var, var->data.location_frac, num_comps, old_num_comps);
   return true;
}

static bool
r600_cmp_func(const void *data1, const void *data2)
{
   const struct util_dynarray *arr1 = data1;
   const struct util_dynarray *arr2 = data2;

   const nir_instr *instr1 = *(nir_instr **)util_dynarray_begin(arr1);
   const nir_instr *instr2 = *(nir_instr **)util_dynarray_begin(arr2);

   return r600_io_access_same_var(instr1, instr2);
}

#define HASH(hash, data) XXH32(&(data), sizeof(data), (hash))

static uint32_t
r600_hash_instr(const nir_instr *instr)
{
   assert(instr->type == nir_instr_type_intrinsic);

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   nir_variable *var = nir_intrinsic_get_var(intr, 0);

   uint32_t hash = 0;

   hash = HASH(hash, var->type);
   return HASH(hash, var->data.location);
}

static uint32_t
r600_hash_stack(const void *data)
{
   const struct util_dynarray *stack = data;
   const nir_instr *first = *(nir_instr **)util_dynarray_begin(stack);
   return r600_hash_instr(first);
}

static struct set *
r600_vec_instr_set_create(void)
{
   return _mesa_set_create(NULL, r600_hash_stack, r600_cmp_func);
}

static void
r600_vec_instr_set_destroy(struct set *instr_set)
{
   _mesa_set_destroy(instr_set, NULL);
}

static void
r600_vec_instr_set_add(struct set *instr_set, nir_instr *instr)
{
   if (!r600_instr_can_rewrite(instr)) {
      return;
   }

   struct util_dynarray *new_stack = r600_vec_instr_stack_create(instr_set);
   r600_vec_instr_stack_push(new_stack, instr);

   struct set_entry *entry = _mesa_set_search(instr_set, new_stack);

   if (entry) {
      ralloc_free(new_stack);
      struct util_dynarray *stack = (struct util_dynarray *)entry->key;
      r600_vec_instr_stack_push(stack, instr);
      return;
   }

   _mesa_set_add(instr_set, new_stack);
}

static bool
r600_vec_instr_set_remove(nir_builder *b,
                          struct set *instr_set,
                          nir_instr *instr,
                          nir_variable *updated_vars[16][4])
{
   if (!r600_instr_can_rewrite(instr)) {
      return false;
   }
   /*
    * It's pretty unfortunate that we have to do this, but it's a side effect
    * of the hash set interfaces. The hash set assumes that we're only
    * interested in storing one equivalent element at a time, and if we try to
    * insert a duplicate element it will remove the original. We could hack up
    * the comparison function to "know" which input is an instruction we
    * passed in and which is an array that's part of the entry, but that
    * wouldn't work because we need to pass an array to _mesa_set_add() in
    * r600_vec_instr_set_add() above, and _mesa_set_add() will call our
    * comparison function as well.
    */
   struct util_dynarray *temp = r600_vec_instr_stack_create(instr_set);
   r600_vec_instr_stack_push(temp, instr);
   struct set_entry *entry = _mesa_set_search(instr_set, temp);
   ralloc_free(temp);

   if (entry) {
      struct util_dynarray *stack = (struct util_dynarray *)entry->key;
      bool progress = r600_vec_instr_stack_pop(b, stack, instr, updated_vars);

      if (!util_dynarray_num_elements(stack, nir_instr *))
         _mesa_set_remove(instr_set, entry);

      return progress;
   }

   return false;
}

/* Walk the dominance tree: collect vectorizable loads from this block and
 * its dominated children on the way down, then rewrite them in reverse
 * order on the way back up.
 */
static bool
r600_vectorize_block(nir_builder *b,
                     nir_block *block,
                     struct set *instr_set,
                     nir_variable *updated_vars[16][4])
{
   bool progress = false;

   nir_foreach_instr_safe(instr, block) {
      r600_vec_instr_set_add(instr_set, instr);
   }

   for (unsigned i = 0; i < block->num_dom_children; i++) {
      nir_block *child = block->dom_children[i];
      progress |= r600_vectorize_block(b, child, instr_set, updated_vars);
   }

   nir_foreach_instr_reverse_safe(instr, block)
   {
      progress |= r600_vec_instr_set_remove(b, instr_set, instr, updated_vars);
   }

   return progress;
}

static void
r600_create_new_io_var(nir_shader *shader,
                       nir_variable *vars[16][4],
                       unsigned location,
                       unsigned comps)
{
   unsigned num_comps = util_bitcount(comps);
   assert(num_comps > 1);

   /* Note: u_bit_scan() strips a component of the comps bitfield here */
   unsigned first_comp = u_bit_scan(&comps);
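   /* Worked example (illustrative, not from the original code): for
    * comps = 0x6 (components y and z set), u_bit_scan() returns 1 as
    * first_comp and leaves comps = 0x4 for the loop below.
    */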

   nir_variable *var = nir_variable_clone(vars[location][first_comp], shader);
   var->data.location_frac = first_comp;
   var->type = glsl_replace_vector_type(var->type, num_comps);

   nir_shader_add_variable(shader, var);

   vars[location][first_comp] = var;

   while (comps) {
      const int comp = u_bit_scan(&comps);
      if (vars[location][comp]) {
         vars[location][comp] = var;
      }
   }
}

static inline bool
r600_variables_can_merge(const nir_variable *lhs, const nir_variable *rhs)
{
   return (glsl_get_base_type(lhs->type) == glsl_get_base_type(rhs->type));
}

static void
r600_create_new_io_vars(nir_shader *shader,
                        nir_variable_mode mode,
                        nir_variable *vars[16][4])
{
   bool can_rewrite_vars = false;
   nir_foreach_variable_with_modes(var, shader, mode)
   {
      if (r600_variable_can_rewrite(var)) {
         can_rewrite_vars = true;
         unsigned loc = r600_correct_location(var);
         vars[loc][var->data.location_frac] = var;
      }
   }

   if (!can_rewrite_vars)
      return;

   /* We don't handle combining vars of different type e.g. different array
    * lengths.
    */
   for (unsigned i = 0; i < 16; i++) {
      unsigned comps = 0;

      for (unsigned j = 0; j < 3; j++) {

         if (!vars[i][j])
            continue;

         for (unsigned k = j + 1; k < 4; k++) {
            if (!vars[i][k])
               continue;

            if (!r600_variables_can_merge(vars[i][j], vars[i][k]))
               continue;

            /* Set comps */
            for (unsigned n = 0; n < glsl_get_components(vars[i][j]->type); ++n)
               comps |= 1 << (vars[i][j]->data.location_frac + n);

            for (unsigned n = 0; n < glsl_get_components(vars[i][k]->type); ++n)
               comps |= 1 << (vars[i][k]->data.location_frac + n);
         }
      }
      if (comps)
         r600_create_new_io_var(shader, vars, i, comps);
   }
}

static bool
r600_vectorize_io_impl(nir_function_impl *impl)
{
   nir_builder b = nir_builder_create(impl);

   nir_metadata_require(impl, nir_metadata_dominance);

   nir_shader *shader = impl->function->shader;
   nir_variable *updated_vars[16][4] = {0};

   r600_create_new_io_vars(shader, nir_var_shader_in, updated_vars);

   struct set *instr_set = r600_vec_instr_set_create();
   bool progress =
      r600_vectorize_block(&b, nir_start_block(impl), instr_set, updated_vars);

   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_control_flow);
   } else {
      nir_metadata_preserve(impl, nir_metadata_all);
   }

   r600_vec_instr_set_destroy(instr_set);
   return progress;
}

bool
r600_vectorize_vs_inputs(nir_shader *shader)
{
   bool progress = false;

   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;

   nir_foreach_function_impl(impl, shader)
   {
      progress |= r600_vectorize_io_impl(impl);
   }

   return progress;
}
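
/*
 * Hedged usage sketch (not part of this file): the pass is intended to run
 * like any other NIR pass on a vertex shader, e.g. via the standard
 * NIR_PASS helper from nir.h; the actual call site in the r600 driver may
 * differ.
 *
 *    bool progress = false;
 *    NIR_PASS(progress, nir, r600_vectorize_vs_inputs);
 */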