xref: /aosp_15_r20/external/mesa3d/src/panfrost/util/pan_lower_store_component.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright (C) 2020-2022 Collabora Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors (Collabora):
24  *      Alyssa Rosenzweig <[email protected]>
25  */
26 
27 #include "compiler/nir/nir_builder.h"
28 #include "pan_ir.h"
29 
30 /*
31  * If the shader packs multiple varyings into the same location with different
32  * location_frac, we'll need to lower to a single varying store that collects
33  * all of the channels together. This is because the varying instruction on
34  * Midgard and Bifrost is slot-based, writing out an entire vec4 slot at a time.
35  */
36 static bool
lower_store_component(nir_builder * b,nir_intrinsic_instr * intr,void * data)37 lower_store_component(nir_builder *b, nir_intrinsic_instr *intr, void *data)
38 {
39    if (intr->intrinsic != nir_intrinsic_store_output)
40       return false;
41 
42    struct hash_table_u64 *slots = data;
43    unsigned component = nir_intrinsic_component(intr);
44    nir_src *slot_src = nir_get_io_offset_src(intr);
45    uint64_t slot = nir_src_as_uint(*slot_src) + nir_intrinsic_base(intr);
46 
47    nir_intrinsic_instr *prev = _mesa_hash_table_u64_search(slots, slot);
48    unsigned mask = (prev ? nir_intrinsic_write_mask(prev) : 0);
49 
50    nir_def *value = intr->src[0].ssa;
51    b->cursor = nir_before_instr(&intr->instr);
52 
53    nir_def *undef = nir_undef(b, 1, value->bit_size);
54    nir_def *channels[4] = {undef, undef, undef, undef};
55 
56    /* Copy old */
57    u_foreach_bit(i, mask) {
58       assert(prev != NULL);
59       nir_def *prev_ssa = prev->src[0].ssa;
60       channels[i] = nir_channel(b, prev_ssa, i);
61    }
62 
63    /* Copy new */
64    unsigned new_mask = nir_intrinsic_write_mask(intr);
65    mask |= (new_mask << component);
66 
67    u_foreach_bit(i, new_mask) {
68       assert(component + i < 4);
69       channels[component + i] = nir_channel(b, value, i);
70    }
71 
72    intr->num_components = util_last_bit(mask);
73    nir_src_rewrite(&intr->src[0], nir_vec(b, channels, intr->num_components));
74 
75    nir_intrinsic_set_component(intr, 0);
76    nir_intrinsic_set_write_mask(intr, mask);
77 
78    if (prev) {
79       _mesa_hash_table_u64_remove(slots, slot);
80       nir_instr_remove(&prev->instr);
81    }
82 
83    _mesa_hash_table_u64_insert(slots, slot, intr);
84    return false;
85 }
86 
87 bool
pan_nir_lower_store_component(nir_shader * s)88 pan_nir_lower_store_component(nir_shader *s)
89 {
90    assert(s->info.stage == MESA_SHADER_VERTEX);
91 
92    struct hash_table_u64 *stores = _mesa_hash_table_u64_create(NULL);
93    bool progress = nir_shader_intrinsics_pass(
94       s, lower_store_component,
95       nir_metadata_control_flow, stores);
96    _mesa_hash_table_u64_destroy(stores);
97    return progress;
98 }
99