xref: /aosp_15_r20/external/mesa3d/src/compiler/nir/nir_opt_reassociate_bfi.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2022 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "nir_builder.h"
25 #include "nir_search_helpers.h"
26 
27 /**
28  * Attempt to reassociate and optimize some combinations of bfi instructions.
29  *
30  * Many shaders will use a sequence of bfi instructions to build complex
31  * bitfields.  Sequences in real shaders look like:
32  *
33  *    bfi(A, B, bfi(C, D, 0))
34  *
35  * Let A and C be constants,
36  *
37  *    (A & (B << find_lsb(A)) | (~A & bfi(C, D, 0))
38  *
39  * If find_lsb(A) = 0 (i.e., A is odd), then
40  *
41  *    (A & B) | (~A & bfi(C, D, 0))
42  *    (A & B) | (~A & ((D << find_lsb(C)) & C) | (0 & ~C))
43  *    (A & B) | (~A & ((D << find_lsb(C)) & C))
44  *    (A & B) | ((D << find_lsb(C)) & (~A & C))
45  *
46  * If A and C are completely disjoint, that is (A & C) == 0, then (~A & C) == C
47  *
48  *    (A & B) | ((D << find_lsb(C)) & C)
49  *
50  * If A and C are completely disjoint, then A & ~C == A
51  *
52  *    (~C & (A & B)) | ((D << find_lsb(C)) & C)
53  *    bfi(C, D, A & B)
54  *
55  * On some architectures, bfi instructions cannot use immediate values as
56  * sources, so the constants A, C, and 0 would have to be loaded into
57  * registers.  After reassociation, only C must be loaded into a register.
58  * This saves instructions and register pressure.
59  *
60  * Ideally this would be implemented as an algebraic optimization, but there
61  * is no way to enforce the requirement that (A & C) == 0.
62  */
63 
64 static bool
nir_opt_reassociate_bfi_instr(nir_builder * b,nir_alu_instr * bfiCD0,UNUSED void * cb_data)65 nir_opt_reassociate_bfi_instr(nir_builder *b,
66                               nir_alu_instr *bfiCD0,
67                               UNUSED void *cb_data)
68 {
69    if (bfiCD0->op != nir_op_bfi || bfiCD0->def.num_components != 1)
70       return false;
71 
72    /* Enforce the bfi('#c', d, 0) part of the pattern. */
73    if (!nir_src_is_const(bfiCD0->src[0].src) ||
74        !nir_src_is_const(bfiCD0->src[2].src) ||
75        nir_src_comp_as_uint(bfiCD0->src[2].src,
76                             bfiCD0->src[2].swizzle[0]) != 0) {
77       return false;
78    }
79 
80    const uint64_t C = nir_src_comp_as_uint(bfiCD0->src[0].src,
81                                            bfiCD0->src[0].swizzle[0]);
82 
83    if (!is_used_once(bfiCD0))
84       return false;
85 
86    nir_src *use = list_first_entry(&bfiCD0->def.uses,
87                                    nir_src, use_link);
88 
89    if (nir_src_parent_instr(use)->type != nir_instr_type_alu)
90       return false;
91 
92    nir_alu_instr *bfiABx = nir_instr_as_alu(nir_src_parent_instr(use));
93    if (bfiABx->op != nir_op_bfi || bfiABx->def.num_components != 1)
94       return false;
95 
96    /* Enforce the bfi('#a', b, ...) part of the pattern. */
97    if (!nir_src_is_const(bfiABx->src[0].src) ||
98        bfiABx->src[2].src.ssa != &bfiCD0->def) {
99       return false;
100    }
101 
102    const uint64_t A = nir_src_comp_as_uint(bfiABx->src[0].src,
103                                            bfiABx->src[0].swizzle[0]);
104 
105    /* Enforce the find_lsb(A) == 0 part of the pattern. */
106    if ((A & 1) == 0)
107       return false;
108 
109    /* Enforce the "A and C are disjoint" part of the pattern. */
110    if ((A & C) != 0)
111       return false;
112 
113    /* Now emit the new instructions. */
114    b->cursor = nir_before_instr(&bfiABx->instr);
115 
116    /* The extra nir_mov_alu are to handle swizzles that might be on the
117     * original sources.
118     */
119    nir_def *new_bfi = nir_bfi(b,
120                               nir_mov_alu(b, bfiCD0->src[0], 1),
121                               nir_mov_alu(b, bfiCD0->src[1], 1),
122                               nir_iand(b,
123                                        nir_mov_alu(b, bfiABx->src[0], 1),
124                                        nir_mov_alu(b, bfiABx->src[1], 1)));
125 
126    nir_def_rewrite_uses(&bfiABx->def, new_bfi);
127    return true;
128 }
129 
130 bool
nir_opt_reassociate_bfi(nir_shader * shader)131 nir_opt_reassociate_bfi(nir_shader *shader)
132 {
133    return nir_shader_alu_pass(shader, nir_opt_reassociate_bfi_instr,
134                               nir_metadata_control_flow, NULL);
135 }
136