1 /*
2 * Copyright © 2022 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir_builder.h"
25 #include "nir_search_helpers.h"
26
27 /**
28 * Attempt to reassociate and optimize some combinations of bfi instructions.
29 *
30 * Many shaders will use a sequence of bfi instructions to build complex
31 * bitfields. Sequences in real shaders look like:
32 *
33 * bfi(A, B, bfi(C, D, 0))
34 *
35 * Let A and C be constants,
36 *
37 * (A & (B << find_lsb(A)) | (~A & bfi(C, D, 0))
38 *
39 * If find_lsb(A) = 0 (i.e., A is odd), then
40 *
41 * (A & B) | (~A & bfi(C, D, 0))
42 * (A & B) | (~A & ((D << find_lsb(C)) & C) | (0 & ~C))
43 * (A & B) | (~A & ((D << find_lsb(C)) & C))
44 * (A & B) | ((D << find_lsb(C)) & (~A & C))
45 *
46 * If A and C are completely disjoint, that is (A & C) == 0, then (~A & C) == C
47 *
48 * (A & B) | ((D << find_lsb(C)) & C)
49 *
50 * If A and C are completely disjoint, then A & ~C == A
51 *
52 * (~C & (A & B)) | ((D << find_lsb(C)) & C)
53 * bfi(C, D, A & B)
54 *
55 * On some architectures, bfi instructions cannot use immediate values as
56 * sources, so the constants A, C, and 0 would have to be loaded into
57 * registers. After reassociation, only C must be loaded into a register.
58 * This saves instructions and register pressure.
59 *
60 * Ideally this would be implemented as an algebraic optimization, but there
61 * is no way to enforce the requirement that (A & C) == 0.
62 */
63
64 static bool
nir_opt_reassociate_bfi_instr(nir_builder * b,nir_alu_instr * bfiCD0,UNUSED void * cb_data)65 nir_opt_reassociate_bfi_instr(nir_builder *b,
66 nir_alu_instr *bfiCD0,
67 UNUSED void *cb_data)
68 {
69 if (bfiCD0->op != nir_op_bfi || bfiCD0->def.num_components != 1)
70 return false;
71
72 /* Enforce the bfi('#c', d, 0) part of the pattern. */
73 if (!nir_src_is_const(bfiCD0->src[0].src) ||
74 !nir_src_is_const(bfiCD0->src[2].src) ||
75 nir_src_comp_as_uint(bfiCD0->src[2].src,
76 bfiCD0->src[2].swizzle[0]) != 0) {
77 return false;
78 }
79
80 const uint64_t C = nir_src_comp_as_uint(bfiCD0->src[0].src,
81 bfiCD0->src[0].swizzle[0]);
82
83 if (!is_used_once(bfiCD0))
84 return false;
85
86 nir_src *use = list_first_entry(&bfiCD0->def.uses,
87 nir_src, use_link);
88
89 if (nir_src_parent_instr(use)->type != nir_instr_type_alu)
90 return false;
91
92 nir_alu_instr *bfiABx = nir_instr_as_alu(nir_src_parent_instr(use));
93 if (bfiABx->op != nir_op_bfi || bfiABx->def.num_components != 1)
94 return false;
95
96 /* Enforce the bfi('#a', b, ...) part of the pattern. */
97 if (!nir_src_is_const(bfiABx->src[0].src) ||
98 bfiABx->src[2].src.ssa != &bfiCD0->def) {
99 return false;
100 }
101
102 const uint64_t A = nir_src_comp_as_uint(bfiABx->src[0].src,
103 bfiABx->src[0].swizzle[0]);
104
105 /* Enforce the find_lsb(A) == 0 part of the pattern. */
106 if ((A & 1) == 0)
107 return false;
108
109 /* Enforce the "A and C are disjoint" part of the pattern. */
110 if ((A & C) != 0)
111 return false;
112
113 /* Now emit the new instructions. */
114 b->cursor = nir_before_instr(&bfiABx->instr);
115
116 /* The extra nir_mov_alu are to handle swizzles that might be on the
117 * original sources.
118 */
119 nir_def *new_bfi = nir_bfi(b,
120 nir_mov_alu(b, bfiCD0->src[0], 1),
121 nir_mov_alu(b, bfiCD0->src[1], 1),
122 nir_iand(b,
123 nir_mov_alu(b, bfiABx->src[0], 1),
124 nir_mov_alu(b, bfiABx->src[1], 1)));
125
126 nir_def_rewrite_uses(&bfiABx->def, new_bfi);
127 return true;
128 }
129
130 bool
nir_opt_reassociate_bfi(nir_shader * shader)131 nir_opt_reassociate_bfi(nir_shader *shader)
132 {
133 return nir_shader_alu_pass(shader, nir_opt_reassociate_bfi_instr,
134 nir_metadata_control_flow, NULL);
135 }
136