1 /*
2 * Copyright 2010 Marek Olšák <[email protected]>
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include <stdlib.h>
7 #include <stdbool.h>
8 #include "radeon_remove_constants.h"
9 #include "radeon_dataflow.h"
10 #include "util/bitscan.h"
11
12 struct const_remap_state {
13 /* Used when emiting shaders constants. */
14 struct const_remap *remap_table;
15 /* Used when rewritign registers */
16 struct const_remap *inv_remap_table;
17 /* Old costant layout. */
18 struct rc_constant *constants;
19 /* New constant layout. */
20 struct rc_constant_list new_constants;
21 /* Marks immediates that are used as a vector. Those will be just copied. */
22 uint8_t *is_used_as_vector;
23 bool has_rel_addr;
24 bool are_externals_remapped;
25 bool is_identity;
26 };
27
remap_regs(struct rc_instruction * inst,struct const_remap * inv_remap_table)28 static void remap_regs(struct rc_instruction *inst,
29 struct const_remap *inv_remap_table)
30 {
31 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
32 for(unsigned src = 0; src < opcode->NumSrcRegs; ++src) {
33 if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT)
34 continue;
35 unsigned old_index = inst->U.I.SrcReg[src].Index;
36 for (unsigned chan = 0; chan < 4; chan++) {
37 unsigned old_swz = GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
38 if (old_swz <= RC_SWIZZLE_W) {
39 inst->U.I.SrcReg[src].Index = inv_remap_table[old_index].index[old_swz];
40 SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan,
41 inv_remap_table[old_index].swizzle[old_swz]);
42 }
43 }
44 }
45 }
46
mark_used(void * userdata,struct rc_instruction * inst,struct rc_src_register * src)47 static void mark_used(void * userdata, struct rc_instruction * inst,
48 struct rc_src_register * src)
49 {
50 struct const_remap_state* d = userdata;
51
52 if (src->File == RC_FILE_CONSTANT) {
53 uint8_t mask = 0;
54 if (src->RelAddr) {
55 d->has_rel_addr = true;
56 } else {
57 for (unsigned chan = 0; chan < 4; chan++) {
58 char swz = GET_SWZ(src->Swizzle, chan);
59 if (swz > RC_SWIZZLE_W)
60 continue;
61 mask |= 1 << swz;
62 }
63 }
64 d->constants[src->Index].UseMask |= mask;
65 if (d->constants[src->Index].Type == RC_CONSTANT_IMMEDIATE &&
66 util_bitcount(mask) > 1) {
67 d->is_used_as_vector[src->Index] |= mask;
68 }
69 }
70 }
71
place_constant_in_free_slot(struct const_remap_state * s,unsigned i)72 static void place_constant_in_free_slot(struct const_remap_state *s, unsigned i)
73 {
74 unsigned count = s->new_constants.Count;
75 for (unsigned chan = 0; chan < 4; chan++) {
76 s->inv_remap_table[i].index[chan] = count;
77 s->inv_remap_table[i].swizzle[chan] = chan;
78 if (s->constants[i].UseMask & (1 << chan)) {
79 s->remap_table[count].index[chan] = i;
80 s->remap_table[count].swizzle[chan] = chan;
81 }
82 }
83 s->new_constants.Constants[count] = s->constants[i];
84
85 if (count != i) {
86 if (s->constants[i].Type == RC_CONSTANT_EXTERNAL)
87 s->are_externals_remapped = true;
88 s->is_identity = false;
89 }
90 s->new_constants.Count++;
91 }
92
place_immediate_in_free_slot(struct const_remap_state * s,unsigned i)93 static void place_immediate_in_free_slot(struct const_remap_state *s, unsigned i)
94 {
95 assert(util_bitcount(s->is_used_as_vector[i]) > 1);
96
97 unsigned count = s->new_constants.Count;
98
99 s->new_constants.Constants[count] = s->constants[i];
100 s->new_constants.Constants[count].UseMask = s->is_used_as_vector[i];
101 for (unsigned chan = 0; chan < 4; chan++) {
102 if (s->constants[i].UseMask & 1 << chan & s->is_used_as_vector[i]) {
103 s->inv_remap_table[i].index[chan] = count;
104 s->inv_remap_table[i].swizzle[chan] = chan;
105 }
106 }
107 if (count != i) {
108 s->is_identity = false;
109 }
110 s->new_constants.Count++;
111 }
112
try_merge_constants_external(struct const_remap_state * s,unsigned i)113 static void try_merge_constants_external(struct const_remap_state *s, unsigned i)
114 {
115 assert(util_bitcount(s->constants[i].UseMask) == 1);
116 for (unsigned j = 0; j < s->new_constants.Count; j++) {
117 for (unsigned chan = 0; chan < 4; chan++) {
118 if (s->remap_table[j].swizzle[chan] == RC_SWIZZLE_UNUSED) {
119 /* Writemask to swizzle */
120 unsigned swizzle = 0;
121 for (; swizzle < 4; swizzle++)
122 if (s->constants[i].UseMask >> swizzle == 1)
123 break;
124 /* Update the remap tables. */
125 s->remap_table[j].index[chan] = i;
126 s->remap_table[j].swizzle[chan] = swizzle;
127 s->inv_remap_table[i].index[swizzle] = j;
128 s->inv_remap_table[i].swizzle[swizzle] = chan;
129 s->are_externals_remapped = true;
130 s->is_identity = false;
131 return;
132 }
133 }
134 }
135 place_constant_in_free_slot(s, i);
136 }
137
init_constant_remap_state(struct radeon_compiler * c,struct const_remap_state * s)138 static void init_constant_remap_state(struct radeon_compiler *c, struct const_remap_state *s)
139 {
140 s->is_identity = true;
141 s->is_used_as_vector = malloc(c->Program.Constants.Count);
142 s->new_constants.Constants =
143 malloc(sizeof(struct rc_constant) * c->Program.Constants.Count);
144 s->new_constants._Reserved = c->Program.Constants.Count;
145 s->constants = c->Program.Constants.Constants;
146 memset(s->is_used_as_vector, 0, c->Program.Constants.Count);
147
148 s->remap_table = malloc(c->Program.Constants.Count * sizeof(struct const_remap));
149 s->inv_remap_table =
150 malloc(c->Program.Constants.Count * sizeof(struct const_remap));
151 for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
152 /* Clear the UseMask, we will update it later. */
153 s->constants[i].UseMask = 0;
154 for (unsigned swz = 0; swz < 4; swz++) {
155 s->remap_table[i].index[swz] = -1;
156 s->remap_table[i].swizzle[swz] = RC_SWIZZLE_UNUSED;
157 }
158 }
159 }
160
rc_remove_unused_constants(struct radeon_compiler * c,void * user)161 void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
162 {
163 struct const_remap **out_remap_table = (struct const_remap **)user;
164 struct rc_constant *constants = c->Program.Constants.Constants;
165 struct const_remap_state remap_state = {};
166 struct const_remap_state *s = &remap_state;
167
168 if (!c->Program.Constants.Count) {
169 *out_remap_table = NULL;
170 return;
171 }
172
173 init_constant_remap_state(c, s);
174
175 /* Pass 1: Mark used constants. */
176 for (struct rc_instruction *inst = c->Program.Instructions.Next;
177 inst != &c->Program.Instructions; inst = inst->Next) {
178 rc_for_all_reads_src(inst, mark_used, s);
179 }
180
181 /* Pass 2: If there is relative addressing or dead constant elimination
182 * is disabled, mark all externals as used. */
183 if (s->has_rel_addr || !c->remove_unused_constants) {
184 for (unsigned i = 0; i < c->Program.Constants.Count; i++)
185 if (constants[i].Type == RC_CONSTANT_EXTERNAL)
186 s->constants[i].UseMask = RC_MASK_XYZW;
187 }
188
189
190 /* Pass 3: Make the remapping table and remap constants.
191 * First iterate over used vec2, vec3 and vec4 externals and place them in a free
192 * slots. While we could in theory merge 2 vec2 together, its not worth it
193 * as we would have to a) check that the swizzle is valid, b) transforming
194 * xy to zw would mean we need rgb and alpha source slot, thus it would hurt
195 * us potentially during pair scheduling. */
196 for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
197 if (constants[i].Type != RC_CONSTANT_EXTERNAL)
198 continue;
199 if (util_bitcount(s->constants[i].UseMask) > 1) {
200 place_constant_in_free_slot(s, i);
201 }
202 }
203
204 /* Now iterate over scalarar externals and put them into empty slots. */
205 for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
206 if (constants[i].Type != RC_CONSTANT_EXTERNAL)
207 continue;
208 if (util_bitcount(s->constants[i].UseMask) == 1)
209 try_merge_constants_external(s, i);
210 }
211
212 /* Now put immediates which are used as vectors. */
213 for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
214 if (constants[i].Type == RC_CONSTANT_IMMEDIATE &&
215 util_bitcount(s->constants[i].UseMask) > 0 &&
216 util_bitcount(s->is_used_as_vector[i]) > 0) {
217 place_immediate_in_free_slot(s, i);
218 }
219 }
220
221 /* Now walk over scalar immediates and try to:
222 * a) check for duplicates,
223 * b) find free slot.
224 * All of this is already done by rc_constants_add_immediate_scalar,
225 * so just use it.
226 */
227 for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
228 if (constants[i].Type != RC_CONSTANT_IMMEDIATE)
229 continue;
230 for (unsigned chan = 0; chan < 4; chan++) {
231 if ((s->constants[i].UseMask) & (1 << chan) &&
232 (~(s->is_used_as_vector[i]) & (1 << chan))) {
233 unsigned swz;
234 s->inv_remap_table[i].index[chan] =
235 rc_constants_add_immediate_scalar(&s->new_constants, constants[i].u.Immediate[chan], &swz);
236 s->inv_remap_table[i].swizzle[chan] = GET_SWZ(swz, 0);
237 s->is_identity = false;
238 }
239 }
240 }
241
242 /* Finally place state constants. */
243 for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
244 if (constants[i].Type != RC_CONSTANT_STATE)
245 continue;
246 if (util_bitcount(s->constants[i].UseMask) > 0) {
247 place_constant_in_free_slot(s, i);
248 }
249 }
250
251 /* is_identity ==> new_count == old_count
252 * !is_identity ==> new_count < old_count */
253 assert(!((s->has_rel_addr || !c->remove_unused_constants) && s->are_externals_remapped));
254
255 /* Pass 4: Redirect reads of all constants to their new locations. */
256 if (!s->is_identity) {
257 for (struct rc_instruction *inst = c->Program.Instructions.Next;
258 inst != &c->Program.Instructions; inst = inst->Next) {
259 remap_regs(inst, s->inv_remap_table);
260 }
261 }
262
263 /* Set the new constant count. Note that new_count may be less than
264 * Count even though the remapping function is identity. In that case,
265 * the constants have been removed at the end of the array. */
266 rc_constants_destroy(&c->Program.Constants);
267 c->Program.Constants = s->new_constants;
268
269 if (s->are_externals_remapped) {
270 *out_remap_table = s->remap_table;
271 } else {
272 *out_remap_table = NULL;
273 free(s->remap_table);
274 }
275
276 free(s->inv_remap_table);
277
278 if (c->Debug & RC_DBG_LOG)
279 rc_constants_print(&c->Program.Constants, s->remap_table);
280 }
281