xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/r300/compiler/radeon_remove_constants.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2010 Marek Olšák <[email protected]>
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include <stdlib.h>
7 #include <stdbool.h>
8 #include "radeon_remove_constants.h"
9 #include "radeon_dataflow.h"
10 #include "util/bitscan.h"
11 
12 struct const_remap_state {
13 	/* Used when emiting shaders constants. */
14 	struct const_remap *remap_table;
15 	/* Used when rewritign registers */
16 	struct const_remap *inv_remap_table;
17 	/* Old costant layout. */
18 	struct rc_constant *constants;
19 	/* New constant layout. */
20 	struct rc_constant_list new_constants;
21 	/* Marks immediates that are used as a vector. Those will be just copied. */
22 	uint8_t *is_used_as_vector;
23 	bool has_rel_addr;
24 	bool are_externals_remapped;
25 	bool is_identity;
26 };
27 
remap_regs(struct rc_instruction * inst,struct const_remap * inv_remap_table)28 static void remap_regs(struct rc_instruction *inst,
29 			struct const_remap *inv_remap_table)
30 {
31 	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
32 	for(unsigned src = 0; src < opcode->NumSrcRegs; ++src) {
33 		if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT)
34 			continue;
35 		unsigned old_index = inst->U.I.SrcReg[src].Index;
36 		for (unsigned chan = 0; chan < 4; chan++) {
37 			unsigned old_swz = GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
38 			if (old_swz <= RC_SWIZZLE_W) {
39 				inst->U.I.SrcReg[src].Index = inv_remap_table[old_index].index[old_swz];
40 				SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan,
41 					inv_remap_table[old_index].swizzle[old_swz]);
42 			}
43 		}
44 	}
45 }
46 
mark_used(void * userdata,struct rc_instruction * inst,struct rc_src_register * src)47 static void mark_used(void * userdata, struct rc_instruction * inst,
48 						struct rc_src_register * src)
49 {
50 	struct const_remap_state* d = userdata;
51 
52 	if (src->File == RC_FILE_CONSTANT) {
53 		uint8_t mask = 0;
54 		if (src->RelAddr) {
55 			d->has_rel_addr = true;
56 		} else {
57 			for (unsigned chan = 0; chan < 4; chan++) {
58 				char swz = GET_SWZ(src->Swizzle, chan);
59 				if (swz > RC_SWIZZLE_W)
60 					continue;
61 				mask |= 1 << swz;
62 			}
63 		}
64 		d->constants[src->Index].UseMask |= mask;
65 		if (d->constants[src->Index].Type == RC_CONSTANT_IMMEDIATE &&
66 			util_bitcount(mask) > 1) {
67 			d->is_used_as_vector[src->Index] |= mask;
68 		}
69 	}
70 }
71 
place_constant_in_free_slot(struct const_remap_state * s,unsigned i)72 static void place_constant_in_free_slot(struct const_remap_state *s, unsigned i)
73 {
74 	unsigned count = s->new_constants.Count;
75 	for (unsigned chan = 0; chan < 4; chan++) {
76 		s->inv_remap_table[i].index[chan] = count;
77 		s->inv_remap_table[i].swizzle[chan] = chan;
78 		if (s->constants[i].UseMask & (1 << chan)) {
79 			s->remap_table[count].index[chan] = i;
80 			s->remap_table[count].swizzle[chan] = chan;
81 		}
82 	}
83 	s->new_constants.Constants[count] = s->constants[i];
84 
85 	if (count != i) {
86 		if (s->constants[i].Type == RC_CONSTANT_EXTERNAL)
87 			s->are_externals_remapped = true;
88 		s->is_identity = false;
89 	}
90 	s->new_constants.Count++;
91 }
92 
place_immediate_in_free_slot(struct const_remap_state * s,unsigned i)93 static void place_immediate_in_free_slot(struct const_remap_state *s, unsigned i)
94 {
95 	assert(util_bitcount(s->is_used_as_vector[i]) > 1);
96 
97 	unsigned count = s->new_constants.Count;
98 
99 	s->new_constants.Constants[count] = s->constants[i];
100 	s->new_constants.Constants[count].UseMask = s->is_used_as_vector[i];
101 	for (unsigned chan = 0; chan < 4; chan++) {
102 		if (s->constants[i].UseMask & 1 << chan & s->is_used_as_vector[i]) {
103 			s->inv_remap_table[i].index[chan] = count;
104 			s->inv_remap_table[i].swizzle[chan] = chan;
105 		}
106 	}
107 	if (count != i) {
108 		s->is_identity = false;
109 	}
110 	s->new_constants.Count++;
111 }
112 
try_merge_constants_external(struct const_remap_state * s,unsigned i)113 static void try_merge_constants_external(struct const_remap_state *s, unsigned i)
114 {
115 	assert(util_bitcount(s->constants[i].UseMask) == 1);
116 	for (unsigned j = 0; j < s->new_constants.Count; j++) {
117 		for (unsigned chan = 0; chan < 4; chan++) {
118 			if (s->remap_table[j].swizzle[chan] == RC_SWIZZLE_UNUSED) {
119 				/* Writemask to swizzle */
120 				unsigned swizzle = 0;
121 				for (; swizzle < 4; swizzle++)
122 					if (s->constants[i].UseMask >> swizzle == 1)
123 						break;
124 				/* Update the remap tables. */
125 				s->remap_table[j].index[chan] = i;
126 				s->remap_table[j].swizzle[chan] = swizzle;
127 				s->inv_remap_table[i].index[swizzle] = j;
128 				s->inv_remap_table[i].swizzle[swizzle] = chan;
129 				s->are_externals_remapped = true;
130 				s->is_identity = false;
131 				return;
132 			}
133 		}
134 	}
135 	place_constant_in_free_slot(s, i);
136 }
137 
init_constant_remap_state(struct radeon_compiler * c,struct const_remap_state * s)138 static void init_constant_remap_state(struct radeon_compiler *c, struct const_remap_state *s)
139 {
140 	s->is_identity = true;
141 	s->is_used_as_vector = malloc(c->Program.Constants.Count);
142 	s->new_constants.Constants =
143 		malloc(sizeof(struct rc_constant) * c->Program.Constants.Count);
144 	s->new_constants._Reserved = c->Program.Constants.Count;
145 	s->constants = c->Program.Constants.Constants;
146 	memset(s->is_used_as_vector, 0, c->Program.Constants.Count);
147 
148 	s->remap_table = malloc(c->Program.Constants.Count * sizeof(struct const_remap));
149 	s->inv_remap_table =
150 	malloc(c->Program.Constants.Count * sizeof(struct const_remap));
151 	for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
152 		/* Clear the UseMask, we will update it later. */
153 		s->constants[i].UseMask = 0;
154 		for (unsigned swz = 0; swz < 4; swz++) {
155 			s->remap_table[i].index[swz] = -1;
156 			s->remap_table[i].swizzle[swz] = RC_SWIZZLE_UNUSED;
157 		}
158 	}
159 }
160 
rc_remove_unused_constants(struct radeon_compiler * c,void * user)161 void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
162 {
163 	struct const_remap **out_remap_table = (struct const_remap **)user;
164 	struct rc_constant *constants = c->Program.Constants.Constants;
165 	struct const_remap_state remap_state = {};
166 	struct const_remap_state *s = &remap_state;
167 
168 	if (!c->Program.Constants.Count) {
169 		*out_remap_table = NULL;
170 		return;
171 	}
172 
173 	init_constant_remap_state(c, s);
174 
175 	/* Pass 1: Mark used constants. */
176 	for (struct rc_instruction *inst = c->Program.Instructions.Next;
177 	     inst != &c->Program.Instructions; inst = inst->Next) {
178 		rc_for_all_reads_src(inst, mark_used, s);
179 	}
180 
181 	/* Pass 2: If there is relative addressing or dead constant elimination
182 	 * is disabled, mark all externals as used. */
183 	if (s->has_rel_addr || !c->remove_unused_constants) {
184 		for (unsigned i = 0; i < c->Program.Constants.Count; i++)
185 			if (constants[i].Type == RC_CONSTANT_EXTERNAL)
186 				s->constants[i].UseMask = RC_MASK_XYZW;
187 	}
188 
189 
190 	/* Pass 3: Make the remapping table and remap constants.
191 	 * First iterate over used vec2, vec3 and vec4 externals and place them in a free
192 	 * slots. While we could in theory merge 2 vec2 together, its not worth it
193 	 * as we would have to a) check that the swizzle is valid, b) transforming
194 	 * xy to zw would mean we need rgb and alpha source slot, thus it would hurt
195 	 * us potentially during pair scheduling. */
196 	for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
197 		if (constants[i].Type != RC_CONSTANT_EXTERNAL)
198 			continue;
199 		if (util_bitcount(s->constants[i].UseMask) > 1) {
200 			place_constant_in_free_slot(s, i);
201 		}
202 	}
203 
204 	/* Now iterate over scalarar externals and put them into empty slots. */
205 	for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
206 		if (constants[i].Type != RC_CONSTANT_EXTERNAL)
207 			continue;
208 		if (util_bitcount(s->constants[i].UseMask) == 1)
209 			try_merge_constants_external(s, i);
210 	}
211 
212 	/* Now put immediates which are used as vectors. */
213 	for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
214 		if (constants[i].Type == RC_CONSTANT_IMMEDIATE &&
215 			util_bitcount(s->constants[i].UseMask) > 0 &&
216 			util_bitcount(s->is_used_as_vector[i]) > 0) {
217 			place_immediate_in_free_slot(s, i);
218 		}
219 	}
220 
221 	/* Now walk over scalar immediates and try to:
222 	 *  a) check for duplicates,
223 	 *  b) find free slot.
224 	 *  All of this is already done by rc_constants_add_immediate_scalar,
225 	 *  so just use it.
226 	 */
227 	for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
228 		if (constants[i].Type != RC_CONSTANT_IMMEDIATE)
229 			continue;
230 		for (unsigned chan = 0; chan < 4; chan++) {
231 			if ((s->constants[i].UseMask) & (1 << chan) &&
232 				(~(s->is_used_as_vector[i]) & (1 << chan))) {
233 				unsigned swz;
234 				s->inv_remap_table[i].index[chan] =
235 					rc_constants_add_immediate_scalar(&s->new_constants, constants[i].u.Immediate[chan], &swz);
236 				s->inv_remap_table[i].swizzle[chan] = GET_SWZ(swz, 0);
237 				s->is_identity = false;
238 			}
239 		}
240 	}
241 
242 	/* Finally place state constants. */
243 	for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
244 		if (constants[i].Type != RC_CONSTANT_STATE)
245 			continue;
246 		if (util_bitcount(s->constants[i].UseMask) > 0) {
247 			place_constant_in_free_slot(s,  i);
248 		}
249 	}
250 
251 	/*  is_identity ==> new_count == old_count
252 	 * !is_identity ==> new_count <  old_count */
253 	assert(!((s->has_rel_addr || !c->remove_unused_constants) && s->are_externals_remapped));
254 
255 	/* Pass 4: Redirect reads of all constants to their new locations. */
256 	if (!s->is_identity) {
257 		for (struct rc_instruction *inst = c->Program.Instructions.Next;
258 		     inst != &c->Program.Instructions; inst = inst->Next) {
259 			remap_regs(inst, s->inv_remap_table);
260 		}
261 	}
262 
263 	/* Set the new constant count. Note that new_count may be less than
264 	 * Count even though the remapping function is identity. In that case,
265 	 * the constants have been removed at the end of the array. */
266 	rc_constants_destroy(&c->Program.Constants);
267 	c->Program.Constants = s->new_constants;
268 
269 	if (s->are_externals_remapped) {
270 		*out_remap_table = s->remap_table;
271 	} else {
272 		*out_remap_table = NULL;
273 		free(s->remap_table);
274 	}
275 
276 	free(s->inv_remap_table);
277 
278 	if (c->Debug & RC_DBG_LOG)
279 		rc_constants_print(&c->Program.Constants, s->remap_table);
280 }
281