xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/r300/compiler/radeon_regalloc.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2009 Nicolai Haehnle.
3  * Copyright 2011 Tom Stellard <[email protected]>
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "radeon_regalloc.h"
8 #include "radeon_list.h"
9 
10 #define VERBOSE 0
11 
12 #define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
13 
14 const struct rc_class rc_class_list_vp [] = {
15 	{RC_REG_CLASS_VP_SINGLE, 4,
16 		{RC_MASK_X,
17 		 RC_MASK_Y,
18 		 RC_MASK_Z,
19 		 RC_MASK_W,
20 		 RC_MASK_NONE,
21 		 RC_MASK_NONE}},
22 	{RC_REG_CLASS_VP_DOUBLE, 6,
23 		{RC_MASK_X | RC_MASK_Y,
24 		 RC_MASK_X | RC_MASK_Z,
25 		 RC_MASK_X | RC_MASK_W,
26 		 RC_MASK_Y | RC_MASK_Z,
27 		 RC_MASK_Y | RC_MASK_W,
28 		 RC_MASK_Z | RC_MASK_W}},
29 	{RC_REG_CLASS_VP_TRIPLE, 4,
30 		{RC_MASK_X | RC_MASK_Y | RC_MASK_Z,
31 		 RC_MASK_X | RC_MASK_Y | RC_MASK_W,
32 		 RC_MASK_X | RC_MASK_Z | RC_MASK_W,
33 		 RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
34 		 RC_MASK_NONE,
35 		 RC_MASK_NONE}},
36 	{RC_REG_CLASS_VP_QUADRUPLE, 1,
37 		{RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
38 		 RC_MASK_NONE,
39 		 RC_MASK_NONE,
40 		 RC_MASK_NONE,
41 		 RC_MASK_NONE,
42 		 RC_MASK_NONE}}
43 };
44 
45 const struct rc_class rc_class_list_fp [] = {
46 	{RC_REG_CLASS_FP_SINGLE, 3,
47 		{RC_MASK_X,
48 		 RC_MASK_Y,
49 		 RC_MASK_Z,
50 		 RC_MASK_NONE,
51 		 RC_MASK_NONE,
52 		 RC_MASK_NONE}},
53 	{RC_REG_CLASS_FP_DOUBLE, 3,
54 		{RC_MASK_X | RC_MASK_Y,
55 		 RC_MASK_X | RC_MASK_Z,
56 		 RC_MASK_Y | RC_MASK_Z,
57 		 RC_MASK_NONE,
58 		 RC_MASK_NONE,
59 		 RC_MASK_NONE}},
60 	{RC_REG_CLASS_FP_TRIPLE, 1,
61 		{RC_MASK_X | RC_MASK_Y | RC_MASK_Z,
62 		 RC_MASK_NONE,
63 		 RC_MASK_NONE,
64 		 RC_MASK_NONE,
65 		 RC_MASK_NONE,
66 		 RC_MASK_NONE}},
67 	{RC_REG_CLASS_FP_ALPHA, 1,
68 		{RC_MASK_W,
69 		 RC_MASK_NONE,
70 		 RC_MASK_NONE,
71 		 RC_MASK_NONE,
72 		 RC_MASK_NONE,
73 		 RC_MASK_NONE}},
74 	{RC_REG_CLASS_FP_SINGLE_PLUS_ALPHA, 3,
75 		{RC_MASK_X | RC_MASK_W,
76 		 RC_MASK_Y | RC_MASK_W,
77 		 RC_MASK_Z | RC_MASK_W,
78 		 RC_MASK_NONE,
79 		 RC_MASK_NONE,
80 		 RC_MASK_NONE}},
81 	{RC_REG_CLASS_FP_DOUBLE_PLUS_ALPHA, 3,
82 		{RC_MASK_X | RC_MASK_Y | RC_MASK_W,
83 		 RC_MASK_X | RC_MASK_Z | RC_MASK_W,
84 		 RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
85 		 RC_MASK_NONE,
86 		 RC_MASK_NONE,
87 		 RC_MASK_NONE}},
88 	{RC_REG_CLASS_FP_TRIPLE_PLUS_ALPHA, 1,
89 		{RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
90 		 RC_MASK_NONE,
91 		 RC_MASK_NONE,
92 		 RC_MASK_NONE,
93 		 RC_MASK_NONE,
94 		 RC_MASK_NONE}},
95 	{RC_REG_CLASS_FP_X, 1,
96 		{RC_MASK_X,
97 		 RC_MASK_NONE,
98 		 RC_MASK_NONE,
99 		 RC_MASK_NONE,
100 		 RC_MASK_NONE,
101 		 RC_MASK_NONE}},
102 	{RC_REG_CLASS_FP_Y, 1,
103 		{RC_MASK_Y,
104 		 RC_MASK_NONE,
105 		 RC_MASK_NONE,
106 		 RC_MASK_NONE,
107 		 RC_MASK_NONE,
108 		 RC_MASK_NONE}},
109 	{RC_REG_CLASS_FP_Z, 1,
110 		{RC_MASK_Z,
111 		 RC_MASK_NONE,
112 		 RC_MASK_NONE,
113 		 RC_MASK_NONE,
114 		 RC_MASK_NONE,
115 		 RC_MASK_NONE}},
116 	{RC_REG_CLASS_FP_XY, 1,
117 		{RC_MASK_X | RC_MASK_Y,
118 		 RC_MASK_NONE,
119 		 RC_MASK_NONE,
120 		 RC_MASK_NONE,
121 		 RC_MASK_NONE,
122 		 RC_MASK_NONE}},
123 	{RC_REG_CLASS_FP_YZ, 1,
124 		{RC_MASK_Y | RC_MASK_Z,
125 		 RC_MASK_NONE,
126 		 RC_MASK_NONE,
127 		 RC_MASK_NONE,
128 		 RC_MASK_NONE,
129 		 RC_MASK_NONE}},
130 	{RC_REG_CLASS_FP_XZ, 1,
131 		{RC_MASK_X | RC_MASK_Z,
132 		 RC_MASK_NONE,
133 		 RC_MASK_NONE,
134 		 RC_MASK_NONE,
135 		 RC_MASK_NONE,
136 		 RC_MASK_NONE}},
137 	{RC_REG_CLASS_FP_XW, 1,
138 		{RC_MASK_X | RC_MASK_W,
139 		 RC_MASK_NONE,
140 		 RC_MASK_NONE,
141 		 RC_MASK_NONE,
142 		 RC_MASK_NONE,
143 		 RC_MASK_NONE}},
144 	{RC_REG_CLASS_FP_YW, 1,
145 		{RC_MASK_Y | RC_MASK_W,
146 		 RC_MASK_NONE,
147 		 RC_MASK_NONE,
148 		 RC_MASK_NONE,
149 		 RC_MASK_NONE,
150 		 RC_MASK_NONE}},
151 	{RC_REG_CLASS_FP_ZW, 1,
152 		{RC_MASK_Z | RC_MASK_W,
153 		 RC_MASK_NONE,
154 		 RC_MASK_NONE,
155 		 RC_MASK_NONE,
156 		 RC_MASK_NONE,
157 		 RC_MASK_NONE}},
158 	{RC_REG_CLASS_FP_XYW, 1,
159 		{RC_MASK_X | RC_MASK_Y | RC_MASK_W,
160 		 RC_MASK_NONE,
161 		 RC_MASK_NONE,
162 		 RC_MASK_NONE,
163 		 RC_MASK_NONE,
164 		 RC_MASK_NONE}},
165 	{RC_REG_CLASS_FP_YZW, 1,
166 		{RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
167 		 RC_MASK_NONE,
168 		 RC_MASK_NONE,
169 		 RC_MASK_NONE,
170 		 RC_MASK_NONE,
171 		 RC_MASK_NONE}},
172 	{RC_REG_CLASS_FP_XZW, 1,
173 		{RC_MASK_X | RC_MASK_Z | RC_MASK_W,
174 		 RC_MASK_NONE,
175 		 RC_MASK_NONE,
176 		 RC_MASK_NONE,
177 		 RC_MASK_NONE,
178 		 RC_MASK_NONE}}
179 };
180 
print_live_intervals(struct live_intervals * src)181 static void print_live_intervals(struct live_intervals * src)
182 {
183 	if (!src || !src->Used) {
184 		DBG("(null)");
185 		return;
186 	}
187 
188 	DBG("(%i,%i)", src->Start, src->End);
189 }
190 
overlap_live_intervals(struct live_intervals * a,struct live_intervals * b)191 static int overlap_live_intervals(struct live_intervals * a, struct live_intervals * b)
192 {
193 	if (VERBOSE) {
194 		DBG("overlap_live_intervals: ");
195 		print_live_intervals(a);
196 		DBG(" to ");
197 		print_live_intervals(b);
198 		DBG("\n");
199 	}
200 
201 	if (!a->Used || !b->Used) {
202 		DBG("    unused interval\n");
203 		return 0;
204 	}
205 
206 	if (a->Start > b->Start) {
207 		if (a->Start < b->End) {
208 			DBG("    overlap\n");
209 			return 1;
210 		}
211 	} else if (b->Start > a->Start) {
212 		if (b->Start < a->End) {
213 			DBG("    overlap\n");
214 			return 1;
215 		}
216 	} else { /* a->Start == b->Start */
217 		if (a->Start != a->End && b->Start != b->End) {
218 			DBG("    overlap\n");
219 			return 1;
220 		}
221 	}
222 
223 	DBG("    no overlap\n");
224 
225 	return 0;
226 }
227 
rc_find_class(const struct rc_class * classes,unsigned int writemask,unsigned int max_writemask_count)228 int rc_find_class(
229 	const struct rc_class * classes,
230 	unsigned int writemask,
231 	unsigned int max_writemask_count)
232 {
233 	unsigned int i;
234 	for (i = 0; i < RC_REG_CLASS_FP_COUNT; i++) {
235 		unsigned int j;
236 		if (classes[i].WritemaskCount > max_writemask_count) {
237 			continue;
238 		}
239 		for (j = 0; j < classes[i].WritemaskCount; j++) {
240 			if (classes[i].Writemasks[j] == writemask) {
241 				return i;
242 			}
243 		}
244 	}
245 	return -1;
246 }
247 
rc_overlap_live_intervals_array(struct live_intervals * a,struct live_intervals * b)248 unsigned int rc_overlap_live_intervals_array(
249 	struct live_intervals * a,
250 	struct live_intervals * b)
251 {
252 	unsigned int a_chan, b_chan;
253 	for (a_chan = 0; a_chan < 4; a_chan++) {
254 		for (b_chan = 0; b_chan < 4; b_chan++) {
255 			if (overlap_live_intervals(&a[a_chan], &b[b_chan])) {
256 					return 1;
257 			}
258 		}
259 	}
260 	return 0;
261 }
262 
263 #if VERBOSE
print_reg(int reg)264 static void print_reg(int reg)
265 {
266 	unsigned int index = reg_get_index(reg);
267 	unsigned int mask = reg_get_writemask(reg);
268 	fprintf(stderr, "Temp[%u].%c%c%c%c", index,
269 		mask & RC_MASK_X ? 'x' : '_',
270 		mask & RC_MASK_Y ? 'y' : '_',
271 		mask & RC_MASK_Z ? 'z' : '_',
272 		mask & RC_MASK_W ? 'w' : '_');
273 }
274 #endif
275 
add_register_conflicts(struct ra_regs * regs,unsigned int max_temp_regs)276 static void add_register_conflicts(
277 	struct ra_regs * regs,
278 	unsigned int max_temp_regs)
279 {
280 	unsigned int index, a_mask, b_mask;
281 	for (index = 0; index < max_temp_regs; index++) {
282 		for(a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) {
283 			for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW;
284 								b_mask++) {
285 				if (a_mask & b_mask) {
286 					ra_add_reg_conflict(regs,
287 						get_reg_id(index, a_mask),
288 						get_reg_id(index, b_mask));
289 				}
290 			}
291 		}
292 	}
293 }
294 
rc_build_interference_graph(struct ra_graph * graph,struct rc_list * variables)295 void rc_build_interference_graph(
296 	struct ra_graph * graph,
297 	struct rc_list * variables)
298 {
299 	unsigned node_index;
300 	struct rc_list * var_ptr;
301 
302 	/* Build the interference graph */
303 	for (var_ptr = variables, node_index = 0; var_ptr;
304 					var_ptr = var_ptr->Next, node_index++) {
305 		struct rc_list * a, * b;
306 		unsigned int b_index;
307 
308 		for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1;
309 						b; b = b->Next, b_index++) {
310 			struct rc_variable * var_a = a->Item;
311 			while (var_a) {
312 				struct rc_variable * var_b = b->Item;
313 				while (var_b) {
314 					if (rc_overlap_live_intervals_array(var_a->Live, var_b->Live)) {
315 						ra_add_node_interference(graph,
316 							node_index, b_index);
317 					}
318 					var_b = var_b->Friend;
319 				}
320 				var_a = var_a->Friend;
321 			}
322 		}
323 	}
324 }
325 
rc_init_regalloc_state(struct rc_regalloc_state * s,enum rc_program_type prog)326 void rc_init_regalloc_state(struct rc_regalloc_state *s, enum rc_program_type prog)
327 {
328 	unsigned i, j, index, class_count, max_temps;
329 	unsigned **ra_q_values;
330 
331 	/* Pre-computed q values.  This array describes the maximum number of
332 	 * a class's [row] registers that are in conflict with a single
333 	 * register from another class [column].
334 	 *
335 	 * For example:
336 	 * q_values[0][2] is 3, because a register from class 2
337 	 * (RC_REG_CLASS_FP_TRIPLE) may conflict with at most 3 registers from
338 	 * class 0 (RC_REG_CLASS_FP_SINGLE) e.g. T0.xyz conflicts with T0.x, T0.y,
339 	 * and T0.z.
340 	 *
341 	 * q_values[2][0] is 1, because a register from class 0
342 	 * (RC_REG_CLASS_FP_SINGLE) may conflict with at most 1 register from
343 	 * class 2 (RC_REG_CLASS_FP_TRIPLE) e.g. T0.x conflicts with T0.xyz
344 	 *
345 	 * The q values for each register class [row] will never be greater
346 	 * than the maximum number of writemask combinations for that class.
347 	 *
348 	 * For example:
349 	 *
350 	 * Class 2 (RC_REG_CLASS_FP_TRIPLE) only has 1 writemask combination,
351 	 * so no value in q_values[2][0..RC_REG_CLASS_FP_COUNT] will be greater
352 	 * than 1.
353 	 */
354 	const unsigned q_values_fp[RC_REG_CLASS_FP_COUNT][RC_REG_CLASS_FP_COUNT] = {
355 	{1, 2, 3, 0, 1, 2, 3, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2},
356 	{2, 3, 3, 0, 2, 3, 3, 2, 2, 2, 3, 3, 3, 2, 2, 2, 3, 3, 3},
357 	{1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
358 	{0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1},
359 	{1, 2, 3, 3, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3},
360 	{2, 3, 3, 3, 3, 3, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3},
361 	{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
362 	{1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1},
363 	{1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0},
364 	{1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1},
365 	{1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1},
366 	{1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1},
367 	{1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1},
368 	{1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1},
369 	{1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1},
370 	{1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1},
371 	{1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1},
372 	{1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
373 	{1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}
374 	};
375 
376 	const unsigned q_values_vp[RC_REG_CLASS_VP_COUNT][RC_REG_CLASS_VP_COUNT] = {
377 	{1, 2, 3, 4},
378 	{3, 5, 6, 6},
379 	{3, 4, 4, 4},
380 	{1, 1, 1, 1}
381 	};
382 
383 	if (prog == RC_FRAGMENT_PROGRAM) {
384 		s->class_list = rc_class_list_fp;
385 		class_count = RC_REG_CLASS_FP_COUNT;
386 		max_temps = R500_PFS_NUM_TEMP_REGS;
387 	} else {
388 		s->class_list = rc_class_list_vp;
389 		class_count = RC_REG_CLASS_VP_COUNT;
390 		max_temps = R300_VS_MAX_TEMPS;
391 	}
392 
393 	/* Allocate the main ra data structure */
394 	s->regs = ra_alloc_reg_set(NULL, max_temps * RC_MASK_XYZW,
395                                    true);
396 
397 	/* Create the register classes */
398 	for (i = 0; i < class_count; i++) {
399 		const struct rc_class *class = &s->class_list[i];
400 		s->classes[class->ID] = ra_alloc_reg_class(s->regs);
401 
402 		/* Assign registers to the classes */
403 		for (index = 0; index < max_temps; index++) {
404 			for (j = 0; j < class->WritemaskCount; j++) {
405 				int reg_id = get_reg_id(index,
406 						class->Writemasks[j]);
407 				ra_class_add_reg(s->classes[class->ID], reg_id);
408 			}
409 		}
410 	}
411 
412 	/* Set the q values.  The q_values array is indexed based on
413 	 * the rc_reg_class ID (RC_REG_CLASS_FP_*) which might be
414 	 * different than the ID assigned to that class by ra.
415 	 * This why we need to manually construct this list.
416 	 */
417 	ra_q_values = MALLOC(class_count * sizeof(unsigned *));
418 
419 	for (i = 0; i < class_count; i++) {
420 		ra_q_values[i] = MALLOC(class_count * sizeof(unsigned));
421 		for (j = 0; j < class_count; j++) {
422 			if (prog == RC_FRAGMENT_PROGRAM)
423 				ra_q_values[i][j] = q_values_fp[i][j];
424 			else
425 				ra_q_values[i][j] = q_values_vp[i][j];
426 		}
427 	}
428 
429 	/* Add register conflicts */
430 	add_register_conflicts(s->regs, max_temps);
431 
432 	ra_set_finalize(s->regs, ra_q_values);
433 
434 	for (i = 0; i < class_count; i++) {
435 		FREE(ra_q_values[i]);
436 	}
437 	FREE(ra_q_values);
438 }
439 
rc_destroy_regalloc_state(struct rc_regalloc_state * s)440 void rc_destroy_regalloc_state(struct rc_regalloc_state *s)
441 {
442 	ralloc_free(s->regs);
443 }
444