1 /*
2 * Copyright 2009 Nicolai Haehnle.
3 * Copyright 2011 Tom Stellard <[email protected]>
4 * SPDX-License-Identifier: MIT
5 */
6
7 #include "radeon_regalloc.h"
8 #include "radeon_list.h"
9
10 #define VERBOSE 0
11
12 #define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
13
14 const struct rc_class rc_class_list_vp [] = {
15 {RC_REG_CLASS_VP_SINGLE, 4,
16 {RC_MASK_X,
17 RC_MASK_Y,
18 RC_MASK_Z,
19 RC_MASK_W,
20 RC_MASK_NONE,
21 RC_MASK_NONE}},
22 {RC_REG_CLASS_VP_DOUBLE, 6,
23 {RC_MASK_X | RC_MASK_Y,
24 RC_MASK_X | RC_MASK_Z,
25 RC_MASK_X | RC_MASK_W,
26 RC_MASK_Y | RC_MASK_Z,
27 RC_MASK_Y | RC_MASK_W,
28 RC_MASK_Z | RC_MASK_W}},
29 {RC_REG_CLASS_VP_TRIPLE, 4,
30 {RC_MASK_X | RC_MASK_Y | RC_MASK_Z,
31 RC_MASK_X | RC_MASK_Y | RC_MASK_W,
32 RC_MASK_X | RC_MASK_Z | RC_MASK_W,
33 RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
34 RC_MASK_NONE,
35 RC_MASK_NONE}},
36 {RC_REG_CLASS_VP_QUADRUPLE, 1,
37 {RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
38 RC_MASK_NONE,
39 RC_MASK_NONE,
40 RC_MASK_NONE,
41 RC_MASK_NONE,
42 RC_MASK_NONE}}
43 };
44
45 const struct rc_class rc_class_list_fp [] = {
46 {RC_REG_CLASS_FP_SINGLE, 3,
47 {RC_MASK_X,
48 RC_MASK_Y,
49 RC_MASK_Z,
50 RC_MASK_NONE,
51 RC_MASK_NONE,
52 RC_MASK_NONE}},
53 {RC_REG_CLASS_FP_DOUBLE, 3,
54 {RC_MASK_X | RC_MASK_Y,
55 RC_MASK_X | RC_MASK_Z,
56 RC_MASK_Y | RC_MASK_Z,
57 RC_MASK_NONE,
58 RC_MASK_NONE,
59 RC_MASK_NONE}},
60 {RC_REG_CLASS_FP_TRIPLE, 1,
61 {RC_MASK_X | RC_MASK_Y | RC_MASK_Z,
62 RC_MASK_NONE,
63 RC_MASK_NONE,
64 RC_MASK_NONE,
65 RC_MASK_NONE,
66 RC_MASK_NONE}},
67 {RC_REG_CLASS_FP_ALPHA, 1,
68 {RC_MASK_W,
69 RC_MASK_NONE,
70 RC_MASK_NONE,
71 RC_MASK_NONE,
72 RC_MASK_NONE,
73 RC_MASK_NONE}},
74 {RC_REG_CLASS_FP_SINGLE_PLUS_ALPHA, 3,
75 {RC_MASK_X | RC_MASK_W,
76 RC_MASK_Y | RC_MASK_W,
77 RC_MASK_Z | RC_MASK_W,
78 RC_MASK_NONE,
79 RC_MASK_NONE,
80 RC_MASK_NONE}},
81 {RC_REG_CLASS_FP_DOUBLE_PLUS_ALPHA, 3,
82 {RC_MASK_X | RC_MASK_Y | RC_MASK_W,
83 RC_MASK_X | RC_MASK_Z | RC_MASK_W,
84 RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
85 RC_MASK_NONE,
86 RC_MASK_NONE,
87 RC_MASK_NONE}},
88 {RC_REG_CLASS_FP_TRIPLE_PLUS_ALPHA, 1,
89 {RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
90 RC_MASK_NONE,
91 RC_MASK_NONE,
92 RC_MASK_NONE,
93 RC_MASK_NONE,
94 RC_MASK_NONE}},
95 {RC_REG_CLASS_FP_X, 1,
96 {RC_MASK_X,
97 RC_MASK_NONE,
98 RC_MASK_NONE,
99 RC_MASK_NONE,
100 RC_MASK_NONE,
101 RC_MASK_NONE}},
102 {RC_REG_CLASS_FP_Y, 1,
103 {RC_MASK_Y,
104 RC_MASK_NONE,
105 RC_MASK_NONE,
106 RC_MASK_NONE,
107 RC_MASK_NONE,
108 RC_MASK_NONE}},
109 {RC_REG_CLASS_FP_Z, 1,
110 {RC_MASK_Z,
111 RC_MASK_NONE,
112 RC_MASK_NONE,
113 RC_MASK_NONE,
114 RC_MASK_NONE,
115 RC_MASK_NONE}},
116 {RC_REG_CLASS_FP_XY, 1,
117 {RC_MASK_X | RC_MASK_Y,
118 RC_MASK_NONE,
119 RC_MASK_NONE,
120 RC_MASK_NONE,
121 RC_MASK_NONE,
122 RC_MASK_NONE}},
123 {RC_REG_CLASS_FP_YZ, 1,
124 {RC_MASK_Y | RC_MASK_Z,
125 RC_MASK_NONE,
126 RC_MASK_NONE,
127 RC_MASK_NONE,
128 RC_MASK_NONE,
129 RC_MASK_NONE}},
130 {RC_REG_CLASS_FP_XZ, 1,
131 {RC_MASK_X | RC_MASK_Z,
132 RC_MASK_NONE,
133 RC_MASK_NONE,
134 RC_MASK_NONE,
135 RC_MASK_NONE,
136 RC_MASK_NONE}},
137 {RC_REG_CLASS_FP_XW, 1,
138 {RC_MASK_X | RC_MASK_W,
139 RC_MASK_NONE,
140 RC_MASK_NONE,
141 RC_MASK_NONE,
142 RC_MASK_NONE,
143 RC_MASK_NONE}},
144 {RC_REG_CLASS_FP_YW, 1,
145 {RC_MASK_Y | RC_MASK_W,
146 RC_MASK_NONE,
147 RC_MASK_NONE,
148 RC_MASK_NONE,
149 RC_MASK_NONE,
150 RC_MASK_NONE}},
151 {RC_REG_CLASS_FP_ZW, 1,
152 {RC_MASK_Z | RC_MASK_W,
153 RC_MASK_NONE,
154 RC_MASK_NONE,
155 RC_MASK_NONE,
156 RC_MASK_NONE,
157 RC_MASK_NONE}},
158 {RC_REG_CLASS_FP_XYW, 1,
159 {RC_MASK_X | RC_MASK_Y | RC_MASK_W,
160 RC_MASK_NONE,
161 RC_MASK_NONE,
162 RC_MASK_NONE,
163 RC_MASK_NONE,
164 RC_MASK_NONE}},
165 {RC_REG_CLASS_FP_YZW, 1,
166 {RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
167 RC_MASK_NONE,
168 RC_MASK_NONE,
169 RC_MASK_NONE,
170 RC_MASK_NONE,
171 RC_MASK_NONE}},
172 {RC_REG_CLASS_FP_XZW, 1,
173 {RC_MASK_X | RC_MASK_Z | RC_MASK_W,
174 RC_MASK_NONE,
175 RC_MASK_NONE,
176 RC_MASK_NONE,
177 RC_MASK_NONE,
178 RC_MASK_NONE}}
179 };
180
print_live_intervals(struct live_intervals * src)181 static void print_live_intervals(struct live_intervals * src)
182 {
183 if (!src || !src->Used) {
184 DBG("(null)");
185 return;
186 }
187
188 DBG("(%i,%i)", src->Start, src->End);
189 }
190
overlap_live_intervals(struct live_intervals * a,struct live_intervals * b)191 static int overlap_live_intervals(struct live_intervals * a, struct live_intervals * b)
192 {
193 if (VERBOSE) {
194 DBG("overlap_live_intervals: ");
195 print_live_intervals(a);
196 DBG(" to ");
197 print_live_intervals(b);
198 DBG("\n");
199 }
200
201 if (!a->Used || !b->Used) {
202 DBG(" unused interval\n");
203 return 0;
204 }
205
206 if (a->Start > b->Start) {
207 if (a->Start < b->End) {
208 DBG(" overlap\n");
209 return 1;
210 }
211 } else if (b->Start > a->Start) {
212 if (b->Start < a->End) {
213 DBG(" overlap\n");
214 return 1;
215 }
216 } else { /* a->Start == b->Start */
217 if (a->Start != a->End && b->Start != b->End) {
218 DBG(" overlap\n");
219 return 1;
220 }
221 }
222
223 DBG(" no overlap\n");
224
225 return 0;
226 }
227
rc_find_class(const struct rc_class * classes,unsigned int writemask,unsigned int max_writemask_count)228 int rc_find_class(
229 const struct rc_class * classes,
230 unsigned int writemask,
231 unsigned int max_writemask_count)
232 {
233 unsigned int i;
234 for (i = 0; i < RC_REG_CLASS_FP_COUNT; i++) {
235 unsigned int j;
236 if (classes[i].WritemaskCount > max_writemask_count) {
237 continue;
238 }
239 for (j = 0; j < classes[i].WritemaskCount; j++) {
240 if (classes[i].Writemasks[j] == writemask) {
241 return i;
242 }
243 }
244 }
245 return -1;
246 }
247
rc_overlap_live_intervals_array(struct live_intervals * a,struct live_intervals * b)248 unsigned int rc_overlap_live_intervals_array(
249 struct live_intervals * a,
250 struct live_intervals * b)
251 {
252 unsigned int a_chan, b_chan;
253 for (a_chan = 0; a_chan < 4; a_chan++) {
254 for (b_chan = 0; b_chan < 4; b_chan++) {
255 if (overlap_live_intervals(&a[a_chan], &b[b_chan])) {
256 return 1;
257 }
258 }
259 }
260 return 0;
261 }
262
263 #if VERBOSE
print_reg(int reg)264 static void print_reg(int reg)
265 {
266 unsigned int index = reg_get_index(reg);
267 unsigned int mask = reg_get_writemask(reg);
268 fprintf(stderr, "Temp[%u].%c%c%c%c", index,
269 mask & RC_MASK_X ? 'x' : '_',
270 mask & RC_MASK_Y ? 'y' : '_',
271 mask & RC_MASK_Z ? 'z' : '_',
272 mask & RC_MASK_W ? 'w' : '_');
273 }
274 #endif
275
add_register_conflicts(struct ra_regs * regs,unsigned int max_temp_regs)276 static void add_register_conflicts(
277 struct ra_regs * regs,
278 unsigned int max_temp_regs)
279 {
280 unsigned int index, a_mask, b_mask;
281 for (index = 0; index < max_temp_regs; index++) {
282 for(a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) {
283 for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW;
284 b_mask++) {
285 if (a_mask & b_mask) {
286 ra_add_reg_conflict(regs,
287 get_reg_id(index, a_mask),
288 get_reg_id(index, b_mask));
289 }
290 }
291 }
292 }
293 }
294
rc_build_interference_graph(struct ra_graph * graph,struct rc_list * variables)295 void rc_build_interference_graph(
296 struct ra_graph * graph,
297 struct rc_list * variables)
298 {
299 unsigned node_index;
300 struct rc_list * var_ptr;
301
302 /* Build the interference graph */
303 for (var_ptr = variables, node_index = 0; var_ptr;
304 var_ptr = var_ptr->Next, node_index++) {
305 struct rc_list * a, * b;
306 unsigned int b_index;
307
308 for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1;
309 b; b = b->Next, b_index++) {
310 struct rc_variable * var_a = a->Item;
311 while (var_a) {
312 struct rc_variable * var_b = b->Item;
313 while (var_b) {
314 if (rc_overlap_live_intervals_array(var_a->Live, var_b->Live)) {
315 ra_add_node_interference(graph,
316 node_index, b_index);
317 }
318 var_b = var_b->Friend;
319 }
320 var_a = var_a->Friend;
321 }
322 }
323 }
324 }
325
rc_init_regalloc_state(struct rc_regalloc_state * s,enum rc_program_type prog)326 void rc_init_regalloc_state(struct rc_regalloc_state *s, enum rc_program_type prog)
327 {
328 unsigned i, j, index, class_count, max_temps;
329 unsigned **ra_q_values;
330
331 /* Pre-computed q values. This array describes the maximum number of
332 * a class's [row] registers that are in conflict with a single
333 * register from another class [column].
334 *
335 * For example:
336 * q_values[0][2] is 3, because a register from class 2
337 * (RC_REG_CLASS_FP_TRIPLE) may conflict with at most 3 registers from
338 * class 0 (RC_REG_CLASS_FP_SINGLE) e.g. T0.xyz conflicts with T0.x, T0.y,
339 * and T0.z.
340 *
341 * q_values[2][0] is 1, because a register from class 0
342 * (RC_REG_CLASS_FP_SINGLE) may conflict with at most 1 register from
343 * class 2 (RC_REG_CLASS_FP_TRIPLE) e.g. T0.x conflicts with T0.xyz
344 *
345 * The q values for each register class [row] will never be greater
346 * than the maximum number of writemask combinations for that class.
347 *
348 * For example:
349 *
350 * Class 2 (RC_REG_CLASS_FP_TRIPLE) only has 1 writemask combination,
351 * so no value in q_values[2][0..RC_REG_CLASS_FP_COUNT] will be greater
352 * than 1.
353 */
354 const unsigned q_values_fp[RC_REG_CLASS_FP_COUNT][RC_REG_CLASS_FP_COUNT] = {
355 {1, 2, 3, 0, 1, 2, 3, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2},
356 {2, 3, 3, 0, 2, 3, 3, 2, 2, 2, 3, 3, 3, 2, 2, 2, 3, 3, 3},
357 {1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
358 {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1},
359 {1, 2, 3, 3, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3},
360 {2, 3, 3, 3, 3, 3, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3},
361 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
362 {1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1},
363 {1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0},
364 {1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1},
365 {1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1},
366 {1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1},
367 {1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1},
368 {1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1},
369 {1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1},
370 {1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1},
371 {1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1},
372 {1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
373 {1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}
374 };
375
376 const unsigned q_values_vp[RC_REG_CLASS_VP_COUNT][RC_REG_CLASS_VP_COUNT] = {
377 {1, 2, 3, 4},
378 {3, 5, 6, 6},
379 {3, 4, 4, 4},
380 {1, 1, 1, 1}
381 };
382
383 if (prog == RC_FRAGMENT_PROGRAM) {
384 s->class_list = rc_class_list_fp;
385 class_count = RC_REG_CLASS_FP_COUNT;
386 max_temps = R500_PFS_NUM_TEMP_REGS;
387 } else {
388 s->class_list = rc_class_list_vp;
389 class_count = RC_REG_CLASS_VP_COUNT;
390 max_temps = R300_VS_MAX_TEMPS;
391 }
392
393 /* Allocate the main ra data structure */
394 s->regs = ra_alloc_reg_set(NULL, max_temps * RC_MASK_XYZW,
395 true);
396
397 /* Create the register classes */
398 for (i = 0; i < class_count; i++) {
399 const struct rc_class *class = &s->class_list[i];
400 s->classes[class->ID] = ra_alloc_reg_class(s->regs);
401
402 /* Assign registers to the classes */
403 for (index = 0; index < max_temps; index++) {
404 for (j = 0; j < class->WritemaskCount; j++) {
405 int reg_id = get_reg_id(index,
406 class->Writemasks[j]);
407 ra_class_add_reg(s->classes[class->ID], reg_id);
408 }
409 }
410 }
411
412 /* Set the q values. The q_values array is indexed based on
413 * the rc_reg_class ID (RC_REG_CLASS_FP_*) which might be
414 * different than the ID assigned to that class by ra.
415 * This why we need to manually construct this list.
416 */
417 ra_q_values = MALLOC(class_count * sizeof(unsigned *));
418
419 for (i = 0; i < class_count; i++) {
420 ra_q_values[i] = MALLOC(class_count * sizeof(unsigned));
421 for (j = 0; j < class_count; j++) {
422 if (prog == RC_FRAGMENT_PROGRAM)
423 ra_q_values[i][j] = q_values_fp[i][j];
424 else
425 ra_q_values[i][j] = q_values_vp[i][j];
426 }
427 }
428
429 /* Add register conflicts */
430 add_register_conflicts(s->regs, max_temps);
431
432 ra_set_finalize(s->regs, ra_q_values);
433
434 for (i = 0; i < class_count; i++) {
435 FREE(ra_q_values[i]);
436 }
437 FREE(ra_q_values);
438 }
439
rc_destroy_regalloc_state(struct rc_regalloc_state * s)440 void rc_destroy_regalloc_state(struct rc_regalloc_state *s)
441 {
442 ralloc_free(s->regs);
443 }
444