1 /*
2 * Copyright (C) 2019 Collabora, Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors (Collabora):
24 * Alyssa Rosenzweig <[email protected]>
25 */
26
27 #include "lcra.h"
28 #include <assert.h>
29 #include <limits.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include "util/macros.h"
34 #include "util/u_math.h"
35
36 /* This module is the reference implementation of "Linearly Constrained
37 * Register Allocation". The paper is available in PDF form
38 * (https://people.collabora.com/~alyssa/LCRA.pdf) as well as Markdown+LaTeX
39 * (https://gitlab.freedesktop.org/alyssa/lcra/blob/master/LCRA.md)
40 */
41
42 struct lcra_state *
lcra_alloc_equations(unsigned node_count,unsigned class_count)43 lcra_alloc_equations(unsigned node_count, unsigned class_count)
44 {
45 struct lcra_state *l = calloc(1, sizeof(*l));
46
47 l->node_count = node_count;
48 l->class_count = class_count;
49
50 l->alignment = calloc(sizeof(l->alignment[0]), node_count);
51 l->linear = calloc(sizeof(l->linear[0]), node_count * node_count);
52 l->modulus = calloc(sizeof(l->modulus[0]), node_count);
53 l->class = calloc(sizeof(l->class[0]), node_count);
54 l->class_start = calloc(sizeof(l->class_start[0]), class_count);
55 l->class_disjoint =
56 calloc(sizeof(l->class_disjoint[0]), class_count * class_count);
57 l->class_size = calloc(sizeof(l->class_size[0]), class_count);
58 l->spill_cost = calloc(sizeof(l->spill_cost[0]), node_count);
59 l->solutions = calloc(sizeof(l->solutions[0]), node_count);
60
61 memset(l->solutions, ~0, sizeof(l->solutions[0]) * node_count);
62
63 return l;
64 }
65
66 void
lcra_free(struct lcra_state * l)67 lcra_free(struct lcra_state *l)
68 {
69 if (!l)
70 return;
71
72 free(l->alignment);
73 free(l->linear);
74 free(l->modulus);
75 free(l->class);
76 free(l->class_start);
77 free(l->class_disjoint);
78 free(l->class_size);
79 free(l->spill_cost);
80 free(l->solutions);
81
82 free(l);
83 }
84
85 void
lcra_set_alignment(struct lcra_state * l,unsigned node,unsigned align_log2,unsigned bound)86 lcra_set_alignment(struct lcra_state *l, unsigned node, unsigned align_log2,
87 unsigned bound)
88 {
89 l->alignment[node] = (align_log2 + 1) | (bound << 16);
90 }
91
92 void
lcra_set_disjoint_class(struct lcra_state * l,unsigned c1,unsigned c2)93 lcra_set_disjoint_class(struct lcra_state *l, unsigned c1, unsigned c2)
94 {
95 l->class_disjoint[(c1 * l->class_count) + c2] = true;
96 l->class_disjoint[(c2 * l->class_count) + c1] = true;
97 }
98
99 void
lcra_restrict_range(struct lcra_state * l,unsigned node,unsigned len)100 lcra_restrict_range(struct lcra_state *l, unsigned node, unsigned len)
101 {
102 if (node < l->node_count && l->alignment[node]) {
103 unsigned BA = l->alignment[node];
104 unsigned alignment = (BA & 0xffff) - 1;
105 unsigned bound = BA >> 16;
106 l->modulus[node] = DIV_ROUND_UP(bound - len + 1, 1 << alignment);
107 }
108 }
109
110 void
lcra_add_node_interference(struct lcra_state * l,unsigned i,unsigned cmask_i,unsigned j,unsigned cmask_j)111 lcra_add_node_interference(struct lcra_state *l, unsigned i, unsigned cmask_i,
112 unsigned j, unsigned cmask_j)
113 {
114 if (i == j)
115 return;
116
117 if (l->class_disjoint[(l->class[i] * l -> class_count) + l->class[j]])
118 return;
119
120 uint32_t constraint_fw = 0;
121 uint32_t constraint_bw = 0;
122
123 for (unsigned D = 0; D < 16; ++D) {
124 if (cmask_i & (cmask_j << D)) {
125 constraint_bw |= (1 << (15 + D));
126 constraint_fw |= (1 << (15 - D));
127 }
128
129 if (cmask_i & (cmask_j >> D)) {
130 constraint_fw |= (1 << (15 + D));
131 constraint_bw |= (1 << (15 - D));
132 }
133 }
134
135 l->linear[j * l->node_count + i] |= constraint_fw;
136 l->linear[i * l->node_count + j] |= constraint_bw;
137 }
138
139 static bool
lcra_test_linear(struct lcra_state * l,unsigned * solutions,unsigned i)140 lcra_test_linear(struct lcra_state *l, unsigned *solutions, unsigned i)
141 {
142 unsigned *row = &l->linear[i * l->node_count];
143 signed constant = solutions[i];
144
145 for (unsigned j = 0; j < l->node_count; ++j) {
146 if (solutions[j] == ~0)
147 continue;
148
149 signed lhs = solutions[j] - constant;
150
151 if (lhs < -15 || lhs > 15)
152 continue;
153
154 if (row[j] & (1 << (lhs + 15)))
155 return false;
156 }
157
158 return true;
159 }
160
161 bool
lcra_solve(struct lcra_state * l)162 lcra_solve(struct lcra_state *l)
163 {
164 for (unsigned step = 0; step < l->node_count; ++step) {
165 if (l->solutions[step] != ~0)
166 continue;
167 if (l->alignment[step] == 0)
168 continue;
169
170 unsigned _class = l->class[step];
171 unsigned class_start = l->class_start[_class];
172
173 unsigned BA = l->alignment[step];
174 unsigned shift = (BA & 0xffff) - 1;
175 unsigned bound = BA >> 16;
176
177 unsigned P = bound >> shift;
178 unsigned Q = l->modulus[step];
179 unsigned r_max = l->class_size[_class];
180 unsigned k_max = r_max >> shift;
181 unsigned m_max = k_max / P;
182 bool succ = false;
183
184 for (unsigned m = 0; m < m_max; ++m) {
185 for (unsigned n = 0; n < Q; ++n) {
186 l->solutions[step] = ((m * P + n) << shift) + class_start;
187 succ = lcra_test_linear(l, l->solutions, step);
188
189 if (succ)
190 break;
191 }
192
193 if (succ)
194 break;
195 }
196
197 /* Out of registers - prepare to spill */
198 if (!succ) {
199 l->spill_class = l->class[step];
200 return false;
201 }
202 }
203
204 return true;
205 }
206
207 /* Register spilling is implemented with a cost-benefit system. Costs are set
208 * by the user. Benefits are calculated from the constraints. */
209
210 void
lcra_set_node_spill_cost(struct lcra_state * l,unsigned node,signed cost)211 lcra_set_node_spill_cost(struct lcra_state *l, unsigned node, signed cost)
212 {
213 if (node < l->node_count)
214 l->spill_cost[node] = cost;
215 }
216
217 static unsigned
lcra_count_constraints(struct lcra_state * l,unsigned i)218 lcra_count_constraints(struct lcra_state *l, unsigned i)
219 {
220 unsigned count = 0;
221 unsigned *constraints = &l->linear[i * l->node_count];
222
223 for (unsigned j = 0; j < l->node_count; ++j)
224 count += util_bitcount(constraints[j]);
225
226 return count;
227 }
228
229 signed
lcra_get_best_spill_node(struct lcra_state * l)230 lcra_get_best_spill_node(struct lcra_state *l)
231 {
232 /* If there are no constraints on a node, do not pick it to spill under
233 * any circumstance, or else we would hang rather than fail RA */
234 float best_benefit = 0.0;
235 signed best_node = -1;
236
237 for (unsigned i = 0; i < l->node_count; ++i) {
238 /* Find spillable nodes */
239 if (l->class[i] != l->spill_class)
240 continue;
241 if (l->spill_cost[i] < 0)
242 continue;
243
244 /* Adapted from Chaitin's heuristic */
245 float constraints = lcra_count_constraints(l, i);
246 float cost = (l->spill_cost[i] + 1);
247 float benefit = constraints / cost;
248
249 if (benefit > best_benefit) {
250 best_benefit = benefit;
251 best_node = i;
252 }
253 }
254
255 return best_node;
256 }
257