xref: /aosp_15_r20/external/mesa3d/src/panfrost/util/lcra.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright (C) 2019 Collabora, Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors (Collabora):
24  *      Alyssa Rosenzweig <[email protected]>
25  */
26 
27 #include "lcra.h"
28 #include <assert.h>
29 #include <limits.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include "util/macros.h"
34 #include "util/u_math.h"
35 
36 /* This module is the reference implementation of "Linearly Constrained
37  * Register Allocation". The paper is available in PDF form
38  * (https://people.collabora.com/~alyssa/LCRA.pdf) as well as Markdown+LaTeX
39  * (https://gitlab.freedesktop.org/alyssa/lcra/blob/master/LCRA.md)
40  */
41 
42 struct lcra_state *
lcra_alloc_equations(unsigned node_count,unsigned class_count)43 lcra_alloc_equations(unsigned node_count, unsigned class_count)
44 {
45    struct lcra_state *l = calloc(1, sizeof(*l));
46 
47    l->node_count = node_count;
48    l->class_count = class_count;
49 
50    l->alignment = calloc(sizeof(l->alignment[0]), node_count);
51    l->linear = calloc(sizeof(l->linear[0]), node_count * node_count);
52    l->modulus = calloc(sizeof(l->modulus[0]), node_count);
53    l->class = calloc(sizeof(l->class[0]), node_count);
54    l->class_start = calloc(sizeof(l->class_start[0]), class_count);
55    l->class_disjoint =
56       calloc(sizeof(l->class_disjoint[0]), class_count * class_count);
57    l->class_size = calloc(sizeof(l->class_size[0]), class_count);
58    l->spill_cost = calloc(sizeof(l->spill_cost[0]), node_count);
59    l->solutions = calloc(sizeof(l->solutions[0]), node_count);
60 
61    memset(l->solutions, ~0, sizeof(l->solutions[0]) * node_count);
62 
63    return l;
64 }
65 
66 void
lcra_free(struct lcra_state * l)67 lcra_free(struct lcra_state *l)
68 {
69    if (!l)
70       return;
71 
72    free(l->alignment);
73    free(l->linear);
74    free(l->modulus);
75    free(l->class);
76    free(l->class_start);
77    free(l->class_disjoint);
78    free(l->class_size);
79    free(l->spill_cost);
80    free(l->solutions);
81 
82    free(l);
83 }
84 
85 void
lcra_set_alignment(struct lcra_state * l,unsigned node,unsigned align_log2,unsigned bound)86 lcra_set_alignment(struct lcra_state *l, unsigned node, unsigned align_log2,
87                    unsigned bound)
88 {
89    l->alignment[node] = (align_log2 + 1) | (bound << 16);
90 }
91 
92 void
lcra_set_disjoint_class(struct lcra_state * l,unsigned c1,unsigned c2)93 lcra_set_disjoint_class(struct lcra_state *l, unsigned c1, unsigned c2)
94 {
95    l->class_disjoint[(c1 * l->class_count) + c2] = true;
96    l->class_disjoint[(c2 * l->class_count) + c1] = true;
97 }
98 
99 void
lcra_restrict_range(struct lcra_state * l,unsigned node,unsigned len)100 lcra_restrict_range(struct lcra_state *l, unsigned node, unsigned len)
101 {
102    if (node < l->node_count && l->alignment[node]) {
103       unsigned BA = l->alignment[node];
104       unsigned alignment = (BA & 0xffff) - 1;
105       unsigned bound = BA >> 16;
106       l->modulus[node] = DIV_ROUND_UP(bound - len + 1, 1 << alignment);
107    }
108 }
109 
110 void
lcra_add_node_interference(struct lcra_state * l,unsigned i,unsigned cmask_i,unsigned j,unsigned cmask_j)111 lcra_add_node_interference(struct lcra_state *l, unsigned i, unsigned cmask_i,
112                            unsigned j, unsigned cmask_j)
113 {
114    if (i == j)
115       return;
116 
117    if (l->class_disjoint[(l->class[i] * l -> class_count) + l->class[j]])
118       return;
119 
120    uint32_t constraint_fw = 0;
121    uint32_t constraint_bw = 0;
122 
123    for (unsigned D = 0; D < 16; ++D) {
124       if (cmask_i & (cmask_j << D)) {
125          constraint_bw |= (1 << (15 + D));
126          constraint_fw |= (1 << (15 - D));
127       }
128 
129       if (cmask_i & (cmask_j >> D)) {
130          constraint_fw |= (1 << (15 + D));
131          constraint_bw |= (1 << (15 - D));
132       }
133    }
134 
135    l->linear[j * l->node_count + i] |= constraint_fw;
136    l->linear[i * l->node_count + j] |= constraint_bw;
137 }
138 
139 static bool
lcra_test_linear(struct lcra_state * l,unsigned * solutions,unsigned i)140 lcra_test_linear(struct lcra_state *l, unsigned *solutions, unsigned i)
141 {
142    unsigned *row = &l->linear[i * l->node_count];
143    signed constant = solutions[i];
144 
145    for (unsigned j = 0; j < l->node_count; ++j) {
146       if (solutions[j] == ~0)
147          continue;
148 
149       signed lhs = solutions[j] - constant;
150 
151       if (lhs < -15 || lhs > 15)
152          continue;
153 
154       if (row[j] & (1 << (lhs + 15)))
155          return false;
156    }
157 
158    return true;
159 }
160 
161 bool
lcra_solve(struct lcra_state * l)162 lcra_solve(struct lcra_state *l)
163 {
164    for (unsigned step = 0; step < l->node_count; ++step) {
165       if (l->solutions[step] != ~0)
166          continue;
167       if (l->alignment[step] == 0)
168          continue;
169 
170       unsigned _class = l->class[step];
171       unsigned class_start = l->class_start[_class];
172 
173       unsigned BA = l->alignment[step];
174       unsigned shift = (BA & 0xffff) - 1;
175       unsigned bound = BA >> 16;
176 
177       unsigned P = bound >> shift;
178       unsigned Q = l->modulus[step];
179       unsigned r_max = l->class_size[_class];
180       unsigned k_max = r_max >> shift;
181       unsigned m_max = k_max / P;
182       bool succ = false;
183 
184       for (unsigned m = 0; m < m_max; ++m) {
185          for (unsigned n = 0; n < Q; ++n) {
186             l->solutions[step] = ((m * P + n) << shift) + class_start;
187             succ = lcra_test_linear(l, l->solutions, step);
188 
189             if (succ)
190                break;
191          }
192 
193          if (succ)
194             break;
195       }
196 
197       /* Out of registers - prepare to spill */
198       if (!succ) {
199          l->spill_class = l->class[step];
200          return false;
201       }
202    }
203 
204    return true;
205 }
206 
207 /* Register spilling is implemented with a cost-benefit system. Costs are set
208  * by the user. Benefits are calculated from the constraints. */
209 
210 void
lcra_set_node_spill_cost(struct lcra_state * l,unsigned node,signed cost)211 lcra_set_node_spill_cost(struct lcra_state *l, unsigned node, signed cost)
212 {
213    if (node < l->node_count)
214       l->spill_cost[node] = cost;
215 }
216 
217 static unsigned
lcra_count_constraints(struct lcra_state * l,unsigned i)218 lcra_count_constraints(struct lcra_state *l, unsigned i)
219 {
220    unsigned count = 0;
221    unsigned *constraints = &l->linear[i * l->node_count];
222 
223    for (unsigned j = 0; j < l->node_count; ++j)
224       count += util_bitcount(constraints[j]);
225 
226    return count;
227 }
228 
229 signed
lcra_get_best_spill_node(struct lcra_state * l)230 lcra_get_best_spill_node(struct lcra_state *l)
231 {
232    /* If there are no constraints on a node, do not pick it to spill under
233     * any circumstance, or else we would hang rather than fail RA */
234    float best_benefit = 0.0;
235    signed best_node = -1;
236 
237    for (unsigned i = 0; i < l->node_count; ++i) {
238       /* Find spillable nodes */
239       if (l->class[i] != l->spill_class)
240          continue;
241       if (l->spill_cost[i] < 0)
242          continue;
243 
244       /* Adapted from Chaitin's heuristic */
245       float constraints = lcra_count_constraints(l, i);
246       float cost = (l->spill_cost[i] + 1);
247       float benefit = constraints / cost;
248 
249       if (benefit > best_benefit) {
250          best_benefit = benefit;
251          best_node = i;
252       }
253    }
254 
255    return best_node;
256 }
257