xref: /aosp_15_r20/external/mesa3d/src/panfrost/compiler/nodearray.h (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright (C) 2021 Icecream95
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 /* A nodearray is an array type that is either sparse or dense, depending on
25  * the number of elements.
26  *
27  * When the number of elements is over a threshold (max_sparse), the dense mode
28  * is used, and the nodearray is simply a container for an array.
29  *
30  * In sparse mode, the array has elements with a 24-bit node index and a value.
31  * The nodes are always sorted, so that a binary search can be used to find
32  * elements. Nonexistent elements are treated as zero.
33  *
34  * Function names follow ARM instruction names: orr does *elem |= value.
35  *
36  * Although it's probably already fast enough, the datastructure could be sped
37  * up a lot, especially when NEON is available, by making the sparse mode store
38  * sixteen adjacent values, so that adding new keys also allocates nearby keys,
39  * and to allow for vectorising iteration, as can be done when in the dense
40  * mode.
41  */
42 
43 #ifndef __BIFROST_NODEARRAY_H
44 #define __BIFROST_NODEARRAY_H
45 
46 #include <stdint.h>
47 
48 #ifdef __cplusplus
49 extern "C" {
50 #endif
51 
52 /* A value that may be stored in a nodearray element, used directly for dense
53  * elements and included into sparse elements.
54  */
55 typedef uint16_t nodearray_value;
56 
57 #define NODEARRAY_MAX_VALUE 0xffff
58 
59 /* Type storing sparse nodearray elements, consisting of a nodearray_value at
60  * the bottom and a nodearray_key at the top.
61  */
62 typedef uint64_t nodearray_sparse;
63 
64 typedef struct {
65    union {
66       nodearray_sparse *sparse;
67       nodearray_value *dense;
68    };
69    unsigned size;
70    unsigned sparse_capacity;
71 } nodearray;
72 
73 /* Align sizes to 16-bytes for SIMD purposes */
74 #define NODEARRAY_DENSE_ALIGN(x) ALIGN_POT(x, 16)
75 
76 #define nodearray_sparse_foreach(buf, elem)                                    \
77    for (nodearray_sparse *elem = (buf)->sparse;                                \
78         elem < (buf)->sparse + (buf)->size; elem++)
79 
80 #define nodearray_dense_foreach(buf, elem)                                     \
81    for (nodearray_value *elem = (buf)->dense;                                  \
82         elem < (buf)->dense + (buf)->size; elem++)
83 
84 #define nodearray_dense_foreach_64(buf, elem)                                  \
85    for (uint64_t *elem = (uint64_t *)(buf)->dense;                             \
86         (nodearray_value *)elem < (buf)->dense + (buf)->size; elem++)
87 
88 static inline bool
nodearray_is_sparse(const nodearray * a)89 nodearray_is_sparse(const nodearray *a)
90 {
91    return a->sparse_capacity != ~0U;
92 }
93 
94 static inline void
nodearray_init(nodearray * a)95 nodearray_init(nodearray *a)
96 {
97    memset(a, 0, sizeof(nodearray));
98 }
99 
100 static inline void
nodearray_reset(nodearray * a)101 nodearray_reset(nodearray *a)
102 {
103    free(a->sparse);
104    nodearray_init(a);
105 }
106 
107 static inline nodearray_sparse
nodearray_encode(unsigned key,nodearray_value value)108 nodearray_encode(unsigned key, nodearray_value value)
109 {
110    static_assert(sizeof(nodearray_value) == sizeof(uint16_t), "sizes mismatch");
111    return ((nodearray_sparse)key << 16) | value;
112 }
113 
114 static inline unsigned
nodearray_sparse_key(const nodearray_sparse * elem)115 nodearray_sparse_key(const nodearray_sparse *elem)
116 {
117    static_assert(sizeof(nodearray_value) == sizeof(uint16_t), "sizes mismatch");
118    return *elem >> 16;
119 }
120 
121 static inline nodearray_value
nodearray_sparse_value(const nodearray_sparse * elem)122 nodearray_sparse_value(const nodearray_sparse *elem)
123 {
124    return *elem & NODEARRAY_MAX_VALUE;
125 }
126 
127 static inline unsigned
nodearray_sparse_search(const nodearray * a,nodearray_sparse key,nodearray_sparse ** elem)128 nodearray_sparse_search(const nodearray *a, nodearray_sparse key,
129                         nodearray_sparse **elem)
130 {
131    assert(nodearray_is_sparse(a) && a->size);
132 
133    nodearray_sparse *data = a->sparse;
134 
135    /* Encode the key using the highest possible value, so that the
136     * matching node must be encoded lower than this
137     */
138    nodearray_sparse skey = nodearray_encode(key, NODEARRAY_MAX_VALUE);
139 
140    unsigned left = 0;
141    unsigned right = a->size - 1;
142 
143    if (data[right] <= skey)
144       left = right;
145 
146    while (left != right) {
147       /* No need to worry about overflow, we couldn't have more than
148        * 2^24 elements */
149       unsigned probe = (left + right + 1) / 2;
150 
151       if (data[probe] > skey)
152          right = probe - 1;
153       else
154          left = probe;
155    }
156 
157    *elem = data + left;
158    return left;
159 }
160 
161 static inline void
nodearray_orr(nodearray * a,unsigned key,nodearray_value value,unsigned max_sparse,unsigned max)162 nodearray_orr(nodearray *a, unsigned key, nodearray_value value,
163               unsigned max_sparse, unsigned max)
164 {
165    assert(key < (1 << 24));
166    assert(key < max);
167 
168    if (!value)
169       return;
170 
171    if (nodearray_is_sparse(a)) {
172       unsigned size = a->size;
173       unsigned left = 0;
174 
175       if (size) {
176          /* First, binary search for key */
177          nodearray_sparse *elem;
178          left = nodearray_sparse_search(a, key, &elem);
179 
180          if (nodearray_sparse_key(elem) == key) {
181             *elem |= value;
182             return;
183          }
184 
185          /* We insert before `left`, so increment it if it's
186           * out of order */
187          if (nodearray_sparse_key(elem) < key)
188             ++left;
189       }
190 
191       if (size < max_sparse && (size + 1) < max / 4) {
192          /* We didn't find it, but we know where to insert it. */
193 
194          nodearray_sparse *data = a->sparse;
195          nodearray_sparse *data_move = data + left;
196 
197          bool realloc = (++a->size) > a->sparse_capacity;
198 
199          if (realloc) {
200             a->sparse_capacity =
201                MIN2(MAX2(a->sparse_capacity * 2, 64), max / 4);
202 
203             a->sparse = (nodearray_sparse *)malloc(a->sparse_capacity *
204                                                    sizeof(nodearray_sparse));
205 
206             if (left)
207                memcpy(a->sparse, data, left * sizeof(nodearray_sparse));
208          }
209 
210          nodearray_sparse *elem = a->sparse + left;
211 
212          if (left != size)
213             memmove(elem + 1, data_move,
214                     (size - left) * sizeof(nodearray_sparse));
215 
216          *elem = nodearray_encode(key, value);
217 
218          if (realloc)
219             free(data);
220 
221          return;
222       }
223 
224       /* There are too many elements, so convert to a dense array */
225       nodearray old = *a;
226 
227       a->dense = (nodearray_value *)calloc(NODEARRAY_DENSE_ALIGN(max),
228                                            sizeof(nodearray_value));
229       a->size = max;
230       a->sparse_capacity = ~0U;
231 
232       nodearray_value *data = a->dense;
233 
234       nodearray_sparse_foreach(&old, x) {
235          unsigned key = nodearray_sparse_key(x);
236          nodearray_value value = nodearray_sparse_value(x);
237 
238          assert(key < max);
239          data[key] = value;
240       }
241 
242       free(old.sparse);
243    }
244 
245    a->dense[key] |= value;
246 }
247 
248 #ifdef __cplusplus
249 } /* extern C */
250 #endif
251 
252 #endif
253