1 /*
2 * Copyright 2022 Advanced Micro Devices, Inc.
3 *
4 * SPDX-License-Identifier: MIT
5 */
6
7 /* See the big comment.
8 *
9 * Compile: gcc find_hash_func.c -fopenmp -O3 -g -o find_hash_func
10 */
11
12 #include <stdio.h>
13 #include <string.h>
14 #include <stdbool.h>
15 #include <alloca.h>
16 #include <GL/gl.h>
17 #include <GL/glext.h>
18 #include <GLES2/gl2.h>
19 #include <GLES2/gl2ext.h>
20
21 #define MAX_GLENUM_BITS 16
22
23 /* Duplicate this here. Don't pull the whole Mesa's built system into this. */
24 static inline unsigned
util_next_power_of_two(unsigned x)25 util_next_power_of_two(unsigned x)
26 {
27 if (x <= 1)
28 return 1;
29
30 return (1 << ((sizeof(unsigned) * 8) - __builtin_clz(x - 1)));
31 }
32
33 struct entry {
34 unsigned result;
35 const char *name;
36 unsigned value;
37 };
38
39 /* Given a list of large values (such as GLenums), find a simple perfect hash
40 * function that maps the large values to smallest possible numbers for use as
41 * array indices, so that we can index arrays by hash(GLenum). This is useful
42 * when a switch statement for conversions from GLenums to indices would be
43 * undesirable.
44 *
45 * The final hash function is always in this form:
46 * hash(x) = ((x * mul) >> rshift) & BITFIELD_MASK(bits)
47 *
48 * This is a brute force algorithm that tries to find all injective
49 * (mul, rshift, bits) hash functions and return the one whose maximum
50 * generated value is the smallest.
51 */
52 static bool
find_perfect_hash_func(const struct entry * list,unsigned * best_mul,unsigned * best_rshift,unsigned * best_mask,unsigned * best_max)53 find_perfect_hash_func(const struct entry *list, unsigned *best_mul,
54 unsigned *best_rshift, unsigned *best_mask,
55 unsigned *best_max)
56 {
57 bool found = false;
58 *best_mul = 1;
59 *best_rshift = 0;
60 *best_mask = ~0;
61 *best_max = (1 << MAX_GLENUM_BITS) - 1;
62
63 for (unsigned mul = 1; mul < (1 << 16); mul++) {
64 for (unsigned rshift = 1; rshift <= 31; rshift++) {
65 for (unsigned bits = 1; bits <= MAX_GLENUM_BITS; bits++) {
66 unsigned mask = (1 << bits) - 1;
67 unsigned max = 0;
68
69 for (unsigned a = 0; list[a].name; a++) {
70 unsigned hash = ((list[a].value * mul) >> rshift) & mask;
71
72 max = hash > max ? hash : max;
73
74 for (unsigned b = a + 1; list[b].name; b++) {
75 /* Skip if the mapping is not injective. */
76 if (hash == (((list[b].value * mul) >> rshift) & mask))
77 goto fail;
78 }
79 }
80 if (max < *best_max) {
81 *best_mul = mul;
82 *best_rshift = rshift;
83 *best_mask = mask;
84 *best_max = max;
85 found = true;
86 }
87 fail:;
88 }
89 }
90 }
91 return found;
92 }
93
94 static bool
find_translate_func(const struct entry * list,unsigned * out_mul,unsigned * out_rshift,unsigned * out_mask,unsigned max_result)95 find_translate_func(const struct entry *list, unsigned *out_mul,
96 unsigned *out_rshift, unsigned *out_mask,
97 unsigned max_result)
98 {
99 unsigned mask = util_next_power_of_two(max_result + 1) - 1;
100 unsigned num_threads = 24;
101 unsigned start_mul = 1;
102 unsigned end_mul = (1 << 31) - num_threads;
103
104 int thread_id_finished = -1;
105 unsigned *result_mul = alloca(4 * num_threads);
106 unsigned *result_rshift = alloca(4 * num_threads);
107
108 #pragma omp parallel for
109 for (unsigned thread_id = 0; thread_id < num_threads; thread_id++) {
110 for (unsigned mul = start_mul; mul < end_mul; mul += num_threads) {
111 for (unsigned rshift = 1; rshift <= 31; rshift++) {
112 for (unsigned add = 0; add <= max_result; add++) {
113 for (unsigned a = 0; list[a].name; a++) {
114 unsigned hash = (((list[a].value * mul) >> rshift) + add) & mask;
115
116 /* Reject the mapping if it doesn't return the expected result. */
117 if (hash != list[a].result)
118 goto fail;
119 }
120
121 result_mul[thread_id] = mul;
122 result_rshift[thread_id] = rshift;
123 __atomic_store_n(&thread_id_finished, thread_id, __ATOMIC_RELEASE);
124 puts("found");
125 goto done;
126 fail:;
127 }
128 }
129 }
130 done:;
131 }
132
133 if (__atomic_load_n(&thread_id_finished, __ATOMIC_ACQUIRE) >= 0) {
134 *out_mul = result_mul[thread_id_finished];
135 *out_rshift = result_rshift[thread_id_finished];
136 *out_mask = mask;
137 return true;
138 }
139
140 return false;
141 }
142
143 static void
print_hash_code(const char * uppercase_name,const char * lowercase_name,const struct entry * list,bool get_translate_func)144 print_hash_code(const char *uppercase_name, const char *lowercase_name,
145 const struct entry *list, bool get_translate_func)
146 {
147 unsigned mul, rshift, mask, max;
148 unsigned max_strlen = 0, max_result = 0;
149
150 for (unsigned i = 0; list[i].name; i++) {
151 int len = strlen(list[i].name);
152 max_strlen = len > max_strlen ? len : max_strlen;
153 max_result = list[i].result > max_result ? list[i].result : max_result;
154 }
155
156 /* Find the hash function that can be used as a translation function (no table). */
157 if (get_translate_func) {
158 if (find_translate_func(list, &mul, &rshift, &mask, max_result)) {
159 printf("/* Translate enums to desired values arithmetically (without a switch) */\n");
160 printf("#define TRANSLATE_%s(x) ((((uint32_t)(x) * %u) >> %u) & 0x%x)\n\n",
161 uppercase_name, mul, rshift, mask);
162
163 for (unsigned i = 0; list[i].name; i++) {
164 printf("static_assert(TRANSLATE_%s(%s) == %u)\n",
165 uppercase_name, list[i].name, list[i].result);
166 }
167 printf("\n");
168 } else {
169 puts("/* ERROR: Can't find the hash function for translating. */");
170 }
171 } else {
172 /* Find the hash function that can be used for indexing into a table. */
173 if (find_perfect_hash_func(list, &mul, &rshift, &mask, &max)) {
174 printf("/* Map enums to smaller enums arithmetically (without a switch) */\n");
175 printf("#define PERF_HASH_%s(x) ((((uint32_t))(x) * %u) >> %u) & 0x%x)\n\n",
176 uppercase_name, mul, rshift, mask);
177
178 /* Print the translation table. */
179 printf("static const uint%u_t %s_table[16] = {\n",
180 max > 255 ? 16 : 8, lowercase_name);
181 printf(" /* These elements are sorted by meaning, not value. */\n");
182 for (unsigned i = 0; list[i].name; i++)
183 printf(" [/*%2u*/ PERF_HASH_%s(%s)] = 0,\n",
184 ((list[i].value * mul) >> rshift) & mask, uppercase_name, list[i].name);
185 printf("};\n\n");
186
187 /* Print the uniqueness compile check. */
188 printf("static inline void\n");
189 printf("compile_check_uniqueness_of_%s(unsigned x)\n", lowercase_name);
190 printf("{\n");
191 printf(" /* This switch has the same purpose as static_assert.\n");
192 printf(" * It should fail compilation if any case is not unique.\n");
193 printf(" */\n");
194 printf(" switch (x) {\n");
195 for (unsigned i = 0; list[i].name; i++)
196 printf(" case PERF_HASH_%s(%s):\n", uppercase_name, list[i].name);
197 printf(" break;\n");
198 printf(" }\n");
199 printf("}\n\n");
200
201 printf("/* GL enums mapped to smaller numbers. The number are not contiguous. */\n");
202 printf("typedef enum {\n");
203 for (unsigned i = 0; list[i].name; i++) {
204 printf(" MESA_%s = %*s/*%2u*/ PERF_HASH_%s(%s),\n",
205 list[i].name + 3,
206 1 + max_strlen - (int)strlen(list[i].name), " ",
207 ((list[i].value * mul) >> rshift) & mask,
208 uppercase_name, list[i].name);
209 }
210 printf("\n NUM_%sS = %u,\n", uppercase_name, max + 1);
211 printf(" NUM_%sS_POW2 = %u,\n",
212 uppercase_name, util_next_power_of_two(max + 1));
213 printf("} %s;\n\n", lowercase_name);
214 } else {
215 puts("/* ERROR: Can't find the hash function for indexing. */");
216 }
217 }
218 }
219
220 #define S(x) #x, x
221
main(int argc,char ** argv)222 int main(int argc, char **argv)
223 {
224 struct entry vertex_types[] = {
225 {0, S(GL_BYTE)},
226 {0, S(GL_UNSIGNED_BYTE)},
227 {0, S(GL_INT_2_10_10_10_REV)},
228 {0, S(GL_UNSIGNED_INT_2_10_10_10_REV)},
229 {1, S(GL_SHORT)},
230 {1, S(GL_UNSIGNED_SHORT)},
231 {1, S(GL_HALF_FLOAT_ARB)},
232 {1, S(GL_HALF_FLOAT_OES)},
233 {2, S(GL_INT)},
234 {2, S(GL_UNSIGNED_INT)},
235 {2, S(GL_FLOAT)},
236 {2, S(GL_FIXED)},
237 {3, S(GL_DOUBLE)},
238 {3, S(GL_UNSIGNED_INT64_ARB)},
239 {0},
240 };
241 print_hash_code("GL_VERTEX_TYPE", "gl_vertex_type", vertex_types, false);
242
243 return 0;
244 }
245