xref: /aosp_15_r20/external/mesa3d/src/compiler/clc/nir_load_libclc.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2020 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "nir.h"
25 #include "nir_clc_helpers.h"
26 #include "nir_serialize.h"
27 #include "nir_spirv.h"
28 #include "util/mesa-sha1.h"
29 
30 #ifdef DYNAMIC_LIBCLC_PATH
31 #include <fcntl.h>
32 #include <sys/types.h>
33 #include <sys/stat.h>
34 #include <sys/mman.h>
35 #include <unistd.h>
36 #endif
37 
38 #ifdef HAVE_STATIC_LIBCLC_ZSTD
39 #include <zstd.h>
40 #endif
41 
42 #ifdef HAVE_STATIC_LIBCLC_SPIRV
43 #include "spirv-mesa3d-.spv.h"
44 #endif
45 
46 #ifdef HAVE_STATIC_LIBCLC_SPIRV64
47 #include "spirv64-mesa3d-.spv.h"
48 #endif
49 
/** Describes where one flavour of the libclc SPIR-V library can be found.
 *
 * A flavour is identified by its pointer size.  Its contents may be compiled
 * into the binary (static_data / static_data_size), available as a file on
 * disk (sys_path), or both; unused sources are left NULL / zero.
 */
struct clc_file {
   /* Pointer size, in bits, this library was built for. */
   unsigned bit_size;

   /* Copy of the library embedded in the binary, or NULL if there is none. */
   const char *static_data;

   /* Size of static_data in bytes. */
   size_t static_data_size;

   /* Path of the library on the file system, or NULL if there is none. */
   const char *sys_path;
};
56 
57 static const struct clc_file libclc_files[] = {
58    {
59       .bit_size = 32,
60 #ifdef HAVE_STATIC_LIBCLC_SPIRV
61       .static_data = libclc_spirv_mesa3d_spv,
62       .static_data_size = sizeof(libclc_spirv_mesa3d_spv),
63 #endif
64 #ifdef DYNAMIC_LIBCLC_PATH
65       .sys_path = DYNAMIC_LIBCLC_PATH "spirv-mesa3d-.spv",
66 #endif
67    },
68    {
69       .bit_size = 64,
70 #ifdef HAVE_STATIC_LIBCLC_SPIRV64
71       .static_data = libclc_spirv64_mesa3d_spv,
72       .static_data_size = sizeof(libclc_spirv64_mesa3d_spv),
73 #endif
74 #ifdef DYNAMIC_LIBCLC_PATH
75       .sys_path = DYNAMIC_LIBCLC_PATH "spirv64-mesa3d-.spv",
76 #endif
77    },
78 };
79 
80 static const struct clc_file *
get_libclc_file(unsigned ptr_bit_size)81 get_libclc_file(unsigned ptr_bit_size)
82 {
83    assert(ptr_bit_size == 32 || ptr_bit_size == 64);
84    return &libclc_files[ptr_bit_size / 64];
85 }
86 
/** State of one libclc load operation.
 *
 * Filled in by open_clc_data(), completed by map_clc_data() and released by
 * close_clc_data().
 */
struct clc_data {
   /* Entry of libclc_files this load refers to. */
   const struct clc_file *file;

   /* Key identifying this library, used as input for the shader cache key. */
   unsigned char cache_key[20];

   /* File descriptor of the on-disk library; -1 when no file is open. */
   int fd;

   /* SPIR-V binary and its size in bytes; set by map_clc_data(). */
   const void *data;
   size_t size;
};
96 
97 static bool
open_clc_data(struct clc_data * clc,unsigned ptr_bit_size)98 open_clc_data(struct clc_data *clc, unsigned ptr_bit_size)
99 {
100    memset(clc, 0, sizeof(*clc));
101    clc->file = get_libclc_file(ptr_bit_size);
102    clc->fd = -1;
103 
104    if (clc->file->static_data) {
105       snprintf((char *)clc->cache_key, sizeof(clc->cache_key),
106                "libclc-spirv%d", ptr_bit_size);
107       return true;
108    }
109 
110 #ifdef DYNAMIC_LIBCLC_PATH
111    if (clc->file->sys_path != NULL) {
112       int fd = open(clc->file->sys_path, O_RDONLY);
113       if (fd < 0)
114          return false;
115 
116       struct stat stat;
117       int ret = fstat(fd, &stat);
118       if (ret < 0) {
119          fprintf(stderr, "fstat failed on %s: %m\n", clc->file->sys_path);
120          close(fd);
121          return false;
122       }
123 
124       struct mesa_sha1 ctx;
125       _mesa_sha1_init(&ctx);
126       _mesa_sha1_update(&ctx, clc->file->sys_path, strlen(clc->file->sys_path));
127 #if defined(__APPLE__) || defined(__MACOSX)
128       _mesa_sha1_update(&ctx, &stat.st_mtime, sizeof(stat.st_mtime));
129 #else
130       _mesa_sha1_update(&ctx, &stat.st_mtim, sizeof(stat.st_mtim));
131 #endif
132       _mesa_sha1_final(&ctx, clc->cache_key);
133 
134       clc->fd = fd;
135 
136       return true;
137    }
138 #endif
139 
140    return false;
141 }
142 
143 #define SPIRV_WORD_SIZE 4
144 
145 static bool
map_clc_data(struct clc_data * clc)146 map_clc_data(struct clc_data *clc)
147 {
148    if (clc->file->static_data) {
149 #ifdef HAVE_STATIC_LIBCLC_ZSTD
150       unsigned long long cmp_size =
151          ZSTD_getFrameContentSize(clc->file->static_data,
152                                   clc->file->static_data_size);
153       if (cmp_size == ZSTD_CONTENTSIZE_UNKNOWN ||
154           cmp_size == ZSTD_CONTENTSIZE_ERROR) {
155          fprintf(stderr, "Could not determine the decompressed size of the "
156                          "libclc SPIR-V\n");
157          return false;
158       }
159 
160       size_t frame_size =
161          ZSTD_findFrameCompressedSize(clc->file->static_data,
162                                       clc->file->static_data_size);
163       if (ZSTD_isError(frame_size)) {
164          fprintf(stderr, "Could not determine the size of the first ZSTD frame "
165                          "when decompressing libclc SPIR-V: %s\n",
166                  ZSTD_getErrorName(frame_size));
167          return false;
168       }
169 
170       void *dest = malloc(cmp_size + 1);
171       size_t size = ZSTD_decompress(dest, cmp_size, clc->file->static_data,
172                                     frame_size);
173       if (ZSTD_isError(size)) {
174          free(dest);
175          fprintf(stderr, "Error decompressing libclc SPIR-V: %s\n",
176                  ZSTD_getErrorName(size));
177          return false;
178       }
179 
180       clc->data = dest;
181       clc->size = size;
182 #else
183       clc->data = clc->file->static_data;
184       clc->size = clc->file->static_data_size;
185 #endif
186       return true;
187    }
188 
189 #ifdef DYNAMIC_LIBCLC_PATH
190    if (clc->file->sys_path != NULL) {
191       off_t len = lseek(clc->fd, 0, SEEK_END);
192       if (len % SPIRV_WORD_SIZE != 0) {
193          fprintf(stderr, "File length isn't a multiple of the word size\n");
194          return false;
195       }
196       clc->size = len;
197 
198       clc->data = mmap(NULL, len, PROT_READ, MAP_PRIVATE, clc->fd, 0);
199       if (clc->data == MAP_FAILED) {
200          fprintf(stderr, "Failed to mmap libclc SPIR-V: %m\n");
201          return false;
202       }
203 
204       return true;
205    }
206 #endif
207 
208    return true;
209 }
210 
211 static void
close_clc_data(struct clc_data * clc)212 close_clc_data(struct clc_data *clc)
213 {
214    if (clc->file->static_data) {
215 #ifdef HAVE_STATIC_LIBCLC_ZSTD
216       free((void *)clc->data);
217 #endif
218       return;
219    }
220 
221 #ifdef DYNAMIC_LIBCLC_PATH
222    if (clc->file->sys_path != NULL) {
223       if (clc->data)
224          munmap((void *)clc->data, clc->size);
225       close(clc->fd);
226    }
227 #endif
228 }
229 
230 /** Returns true if libclc is found
231  *
232  * If libclc is compiled in statically, this always returns true.  If we
233  * depend on a dynamic libclc, this opens and tries to stat the file.
234  */
235 bool
nir_can_find_libclc(unsigned ptr_bit_size)236 nir_can_find_libclc(unsigned ptr_bit_size)
237 {
238    struct clc_data clc;
239    if (open_clc_data(&clc, ptr_bit_size)) {
240       close_clc_data(&clc);
241       return true;
242    } else {
243       return false;
244    }
245 }
246 
247 /** Adds generic pointer variants of libclc functions
248  *
249  * Libclc currently doesn't contain generic variants for a bunch of functions
250  * like `frexp` but the OpenCL spec with generic pointers requires them.  We
251  * really should fix libclc but, in the mean time, we can easily duplicate
252  * every function that works on global memory and make it also work on generic
253  * memory.
254  */
255 static void
libclc_add_generic_variants(nir_shader * shader)256 libclc_add_generic_variants(nir_shader *shader)
257 {
258    nir_foreach_function(func, shader) {
259       /* These don't need generic variants */
260       if (strstr(func->name, "async_work_group_strided_copy"))
261          continue;
262 
263       char *U3AS1 = strstr(func->name, "U3AS1");
264       if (U3AS1 == NULL)
265          continue;
266 
267       ptrdiff_t offset_1 = U3AS1 - func->name + 4;
268       assert(offset_1 < strlen(func->name) && func->name[offset_1] == '1');
269 
270       char *generic_name = ralloc_strdup(shader, func->name);
271       assert(generic_name[offset_1] == '1');
272       generic_name[offset_1] = '4';
273 
274       if (nir_shader_get_function_for_name(shader, generic_name))
275          continue;
276 
277       nir_function *gfunc = nir_function_create(shader, generic_name);
278       gfunc->num_params = func->num_params;
279       gfunc->params = ralloc_array(shader, nir_parameter, gfunc->num_params);
280       for (unsigned i = 0; i < gfunc->num_params; i++)
281          gfunc->params[i] = func->params[i];
282 
283       nir_function_set_impl(gfunc, nir_function_impl_clone(shader, func->impl));
284 
285       /* Rewrite any global pointers to generic */
286       nir_foreach_block(block, gfunc->impl) {
287          nir_foreach_instr(instr, block) {
288             if (instr->type != nir_instr_type_deref)
289                continue;
290 
291             nir_deref_instr *deref = nir_instr_as_deref(instr);
292             if (!nir_deref_mode_may_be(deref, nir_var_mem_global))
293                continue;
294 
295             assert(deref->type != nir_deref_type_var);
296             assert(nir_deref_mode_is(deref, nir_var_mem_global));
297 
298             deref->modes = nir_var_mem_generic;
299          }
300       }
301 
302       nir_metadata_preserve(gfunc->impl, nir_metadata_none);
303    }
304 }
305 
306 nir_shader *
nir_load_libclc_shader(unsigned ptr_bit_size,struct disk_cache * disk_cache,const struct spirv_to_nir_options * spirv_options,const nir_shader_compiler_options * nir_options,bool optimize)307 nir_load_libclc_shader(unsigned ptr_bit_size,
308                        struct disk_cache *disk_cache,
309                        const struct spirv_to_nir_options *spirv_options,
310                        const nir_shader_compiler_options *nir_options,
311                        bool optimize)
312 {
313    assert(ptr_bit_size ==
314           nir_address_format_bit_size(spirv_options->global_addr_format));
315 
316    struct clc_data clc;
317    if (!open_clc_data(&clc, ptr_bit_size))
318       return NULL;
319 
320 #ifdef ENABLE_SHADER_CACHE
321    cache_key cache_key;
322    if (disk_cache) {
323       disk_cache_compute_key(disk_cache, clc.cache_key,
324                              sizeof(clc.cache_key), cache_key);
325 
326       size_t buffer_size;
327       uint8_t *buffer = disk_cache_get(disk_cache, cache_key, &buffer_size);
328       if (buffer) {
329          struct blob_reader blob;
330          blob_reader_init(&blob, buffer, buffer_size);
331          nir_shader *nir = nir_deserialize(NULL, nir_options, &blob);
332          free(buffer);
333          close_clc_data(&clc);
334          return nir;
335       }
336    }
337 #endif
338 
339    if (!map_clc_data(&clc)) {
340       close_clc_data(&clc);
341       return NULL;
342    }
343 
344    struct spirv_to_nir_options spirv_lib_options = *spirv_options;
345    spirv_lib_options.create_library = true;
346 
347    assert(clc.size % SPIRV_WORD_SIZE == 0);
348    nir_shader *nir = spirv_to_nir(clc.data, clc.size / SPIRV_WORD_SIZE,
349                                   NULL, 0, MESA_SHADER_KERNEL, NULL,
350                                   &spirv_lib_options, nir_options);
351    nir_validate_shader(nir, "after nir_load_clc_shader");
352 
353    /* nir_inline_libclc will assume that the functions in this shader are
354     * already ready to lower.  This means we need to inline any function_temp
355     * initializers and lower any early returns.
356     */
357    nir->info.internal = true;
358    NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
359    NIR_PASS_V(nir, nir_lower_returns);
360 
361    NIR_PASS_V(nir, libclc_add_generic_variants);
362 
363    /* Run some optimization passes. Those used here should be considered safe
364     * for all use cases and drivers.
365     */
366    if (optimize) {
367       NIR_PASS_V(nir, nir_split_var_copies);
368 
369       bool progress;
370       do {
371          progress = false;
372          NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
373          NIR_PASS(progress, nir, nir_lower_var_copies);
374          NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
375          NIR_PASS(progress, nir, nir_copy_prop);
376          NIR_PASS(progress, nir, nir_opt_remove_phis);
377          NIR_PASS(progress, nir, nir_opt_dce);
378          NIR_PASS(progress, nir, nir_opt_if, false);
379          NIR_PASS(progress, nir, nir_opt_dead_cf);
380          NIR_PASS(progress, nir, nir_opt_cse);
381          /* drivers run this pass, so don't be too aggressive. More aggressive
382           * values only increase effectiveness by <5%
383           */
384          NIR_PASS(progress, nir, nir_opt_peephole_select, 0, false, false);
385          NIR_PASS(progress, nir, nir_opt_algebraic);
386          NIR_PASS(progress, nir, nir_opt_constant_folding);
387          NIR_PASS(progress, nir, nir_opt_undef);
388          NIR_PASS(progress, nir, nir_opt_deref);
389       } while(progress);
390 
391       nir_sweep(nir);
392    }
393 
394 #ifdef ENABLE_SHADER_CACHE
395    if (disk_cache) {
396       struct blob blob;
397       blob_init(&blob);
398       nir_serialize(&blob, nir, false);
399       disk_cache_put(disk_cache, cache_key, blob.data, blob.size, NULL);
400       blob_finish(&blob);
401    }
402 #endif
403 
404    close_clc_data(&clc);
405    return nir;
406 }
407