xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/iris/iris_program_cache.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2017 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22 
23 /**
24  * @file iris_program_cache.c
25  *
26  * The in-memory program cache.  This is basically a hash table mapping
27  * API-specified shaders and a state key to a compiled variant.  It also
28  * takes care of uploading shader assembly into a BO for use on the GPU.
29  */
30 
31 #include <stdio.h>
32 #include <errno.h>
33 #include "pipe/p_defines.h"
34 #include "pipe/p_state.h"
35 #include "pipe/p_context.h"
36 #include "pipe/p_screen.h"
37 #include "util/u_atomic.h"
38 #include "util/u_upload_mgr.h"
39 #include "compiler/nir/nir.h"
40 #include "compiler/nir/nir_builder.h"
41 #include "intel/compiler/brw_compiler.h"
42 #include "intel/compiler/brw_nir.h"
43 #include "intel/compiler/elk/elk_compiler.h"
44 #include "intel/compiler/elk/elk_nir.h"
45 #include "iris_context.h"
46 #include "iris_resource.h"
47 
48 struct keybox {
49    uint16_t size;
50    enum iris_program_cache_id cache_id;
51    uint8_t data[0];
52 };
53 
54 static struct keybox *
make_keybox(void * mem_ctx,enum iris_program_cache_id cache_id,const void * key,uint32_t key_size)55 make_keybox(void *mem_ctx,
56             enum iris_program_cache_id cache_id,
57             const void *key,
58             uint32_t key_size)
59 {
60    struct keybox *keybox =
61       ralloc_size(mem_ctx, sizeof(struct keybox) + key_size);
62 
63    keybox->cache_id = cache_id;
64    keybox->size = key_size;
65    memcpy(keybox->data, key, key_size);
66 
67    return keybox;
68 }
69 
70 static uint32_t
keybox_hash(const void * void_key)71 keybox_hash(const void *void_key)
72 {
73    const struct keybox *key = void_key;
74    return _mesa_hash_data(&key->cache_id, key->size + sizeof(key->cache_id));
75 }
76 
77 static bool
keybox_equals(const void * void_a,const void * void_b)78 keybox_equals(const void *void_a, const void *void_b)
79 {
80    const struct keybox *a = void_a, *b = void_b;
81    if (a->size != b->size)
82       return false;
83 
84    return memcmp(a->data, b->data, a->size) == 0;
85 }
86 
87 struct iris_compiled_shader *
iris_find_cached_shader(struct iris_context * ice,enum iris_program_cache_id cache_id,uint32_t key_size,const void * key)88 iris_find_cached_shader(struct iris_context *ice,
89                         enum iris_program_cache_id cache_id,
90                         uint32_t key_size,
91                         const void *key)
92 {
93    struct keybox *keybox = make_keybox(NULL, cache_id, key, key_size);
94    struct hash_entry *entry =
95       _mesa_hash_table_search(ice->shaders.cache, keybox);
96 
97    ralloc_free(keybox);
98 
99    return entry ? entry->data : NULL;
100 }
101 
102 void
iris_delete_shader_variant(struct iris_compiled_shader * shader)103 iris_delete_shader_variant(struct iris_compiled_shader *shader)
104 {
105    pipe_resource_reference(&shader->assembly.res, NULL);
106    util_queue_fence_destroy(&shader->ready);
107    ralloc_free(shader);
108 }
109 
110 struct iris_compiled_shader *
iris_create_shader_variant(const struct iris_screen * screen,void * mem_ctx,gl_shader_stage stage,enum iris_program_cache_id cache_id,uint32_t key_size,const void * key)111 iris_create_shader_variant(const struct iris_screen *screen,
112                            void *mem_ctx,
113                            gl_shader_stage stage,
114                            enum iris_program_cache_id cache_id,
115                            uint32_t key_size,
116                            const void *key)
117 {
118 #ifndef NDEBUG
119    if (cache_id == IRIS_CACHE_BLORP) {
120       /* Blorp shader must have a mem_ctx. */
121       assert(mem_ctx != NULL);
122    } else if (cache_id == IRIS_CACHE_TCS) {
123       /* Pass-through tessellation control shaders (generated by the driver)
124        * will have a mem_ctx, and other tessellation control shaders will not.
125        */
126    } else {
127       /* Shaders that are neither blorp nor tessellation control must not have
128        * a mem_ctx.
129        */
130       assert(mem_ctx == NULL);
131    }
132 #endif
133 
134    struct iris_compiled_shader *shader =
135       rzalloc_size(mem_ctx, sizeof(struct iris_compiled_shader) +
136                    screen->vtbl.derived_program_state_size(cache_id));
137 
138    pipe_reference_init(&shader->ref, 1);
139    util_queue_fence_init(&shader->ready);
140    util_queue_fence_reset(&shader->ready);
141 
142    if (cache_id != IRIS_CACHE_BLORP) {
143       assert(key_size <= sizeof(union iris_any_prog_key));
144       memcpy(&shader->key, key, key_size);
145    }
146 
147    shader->stage = stage;
148 
149    return shader;
150 }
151 
152 void
iris_upload_shader(struct iris_screen * screen,struct iris_uncompiled_shader * ish,struct iris_compiled_shader * shader,struct hash_table * driver_shaders,struct u_upload_mgr * uploader,enum iris_program_cache_id cache_id,uint32_t key_size,const void * key,const void * assembly)153 iris_upload_shader(struct iris_screen *screen,
154                    struct iris_uncompiled_shader *ish,
155                    struct iris_compiled_shader *shader,
156                    struct hash_table *driver_shaders,
157                    struct u_upload_mgr *uploader,
158                    enum iris_program_cache_id cache_id,
159                    uint32_t key_size,
160                    const void *key,
161                    const void *assembly)
162 {
163    const struct intel_device_info *devinfo = screen->devinfo;
164 
165    u_upload_alloc(uploader, 0, shader->program_size, 64,
166                   &shader->assembly.offset, &shader->assembly.res,
167                   &shader->map);
168    memcpy(shader->map, assembly, shader->program_size);
169 
170    struct iris_resource *res = (void *) shader->assembly.res;
171    uint64_t shader_data_addr = res->bo->address +
172                                shader->assembly.offset +
173                                shader->const_data_offset;
174 
175    if (screen->brw) {
176       struct brw_shader_reloc_value reloc_values[] = {
177          {
178             .id = BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW,
179             .value = shader_data_addr,
180          },
181          {
182             .id = BRW_SHADER_RELOC_CONST_DATA_ADDR_HIGH,
183             .value = shader_data_addr >> 32,
184          },
185       };
186       brw_write_shader_relocs(&screen->brw->isa, shader->map,
187                               shader->brw_prog_data, reloc_values,
188                               ARRAY_SIZE(reloc_values));
189    } else {
190       struct elk_shader_reloc_value reloc_values[] = {
191          {
192             .id = BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW,
193             .value = shader_data_addr,
194          },
195          {
196             .id = BRW_SHADER_RELOC_CONST_DATA_ADDR_HIGH,
197             .value = shader_data_addr >> 32,
198          },
199       };
200       elk_write_shader_relocs(&screen->elk->isa, shader->map,
201                               shader->elk_prog_data, reloc_values,
202                               ARRAY_SIZE(reloc_values));
203    }
204 
205    /* Store the 3DSTATE shader packets and other derived state. */
206    screen->vtbl.store_derived_program_state(devinfo, cache_id, shader);
207 
208    util_queue_fence_signal(&shader->ready);
209 
210    if (!ish) {
211       struct keybox *keybox = make_keybox(shader, cache_id, key, key_size);
212       _mesa_hash_table_insert(driver_shaders, keybox, shader);
213    }
214 }
215 
216 bool
iris_blorp_lookup_shader(struct blorp_batch * blorp_batch,const void * key,uint32_t key_size,uint32_t * kernel_out,void * prog_data_out)217 iris_blorp_lookup_shader(struct blorp_batch *blorp_batch,
218                          const void *key, uint32_t key_size,
219                          uint32_t *kernel_out, void *prog_data_out)
220 {
221    struct blorp_context *blorp = blorp_batch->blorp;
222    struct iris_context *ice = blorp->driver_ctx;
223    struct iris_batch *batch = blorp_batch->driver_batch;
224    struct iris_screen *screen = batch->screen;
225    struct iris_compiled_shader *shader =
226       iris_find_cached_shader(ice, IRIS_CACHE_BLORP, key_size, key);
227 
228    if (!shader)
229       return false;
230 
231    struct iris_bo *bo = iris_resource_bo(shader->assembly.res);
232    *kernel_out =
233       iris_bo_offset_from_base_address(bo) + shader->assembly.offset;
234    *((void **) prog_data_out) = screen->brw ? (void *)shader->brw_prog_data
235                                             : (void *)shader->elk_prog_data;
236 
237    iris_use_pinned_bo(batch, bo, false, IRIS_DOMAIN_NONE);
238 
239    return true;
240 }
241 
242 bool
iris_blorp_upload_shader(struct blorp_batch * blorp_batch,uint32_t stage,const void * key,uint32_t key_size,const void * kernel,UNUSED uint32_t kernel_size,const void * prog_data_templ,UNUSED uint32_t prog_data_size,uint32_t * kernel_out,void * prog_data_out)243 iris_blorp_upload_shader(struct blorp_batch *blorp_batch, uint32_t stage,
244                          const void *key, uint32_t key_size,
245                          const void *kernel, UNUSED uint32_t kernel_size,
246                          const void *prog_data_templ,
247                          UNUSED uint32_t prog_data_size,
248                          uint32_t *kernel_out, void *prog_data_out)
249 {
250    struct blorp_context *blorp = blorp_batch->blorp;
251    struct iris_context *ice = blorp->driver_ctx;
252    struct iris_batch *batch = blorp_batch->driver_batch;
253    struct iris_screen *screen = batch->screen;
254 
255    struct iris_binding_table bt;
256    memset(&bt, 0, sizeof(bt));
257 
258    struct iris_compiled_shader *shader =
259       iris_create_shader_variant(screen, ice->shaders.cache, stage,
260                                  IRIS_CACHE_BLORP, key_size, key);
261 
262    void *prog_data = ralloc_size(NULL, prog_data_size);
263    memcpy(prog_data, prog_data_templ, prog_data_size);
264 
265    if (screen->brw) {
266       iris_apply_brw_prog_data(shader, prog_data);
267    } else {
268       assert(screen->elk);
269       iris_apply_elk_prog_data(shader, prog_data);
270    }
271 
272    iris_finalize_program(shader, NULL, NULL, 0, 0, 0, &bt);
273 
274    iris_upload_shader(screen, NULL, shader, ice->shaders.cache,
275                       ice->shaders.uploader_driver,
276                       IRIS_CACHE_BLORP, key_size, key, kernel);
277 
278    struct iris_bo *bo = iris_resource_bo(shader->assembly.res);
279    *kernel_out =
280       iris_bo_offset_from_base_address(bo) + shader->assembly.offset;
281    *((void **) prog_data_out) = screen->brw ? (void *)shader->brw_prog_data
282                                             : (void *)shader->elk_prog_data;
283 
284    iris_use_pinned_bo(batch, bo, false, IRIS_DOMAIN_NONE);
285 
286    return true;
287 }
288 
289 void
iris_init_program_cache(struct iris_context * ice)290 iris_init_program_cache(struct iris_context *ice)
291 {
292    ice->shaders.cache =
293       _mesa_hash_table_create(ice, keybox_hash, keybox_equals);
294 
295    ice->shaders.uploader_driver =
296       u_upload_create(&ice->ctx, 64 * 1024,
297                       PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE,
298                       IRIS_RESOURCE_FLAG_SHADER_MEMZONE |
299                       IRIS_RESOURCE_FLAG_DEVICE_MEM);
300    ice->shaders.uploader_unsync =
301       u_upload_create(&ice->ctx, 64 * 1024,
302                       PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE,
303                       IRIS_RESOURCE_FLAG_SHADER_MEMZONE |
304                       IRIS_RESOURCE_FLAG_DEVICE_MEM);
305 }
306 
307 void
iris_destroy_program_cache(struct iris_context * ice)308 iris_destroy_program_cache(struct iris_context *ice)
309 {
310    for (int i = 0; i < MESA_SHADER_STAGES; i++) {
311       iris_shader_variant_reference(&ice->shaders.prog[i], NULL);
312    }
313    iris_shader_variant_reference(&ice->shaders.last_vue_shader, NULL);
314 
315    hash_table_foreach(ice->shaders.cache, entry) {
316       struct iris_compiled_shader *shader = entry->data;
317       iris_delete_shader_variant(shader);
318    }
319 
320    u_upload_destroy(ice->shaders.uploader_driver);
321    u_upload_destroy(ice->shaders.uploader_unsync);
322 
323    ralloc_free(ice->shaders.cache);
324 }
325 
326 static void
link_libintel_shaders(nir_shader * nir,const nir_shader * libintel)327 link_libintel_shaders(nir_shader *nir, const nir_shader *libintel)
328 {
329    nir_link_shader_functions(nir, libintel);
330    NIR_PASS_V(nir, nir_inline_functions);
331    NIR_PASS_V(nir, nir_remove_non_entrypoints);
332    NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp,
333               glsl_get_cl_type_size_align);
334    NIR_PASS_V(nir, nir_opt_deref);
335    NIR_PASS_V(nir, nir_lower_vars_to_ssa);
336    NIR_PASS_V(nir, nir_lower_explicit_io,
337               nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared |
338                  nir_var_mem_global,
339               nir_address_format_62bit_generic);
340 }
341 
342 void
iris_ensure_indirect_generation_shader(struct iris_batch * batch)343 iris_ensure_indirect_generation_shader(struct iris_batch *batch)
344 {
345    struct iris_context *ice = batch->ice;
346    if (ice->draw.generation.shader)
347       return;
348 
349    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
350    const struct {
351       char name[40];
352    } key = {
353       .name = "iris-generation-shader",
354    };
355    ice->draw.generation.shader =
356       iris_find_cached_shader(ice, IRIS_CACHE_BLORP, sizeof(key), &key);
357    if (ice->draw.generation.shader != NULL)
358       return;
359 
360    const nir_shader_compiler_options *nir_options =
361       screen->brw ? screen->brw->nir_options[MESA_SHADER_COMPUTE]
362                   : screen->elk->nir_options[MESA_SHADER_COMPUTE];
363 
364    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
365                                                   nir_options,
366                                                   "iris-indirect-generate");
367 
368    uint32_t uniform_size =
369       screen->vtbl.call_generation_shader(screen, &b);
370 
371    nir_shader *nir = b.shader;
372 
373    void *mem_ctx = ralloc_context(NULL);
374    link_libintel_shaders(nir, screen->vtbl.load_shader_lib(screen, mem_ctx));
375 
376    NIR_PASS_V(nir, nir_lower_vars_to_ssa);
377    NIR_PASS_V(nir, nir_opt_cse);
378    NIR_PASS_V(nir, nir_opt_gcm, true);
379    NIR_PASS_V(nir, nir_opt_peephole_select, 1, false, false);
380 
381    NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);
382 
383    NIR_PASS_V(nir, nir_split_var_copies);
384    NIR_PASS_V(nir, nir_split_per_member_structs);
385 
386    if (screen->brw) {
387       struct brw_nir_compiler_opts opts = {};
388       brw_preprocess_nir(screen->brw, nir, &opts);
389    } else {
390       assert(screen->elk);
391       struct elk_nir_compiler_opts opts = {};
392       elk_preprocess_nir(screen->elk, nir, &opts);
393    }
394 
395    NIR_PASS_V(nir, nir_propagate_invariant, false);
396 
397    NIR_PASS_V(nir, nir_lower_input_attachments,
398               &(nir_input_attachment_options) {
399                  .use_fragcoord_sysval = true,
400                  .use_layer_id_sysval = true,
401               });
402 
403    /* Reset sizes before gathering information */
404    nir->global_mem_size = 0;
405    nir->scratch_size = 0;
406    nir->info.shared_size = 0;
407    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
408 
409    NIR_PASS_V(nir, nir_copy_prop);
410    NIR_PASS_V(nir, nir_opt_constant_folding);
411    NIR_PASS_V(nir, nir_opt_dce);
412 
413    /* Do vectorizing here. For some reason when trying to do it in the back
414     * this just isn't working.
415     */
416    nir_load_store_vectorize_options options = {
417       .modes = nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_global,
418       .callback = brw_nir_should_vectorize_mem,
419       .robust_modes = (nir_variable_mode)0,
420    };
421    NIR_PASS_V(nir, nir_opt_load_store_vectorize, &options);
422 
423    nir->num_uniforms = uniform_size;
424 
425    struct iris_compiled_shader *shader =
426       iris_create_shader_variant(screen, ice->shaders.cache,
427                                  MESA_SHADER_FRAGMENT,
428                                  IRIS_CACHE_BLORP,
429                                  sizeof(key), &key);
430 
431    const unsigned *program;
432    if (screen->brw) {
433       union brw_any_prog_key prog_key;
434       memset(&prog_key, 0, sizeof(prog_key));
435 
436       struct brw_wm_prog_data *prog_data = ralloc_size(NULL, sizeof(*prog_data));
437       memset(prog_data, 0, sizeof(*prog_data));
438       prog_data->base.nr_params = nir->num_uniforms / 4;
439 
440       brw_nir_analyze_ubo_ranges(screen->brw, nir, prog_data->base.ubo_ranges);
441 
442       struct brw_compile_stats stats[3];
443       struct brw_compile_fs_params params = {
444          .base = {
445             .nir = nir,
446             .log_data = &ice->dbg,
447             .debug_flag = DEBUG_WM,
448             .stats = stats,
449             .mem_ctx = mem_ctx,
450          },
451          .key = &prog_key.wm,
452          .prog_data = prog_data,
453       };
454       program = brw_compile_fs(screen->brw, &params);
455       assert(program);
456       iris_apply_brw_prog_data(shader, &prog_data->base);
457    } else {
458       union elk_any_prog_key prog_key;
459       memset(&prog_key, 0, sizeof(prog_key));
460 
461       struct elk_wm_prog_data *prog_data = ralloc_size(NULL, sizeof(*prog_data));
462       memset(prog_data, 0, sizeof(*prog_data));
463       prog_data->base.nr_params = nir->num_uniforms / 4;
464 
465       elk_nir_analyze_ubo_ranges(screen->elk, nir, prog_data->base.ubo_ranges);
466 
467       struct elk_compile_stats stats[3];
468       struct elk_compile_fs_params params = {
469          .base = {
470             .nir = nir,
471             .log_data = &ice->dbg,
472             .debug_flag = DEBUG_WM,
473             .stats = stats,
474             .mem_ctx = mem_ctx,
475          },
476          .key = &prog_key.wm,
477          .prog_data = prog_data,
478       };
479       program = elk_compile_fs(screen->elk, &params);
480       assert(program);
481       iris_apply_elk_prog_data(shader, &prog_data->base);
482    }
483 
484    struct iris_binding_table bt;
485    memset(&bt, 0, sizeof(bt));
486 
487    iris_finalize_program(shader, NULL, NULL, 0, 0, 0, &bt);
488 
489    iris_upload_shader(screen, NULL, shader, ice->shaders.cache,
490                       ice->shaders.uploader_driver,
491                       IRIS_CACHE_BLORP, sizeof(key), &key, program);
492 
493    ralloc_free(mem_ctx);
494 
495    struct iris_bo *bo = iris_resource_bo(shader->assembly.res);
496    iris_use_pinned_bo(batch, bo, false, IRIS_DOMAIN_NONE);
497 
498    ice->draw.generation.shader = shader;
499 }
500