xref: /aosp_15_r20/external/mesa3d/src/intel/vulkan/anv_pipeline_cache.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "util/blob.h"
25 #include "util/hash_table.h"
26 #include "util/u_debug.h"
27 #include "util/disk_cache.h"
28 #include "util/mesa-sha1.h"
29 #include "nir/nir_serialize.h"
30 #include "anv_private.h"
31 #include "nir/nir_xfb_info.h"
32 #include "vk_util.h"
33 #include "compiler/spirv/nir_spirv.h"
34 #include "shaders/float64_spv.h"
35 
36 /**
37  * Embedded sampler management.
38  */
39 
40 static unsigned
embedded_sampler_key_hash(const void * key)41 embedded_sampler_key_hash(const void *key)
42 {
43    return _mesa_hash_data(key, sizeof(struct anv_embedded_sampler_key));
44 }
45 
46 static bool
embedded_sampler_key_equal(const void * a,const void * b)47 embedded_sampler_key_equal(const void *a, const void *b)
48 {
49    return memcmp(a, b, sizeof(struct anv_embedded_sampler_key)) == 0;
50 }
51 
52 static void
anv_embedded_sampler_free(struct anv_device * device,struct anv_embedded_sampler * sampler)53 anv_embedded_sampler_free(struct anv_device *device,
54                           struct anv_embedded_sampler *sampler)
55 {
56    anv_state_pool_free(&device->dynamic_state_pool, sampler->sampler_state);
57    anv_state_pool_free(&device->dynamic_state_pool, sampler->border_color_state);
58    vk_free(&device->vk.alloc, sampler);
59 }
60 
61 static struct anv_embedded_sampler *
anv_embedded_sampler_ref(struct anv_embedded_sampler * sampler)62 anv_embedded_sampler_ref(struct anv_embedded_sampler *sampler)
63 {
64    sampler->ref_cnt++;
65    return sampler;
66 }
67 
/* Drop a reference on an embedded sampler.  When the last reference goes
 * away, the sampler is removed from the device-wide dedup table and freed.
 * The mutex serializes against concurrent lookups in
 * anv_shader_bin_get_embedded_samplers().
 */
static void
anv_embedded_sampler_unref(struct anv_device *device,
                           struct anv_embedded_sampler *sampler)
{
   simple_mtx_lock(&device->embedded_samplers.mutex);
   if (--sampler->ref_cnt == 0) {
      /* Unlink by key before freeing so no other thread can find a dying
       * sampler in the table.
       */
      _mesa_hash_table_remove_key(device->embedded_samplers.map,
                                  &sampler->key);
      anv_embedded_sampler_free(device, sampler);
   }
   simple_mtx_unlock(&device->embedded_samplers.mutex);
}
80 
81 void
anv_device_init_embedded_samplers(struct anv_device * device)82 anv_device_init_embedded_samplers(struct anv_device *device)
83 {
84    simple_mtx_init(&device->embedded_samplers.mutex, mtx_plain);
85    device->embedded_samplers.map =
86       _mesa_hash_table_create(NULL,
87                               embedded_sampler_key_hash,
88                               embedded_sampler_key_equal);
89 }
90 
91 void
anv_device_finish_embedded_samplers(struct anv_device * device)92 anv_device_finish_embedded_samplers(struct anv_device *device)
93 {
94    hash_table_foreach(device->embedded_samplers.map, entry) {
95       anv_embedded_sampler_free(device, entry->data);
96    }
97    ralloc_free(device->embedded_samplers.map);
98    simple_mtx_destroy(&device->embedded_samplers.mutex);
99 }
100 
/* Resolve the embedded samplers needed by @shader, deduplicating through
 * the device-wide table: a sampler with a matching key is re-referenced,
 * otherwise a new one is allocated, emitted via the generation-specific
 * emit_embedded_sampler hook, and published in the table.
 *
 * Returns VK_ERROR_OUT_OF_HOST_MEMORY on allocation failure.
 *
 * NOTE(review): on failure, references taken for earlier bindings
 * (indices below the failing one) are not released here — verify the
 * caller's error path accounts for that.
 */
static VkResult
anv_shader_bin_get_embedded_samplers(struct anv_device *device,
                                     struct anv_shader_bin *shader,
                                     const struct anv_pipeline_bind_map *bind_map)
{
   VkResult result = VK_SUCCESS;

   /* Hold the mutex across the whole loop so lookup+insert is atomic. */
   simple_mtx_lock(&device->embedded_samplers.mutex);

   for (uint32_t i = 0; i < bind_map->embedded_sampler_count; i++) {
      struct hash_entry *entry =
         _mesa_hash_table_search(device->embedded_samplers.map,
                                 &bind_map->embedded_sampler_to_binding[i].key);
      if (entry == NULL) {
         shader->embedded_samplers[i] =
            vk_zalloc(&device->vk.alloc,
                      sizeof(struct anv_embedded_sampler), 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
         if (shader->embedded_samplers[i] == NULL) {
            result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
            goto err;
         }

         /* Emit HW sampler/border-color state for this generation.
          * NOTE(review): the struct is zero-initialized here, so the
          * initial ref count is presumably set by emit_embedded_sampler —
          * confirm.
          */
         anv_genX(device->info, emit_embedded_sampler)(
            device, shader->embedded_samplers[i],
            &bind_map->embedded_sampler_to_binding[i]);
         _mesa_hash_table_insert(device->embedded_samplers.map,
                                 &shader->embedded_samplers[i]->key,
                                 shader->embedded_samplers[i]);
      } else {
         shader->embedded_samplers[i] = anv_embedded_sampler_ref(entry->data);
      }
   }

 err:
   simple_mtx_unlock(&device->embedded_samplers.mutex);
   return result;
}
139 
/**
 * Shader binary cache objects (anv_shader_bin) and their
 * vk_pipeline_cache serialization/deserialization support.
 */
143 
144 static bool
145 anv_shader_bin_serialize(struct vk_pipeline_cache_object *object,
146                          struct blob *blob);
147 
148 struct vk_pipeline_cache_object *
149 anv_shader_bin_deserialize(struct vk_pipeline_cache *cache,
150                            const void *key_data, size_t key_size,
151                            struct blob_reader *blob);
152 
153 static void
anv_shader_bin_destroy(struct vk_device * _device,struct vk_pipeline_cache_object * object)154 anv_shader_bin_destroy(struct vk_device *_device,
155                        struct vk_pipeline_cache_object *object)
156 {
157    struct anv_device *device =
158       container_of(_device, struct anv_device, vk);
159 
160    struct anv_shader_bin *shader =
161       container_of(object, struct anv_shader_bin, base);
162 
163    for (uint32_t i = 0; i < shader->bind_map.embedded_sampler_count; i++)
164       anv_embedded_sampler_unref(device, shader->embedded_samplers[i]);
165 
166    anv_state_pool_free(&device->instruction_state_pool, shader->kernel);
167    vk_pipeline_cache_object_finish(&shader->base);
168    vk_free(&device->vk.alloc, shader);
169 }
170 
/* vtable hooking anv_shader_bin into the common vk_pipeline_cache code. */
static const struct vk_pipeline_cache_object_ops anv_shader_bin_ops = {
   .serialize = anv_shader_bin_serialize,
   .deserialize = anv_shader_bin_deserialize,
   .destroy = anv_shader_bin_destroy,
};
176 
/* NULL-terminated list of object ops accepted when importing pipeline
 * cache data from an application-provided blob.
 */
const struct vk_pipeline_cache_object_ops *const anv_cache_import_ops[2] = {
   &anv_shader_bin_ops,
   NULL
};
181 
182 static void
anv_shader_bin_rewrite_embedded_samplers(struct anv_device * device,struct anv_shader_bin * shader,const struct anv_pipeline_bind_map * bind_map,const struct brw_stage_prog_data * prog_data_in)183 anv_shader_bin_rewrite_embedded_samplers(struct anv_device *device,
184                                          struct anv_shader_bin *shader,
185                                          const struct anv_pipeline_bind_map *bind_map,
186                                          const struct brw_stage_prog_data *prog_data_in)
187 {
188    int rv_count = 0;
189    struct brw_shader_reloc_value reloc_values[BRW_MAX_EMBEDDED_SAMPLERS];
190 
191    for (uint32_t i = 0; i < bind_map->embedded_sampler_count; i++) {
192       reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
193          .id = BRW_SHADER_RELOC_EMBEDDED_SAMPLER_HANDLE + i,
194          .value = shader->embedded_samplers[i]->sampler_state.offset,
195       };
196    }
197 
198    brw_write_shader_relocs(&device->physical->compiler->isa,
199                            shader->kernel.map, prog_data_in,
200                            reloc_values, rv_count);
201 }
202 
203 static uint32_t
brw_stage_prog_data_printf_num_args(const struct brw_stage_prog_data * prog_data)204 brw_stage_prog_data_printf_num_args(const struct brw_stage_prog_data *prog_data)
205 {
206    uint32_t count = 0;
207    for (unsigned i = 0; i < prog_data->printf_info_count; i++)
208       count += prog_data->printf_info[i].num_args;
209    return count;
210 }
211 
212 static uint32_t
brw_stage_prog_data_printf_string_size(const struct brw_stage_prog_data * prog_data)213 brw_stage_prog_data_printf_string_size(const struct brw_stage_prog_data *prog_data)
214 {
215    uint32_t size = 0;
216    for (unsigned i = 0; i < prog_data->printf_info_count; i++)
217       size += prog_data->printf_info[i].string_size;
218    return size;
219 }
220 
221 static void
copy_uprintf(u_printf_info * out_infos,unsigned * out_arg_sizes,char * out_strings,const struct brw_stage_prog_data * prog_data)222 copy_uprintf(u_printf_info *out_infos,
223              unsigned *out_arg_sizes,
224              char  *out_strings,
225              const struct brw_stage_prog_data *prog_data)
226 {
227    for (unsigned i = 0; i < prog_data->printf_info_count; i++) {
228       out_infos[i] = prog_data->printf_info[i];
229       out_infos[i].arg_sizes = out_arg_sizes;
230       memcpy(out_infos[i].arg_sizes,
231              prog_data->printf_info[i].arg_sizes,
232              sizeof(out_infos[i].arg_sizes[0]) * prog_data->printf_info[i].num_args);
233       out_infos[i].strings = out_strings;
234       memcpy(out_infos[i].strings,
235              prog_data->printf_info[i].strings,
236              prog_data->printf_info[i].string_size);
237 
238       out_arg_sizes += prog_data->printf_info[i].num_args;
239       out_strings += prog_data->printf_info[i].string_size;
240    }
241 }
242 
/**
 * Create an anv_shader_bin from freshly-compiled (or deserialized) shader
 * data.
 *
 * Everything the object owns except the kernel binary — cache key,
 * prog_data with its relocation/param arrays, xfb info, bind map tables
 * and printf metadata — is packed into one vk_multialloc block so a single
 * vk_free releases it.  The kernel is copied into the instruction state
 * pool and patched in place with address/offset relocations.
 *
 * Returns NULL on host allocation failure or when embedded sampler setup
 * fails.
 */
static struct anv_shader_bin *
anv_shader_bin_create(struct anv_device *device,
                      gl_shader_stage stage,
                      const void *key_data, uint32_t key_size,
                      const void *kernel_data, uint32_t kernel_size,
                      const struct brw_stage_prog_data *prog_data_in,
                      uint32_t prog_data_size,
                      const struct brw_compile_stats *stats, uint32_t num_stats,
                      const nir_xfb_info *xfb_info_in,
                      const struct anv_pipeline_bind_map *bind_map,
                      const struct anv_push_descriptor_info *push_desc_info,
                      enum anv_dynamic_push_bits dynamic_push_values)
{
   VK_MULTIALLOC(ma);
   VK_MULTIALLOC_DECL(&ma, struct anv_shader_bin, shader, 1);
   VK_MULTIALLOC_DECL_SIZE(&ma, void, obj_key_data, key_size);
   VK_MULTIALLOC_DECL_SIZE(&ma, struct brw_stage_prog_data, prog_data,
                                prog_data_size);
   VK_MULTIALLOC_DECL(&ma, struct brw_shader_reloc, prog_data_relocs,
                           prog_data_in->num_relocs);
   VK_MULTIALLOC_DECL(&ma, uint32_t, prog_data_param, prog_data_in->nr_params);

   VK_MULTIALLOC_DECL_SIZE(&ma, nir_xfb_info, xfb_info,
                                xfb_info_in == NULL ? 0 :
                                nir_xfb_info_size(xfb_info_in->output_count));

   VK_MULTIALLOC_DECL(&ma, struct anv_pipeline_binding, surface_to_descriptor,
                           bind_map->surface_count);
   VK_MULTIALLOC_DECL(&ma, struct anv_pipeline_binding, sampler_to_descriptor,
                      bind_map->sampler_count);
   VK_MULTIALLOC_DECL(&ma, struct anv_pipeline_embedded_sampler_binding,
                      embedded_sampler_to_binding,
                      bind_map->embedded_sampler_count);
   VK_MULTIALLOC_DECL(&ma, struct brw_kernel_arg_desc, kernel_args,
                      bind_map->kernel_arg_count);
   VK_MULTIALLOC_DECL(&ma, struct anv_embedded_sampler *, embedded_samplers,
                      bind_map->embedded_sampler_count);
   /* printf metadata is only retained when INTEL_DEBUG=shader-print is
    * active; otherwise these three declarations contribute zero bytes.
    */
   VK_MULTIALLOC_DECL(&ma, u_printf_info, printf_infos,
                      INTEL_DEBUG(DEBUG_SHADER_PRINT) ?
                      prog_data_in->printf_info_count : 0);
   VK_MULTIALLOC_DECL(&ma, unsigned, arg_sizes,
                      INTEL_DEBUG(DEBUG_SHADER_PRINT) ?
                      brw_stage_prog_data_printf_num_args(prog_data_in) : 0);
   VK_MULTIALLOC_DECL(&ma, char, strings,
                      INTEL_DEBUG(DEBUG_SHADER_PRINT) ?
                      brw_stage_prog_data_printf_string_size(prog_data_in) : 0);

   if (!vk_multialloc_alloc(&ma, &device->vk.alloc,
                            VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
      return NULL;

   /* The cache key must live as long as the object; copy it in. */
   memcpy(obj_key_data, key_data, key_size);
   vk_pipeline_cache_object_init(&device->vk, &shader->base,
                                 &anv_shader_bin_ops, obj_key_data, key_size);

   shader->stage = stage;

   /* Upload the kernel binary into the instruction state pool. */
   shader->kernel =
      anv_state_pool_alloc(&device->instruction_state_pool, kernel_size, 64);
   memcpy(shader->kernel.map, kernel_data, kernel_size);
   shader->kernel_size = kernel_size;

   if (bind_map->embedded_sampler_count > 0) {
      shader->embedded_samplers = embedded_samplers;
      if (anv_shader_bin_get_embedded_samplers(device, shader, bind_map) != VK_SUCCESS) {
         /* NOTE(review): this error path vk_free()s the object without
          * calling vk_pipeline_cache_object_finish() — confirm object_init
          * holds no resources at this point.
          */
         anv_state_pool_free(&device->instruction_state_pool, shader->kernel);
         vk_free(&device->vk.alloc, shader);
         return NULL;
      }
   }

   /* GPU address of the shader's constant data, placed right after the
    * kernel in the instruction state pool.
    */
   uint64_t shader_data_addr =
      device->physical->va.instruction_state_pool.addr +
      shader->kernel.offset +
      prog_data_in->const_data_offset;

   /* Up to 9 fixed relocations: descriptor pool addresses, constant data
    * address, shader start offset, ray-tracing resume SBT and printf
    * buffer/identifier values.
    */
   int rv_count = 0;
   struct brw_shader_reloc_value reloc_values[9];
   /* The HIGH relocations assume the low 32 bits of each pool base are 0. */
   assert((device->physical->va.dynamic_visible_pool.addr & 0xffffffff) == 0);
   reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
      .id = BRW_SHADER_RELOC_DESCRIPTORS_BUFFER_ADDR_HIGH,
      .value = device->physical->va.dynamic_visible_pool.addr >> 32,
   };
   assert((device->physical->va.indirect_descriptor_pool.addr & 0xffffffff) == 0);
   assert((device->physical->va.internal_surface_state_pool.addr & 0xffffffff) == 0);
   reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
      .id = BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH,
      .value = device->physical->indirect_descriptors ?
               (device->physical->va.indirect_descriptor_pool.addr >> 32) :
               (device->physical->va.internal_surface_state_pool.addr >> 32),
   };
   assert((device->physical->va.instruction_state_pool.addr & 0xffffffff) == 0);
   reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
      .id = BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW,
      .value = shader_data_addr,
   };
   assert((device->physical->va.instruction_state_pool.addr & 0xffffffff) == 0);
   assert(shader_data_addr >> 32 == device->physical->va.instruction_state_pool.addr >> 32);
   reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
      .id = BRW_SHADER_RELOC_CONST_DATA_ADDR_HIGH,
      .value = device->physical->va.instruction_state_pool.addr >> 32,
   };
   reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
      .id = BRW_SHADER_RELOC_SHADER_START_OFFSET,
      .value = shader->kernel.offset,
   };
   if (brw_shader_stage_is_bindless(stage)) {
      /* Ray-tracing stages also need the address of their resume shader
       * binding table.
       */
      const struct brw_bs_prog_data *bs_prog_data =
         brw_bs_prog_data_const(prog_data_in);
      uint64_t resume_sbt_addr =
         device->physical->va.instruction_state_pool.addr +
         shader->kernel.offset +
         bs_prog_data->resume_sbt_offset;
      reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
         .id = BRW_SHADER_RELOC_RESUME_SBT_ADDR_LOW,
         .value = resume_sbt_addr,
      };
      reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
         .id = BRW_SHADER_RELOC_RESUME_SBT_ADDR_HIGH,
         .value = resume_sbt_addr >> 32,
      };
   }

   if (INTEL_DEBUG(DEBUG_SHADER_PRINT) && prog_data_in->printf_info_count > 0) {
      assert(device->printf.bo != NULL);

      copy_uprintf(printf_infos, arg_sizes, strings, prog_data_in);

      /* Register this shader's printf formats in the device-wide list so
       * the identifiers patched below resolve at dump time.
       */
      simple_mtx_lock(&device->printf.mutex);

      uint32_t base_printf_idx =
         util_dynarray_num_elements(&device->printf.prints, u_printf_info*);
      for (uint32_t i = 0; i < prog_data_in->printf_info_count; i++) {
         util_dynarray_append(&device->printf.prints, u_printf_info *,
                              &printf_infos[i]);
      }

      simple_mtx_unlock(&device->printf.mutex);

      /* u_printf expects the string IDs to start at 1. */
      reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
         .id = BRW_SHADER_RELOC_PRINTF_BASE_IDENTIFIER,
         .value = base_printf_idx,
      };
      reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
         .id = BRW_SHADER_RELOC_PRINTF_BUFFER_ADDR_LOW,
         .value = device->printf.bo->offset & 0xffffffff,
      };
      reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
         .id = BRW_SHADER_RELOC_PRINTF_BUFFER_ADDR_HIGH,
         .value = device->printf.bo->offset >> 32,
      };
   } else if (prog_data_in->printf_info_count > 0) {
      unreachable("shader with printf intrinsics requires INTEL_DEBUG=shader-print");
   }

   brw_write_shader_relocs(&device->physical->compiler->isa,
                           shader->kernel.map, prog_data_in,
                           reloc_values, rv_count);

   anv_shader_bin_rewrite_embedded_samplers(device, shader, bind_map, prog_data_in);

   /* Copy prog_data and repoint its out-of-line arrays at our storage. */
   memcpy(prog_data, prog_data_in, prog_data_size);
   typed_memcpy(prog_data_relocs, prog_data_in->relocs,
                prog_data_in->num_relocs);
   prog_data->relocs = prog_data_relocs;
   memset(prog_data_param, 0,
          prog_data->nr_params * sizeof(*prog_data_param));
   prog_data->param = prog_data_param;
   prog_data->printf_info = printf_infos;
   shader->prog_data = prog_data;
   shader->prog_data_size = prog_data_size;

   assert(num_stats <= ARRAY_SIZE(shader->stats));
   typed_memcpy(shader->stats, stats, num_stats);
   shader->num_stats = num_stats;

   if (xfb_info_in) {
      *xfb_info = *xfb_info_in;
      typed_memcpy(xfb_info->outputs, xfb_info_in->outputs,
                   xfb_info_in->output_count);
      shader->xfb_info = xfb_info;
   } else {
      shader->xfb_info = NULL;
   }

   shader->dynamic_push_values = dynamic_push_values;

   typed_memcpy(&shader->push_desc_info, push_desc_info, 1);

   /* Copy the bind map by value, then deep-copy its four tables. */
   shader->bind_map = *bind_map;

   typed_memcpy(surface_to_descriptor, bind_map->surface_to_descriptor,
                bind_map->surface_count);
   shader->bind_map.surface_to_descriptor = surface_to_descriptor;

   typed_memcpy(sampler_to_descriptor, bind_map->sampler_to_descriptor,
                bind_map->sampler_count);
   shader->bind_map.sampler_to_descriptor = sampler_to_descriptor;

   typed_memcpy(embedded_sampler_to_binding, bind_map->embedded_sampler_to_binding,
                bind_map->embedded_sampler_count);
   shader->bind_map.embedded_sampler_to_binding = embedded_sampler_to_binding;

   typed_memcpy(kernel_args, bind_map->kernel_args,
                bind_map->kernel_arg_count);
   shader->bind_map.kernel_args = kernel_args;

   return shader;
}
453 
/* vk_pipeline_cache serialize callback.  The write order here is the wire
 * format and must be mirrored exactly by anv_shader_bin_deserialize().
 * Returns false if the blob ran out of memory.
 */
static bool
anv_shader_bin_serialize(struct vk_pipeline_cache_object *object,
                         struct blob *blob)
{
   struct anv_shader_bin *shader =
      container_of(object, struct anv_shader_bin, base);

   blob_write_uint32(blob, shader->stage);

   blob_write_uint32(blob, shader->kernel_size);
   blob_write_bytes(blob, shader->kernel.map, shader->kernel_size);

   blob_write_uint32(blob, shader->prog_data_size);

   /* Serialize prog_data with its host pointers scrubbed; the relocation
    * array is written separately right after and params are rebuilt as
    * zeros on deserialization.
    */
   union brw_any_prog_data prog_data;
   assert(shader->prog_data_size <= sizeof(prog_data));
   memcpy(&prog_data, shader->prog_data, shader->prog_data_size);
   prog_data.base.relocs = NULL;
   prog_data.base.param = NULL;
   blob_write_bytes(blob, &prog_data, shader->prog_data_size);

   blob_write_bytes(blob, shader->prog_data->relocs,
                    shader->prog_data->num_relocs *
                    sizeof(shader->prog_data->relocs[0]));
   nir_serialize_printf_info(blob, shader->prog_data->printf_info,
                             shader->prog_data->printf_info_count);

   blob_write_uint32(blob, shader->num_stats);
   blob_write_bytes(blob, shader->stats,
                    shader->num_stats * sizeof(shader->stats[0]));

   /* xfb info is length-prefixed; zero length means "none". */
   if (shader->xfb_info) {
      uint32_t xfb_info_size =
         nir_xfb_info_size(shader->xfb_info->output_count);
      blob_write_uint32(blob, xfb_info_size);
      blob_write_bytes(blob, shader->xfb_info, xfb_info_size);
   } else {
      blob_write_uint32(blob, 0);
   }

   blob_write_uint32(blob, shader->dynamic_push_values);

   blob_write_uint32(blob, shader->push_desc_info.used_descriptors);
   blob_write_uint32(blob, shader->push_desc_info.fully_promoted_ubo_descriptors);
   blob_write_uint8(blob, shader->push_desc_info.used_set_buffer);

   blob_write_bytes(blob, shader->bind_map.surface_sha1,
                    sizeof(shader->bind_map.surface_sha1));
   blob_write_bytes(blob, shader->bind_map.sampler_sha1,
                    sizeof(shader->bind_map.sampler_sha1));
   blob_write_bytes(blob, shader->bind_map.push_sha1,
                    sizeof(shader->bind_map.push_sha1));
   blob_write_uint32(blob, shader->bind_map.surface_count);
   blob_write_uint32(blob, shader->bind_map.sampler_count);
   blob_write_uint32(blob, shader->bind_map.embedded_sampler_count);
   /* Kernel-arg fields only exist for OpenCL-style kernels; pack the size
    * and count into one dword (size in the high 16 bits).
    */
   if (shader->stage == MESA_SHADER_KERNEL) {
      uint32_t packed = (uint32_t)shader->bind_map.kernel_args_size << 16 |
                        (uint32_t)shader->bind_map.kernel_arg_count;
      blob_write_uint32(blob, packed);
   }
   blob_write_bytes(blob, shader->bind_map.surface_to_descriptor,
                    shader->bind_map.surface_count *
                    sizeof(*shader->bind_map.surface_to_descriptor));
   blob_write_bytes(blob, shader->bind_map.sampler_to_descriptor,
                    shader->bind_map.sampler_count *
                    sizeof(*shader->bind_map.sampler_to_descriptor));
   blob_write_bytes(blob, shader->bind_map.embedded_sampler_to_binding,
                    shader->bind_map.embedded_sampler_count *
                    sizeof(*shader->bind_map.embedded_sampler_to_binding));
   blob_write_bytes(blob, shader->bind_map.kernel_args,
                    shader->bind_map.kernel_arg_count *
                    sizeof(*shader->bind_map.kernel_args));
   blob_write_bytes(blob, shader->bind_map.push_ranges,
                    sizeof(shader->bind_map.push_ranges));

   return !blob->out_of_memory;
}
531 
/* vk_pipeline_cache deserialize callback.  Reads the format written by
 * anv_shader_bin_serialize() — field order must match it exactly — and
 * rebuilds an anv_shader_bin via anv_shader_bin_create(), which deep-copies
 * everything, so all pointers read out of the blob may remain borrowed.
 * Returns NULL on a truncated/overrun blob or creation failure.
 */
struct vk_pipeline_cache_object *
anv_shader_bin_deserialize(struct vk_pipeline_cache *cache,
                           const void *key_data, size_t key_size,
                           struct blob_reader *blob)
{
   struct anv_device *device =
      container_of(cache->base.device, struct anv_device, vk);

   gl_shader_stage stage = blob_read_uint32(blob);

   uint32_t kernel_size = blob_read_uint32(blob);
   const void *kernel_data = blob_read_bytes(blob, kernel_size);

   uint32_t prog_data_size = blob_read_uint32(blob);
   const void *prog_data_bytes = blob_read_bytes(blob, prog_data_size);
   if (blob->overrun)
      return NULL;

   /* Copy into a union big enough for any stage's prog_data, then restore
    * the pointers that were scrubbed at serialize time.
    */
   union brw_any_prog_data prog_data;
   memcpy(&prog_data, prog_data_bytes,
          MIN2(sizeof(prog_data), prog_data_size));
   prog_data.base.relocs =
      blob_read_bytes(blob, prog_data.base.num_relocs *
                            sizeof(prog_data.base.relocs[0]));

   /* printf info is allocated on a temporary context; shader_bin_create
    * makes its own copy, so the context is freed before returning.
    */
   void *mem_ctx = ralloc_context(NULL);
   prog_data.base.printf_info =
      nir_deserialize_printf_info(mem_ctx, blob,
                                  &prog_data.base.printf_info_count);

   uint32_t num_stats = blob_read_uint32(blob);
   const struct brw_compile_stats *stats =
      blob_read_bytes(blob, num_stats * sizeof(stats[0]));

   const nir_xfb_info *xfb_info = NULL;
   uint32_t xfb_size = blob_read_uint32(blob);
   if (xfb_size)
      xfb_info = blob_read_bytes(blob, xfb_size);

   enum anv_dynamic_push_bits dynamic_push_values = blob_read_uint32(blob);

   struct anv_push_descriptor_info push_desc_info = {};
   push_desc_info.used_descriptors = blob_read_uint32(blob);
   push_desc_info.fully_promoted_ubo_descriptors = blob_read_uint32(blob);
   push_desc_info.used_set_buffer = blob_read_uint8(blob);

   struct anv_pipeline_bind_map bind_map = {};
   blob_copy_bytes(blob, bind_map.surface_sha1, sizeof(bind_map.surface_sha1));
   blob_copy_bytes(blob, bind_map.sampler_sha1, sizeof(bind_map.sampler_sha1));
   blob_copy_bytes(blob, bind_map.push_sha1, sizeof(bind_map.push_sha1));
   bind_map.surface_count = blob_read_uint32(blob);
   bind_map.sampler_count = blob_read_uint32(blob);
   bind_map.embedded_sampler_count = blob_read_uint32(blob);
   /* Unpack the kernel-arg dword written only for MESA_SHADER_KERNEL:
    * size in the high 16 bits, count in the low 16.
    */
   if (stage == MESA_SHADER_KERNEL) {
      uint32_t packed = blob_read_uint32(blob);
      bind_map.kernel_args_size = (uint16_t)(packed >> 16);
      bind_map.kernel_arg_count = (uint16_t)packed;
   }
   bind_map.surface_to_descriptor = (void *)
      blob_read_bytes(blob, bind_map.surface_count *
                            sizeof(*bind_map.surface_to_descriptor));
   bind_map.sampler_to_descriptor = (void *)
      blob_read_bytes(blob, bind_map.sampler_count *
                            sizeof(*bind_map.sampler_to_descriptor));
   bind_map.embedded_sampler_to_binding = (void *)
      blob_read_bytes(blob, bind_map.embedded_sampler_count *
                            sizeof(*bind_map.embedded_sampler_to_binding));
   bind_map.kernel_args = (void *)
      blob_read_bytes(blob, bind_map.kernel_arg_count *
                            sizeof(*bind_map.kernel_args));
   blob_copy_bytes(blob, bind_map.push_ranges, sizeof(bind_map.push_ranges));

   if (blob->overrun) {
      ralloc_free(mem_ctx);
      return NULL;
   }

   struct anv_shader_bin *shader =
      anv_shader_bin_create(device, stage,
                            key_data, key_size,
                            kernel_data, kernel_size,
                            &prog_data.base, prog_data_size,
                            stats, num_stats, xfb_info, &bind_map,
                            &push_desc_info,
                            dynamic_push_values);

   ralloc_free(mem_ctx);

   if (shader == NULL)
      return NULL;

   return &shader->base;
}
625 
626 struct anv_shader_bin *
anv_device_search_for_kernel(struct anv_device * device,struct vk_pipeline_cache * cache,const void * key_data,uint32_t key_size,bool * user_cache_hit)627 anv_device_search_for_kernel(struct anv_device *device,
628                              struct vk_pipeline_cache *cache,
629                              const void *key_data, uint32_t key_size,
630                              bool *user_cache_hit)
631 {
632    /* Use the default pipeline cache if none is specified */
633    if (cache == NULL)
634       cache = device->vk.mem_cache;
635 
636    bool cache_hit = false;
637    struct vk_pipeline_cache_object *object =
638       vk_pipeline_cache_lookup_object(cache, key_data, key_size,
639                                       &anv_shader_bin_ops, &cache_hit);
640    if (user_cache_hit != NULL) {
641       *user_cache_hit = object != NULL && cache_hit &&
642                         cache != device->vk.mem_cache;
643    }
644 
645    if (object == NULL)
646       return NULL;
647 
648    return container_of(object, struct anv_shader_bin, base);
649 }
650 
651 struct anv_shader_bin *
anv_device_upload_kernel(struct anv_device * device,struct vk_pipeline_cache * cache,const struct anv_shader_upload_params * params)652 anv_device_upload_kernel(struct anv_device *device,
653                          struct vk_pipeline_cache *cache,
654                          const struct anv_shader_upload_params *params)
655 {
656    /* Use the default pipeline cache if none is specified */
657    if (cache == NULL)
658       cache = device->vk.mem_cache;
659 
660    struct anv_shader_bin *shader =
661       anv_shader_bin_create(device,
662                             params->stage,
663                             params->key_data,
664                             params->key_size,
665                             params->kernel_data,
666                             params->kernel_size,
667                             params->prog_data,
668                             params->prog_data_size,
669                             params->stats,
670                             params->num_stats,
671                             params->xfb_info,
672                             params->bind_map,
673                             params->push_desc_info,
674                             params->dynamic_push_values);
675    if (shader == NULL)
676       return NULL;
677 
678    struct vk_pipeline_cache_object *cached =
679       vk_pipeline_cache_add_object(cache, &shader->base);
680 
681    return container_of(cached, struct anv_shader_bin, base);
682 }
683 
684 #define SHA1_KEY_SIZE 20
685 
686 struct nir_shader *
anv_device_search_for_nir(struct anv_device * device,struct vk_pipeline_cache * cache,const nir_shader_compiler_options * nir_options,unsigned char sha1_key[SHA1_KEY_SIZE],void * mem_ctx)687 anv_device_search_for_nir(struct anv_device *device,
688                           struct vk_pipeline_cache *cache,
689                           const nir_shader_compiler_options *nir_options,
690                           unsigned char sha1_key[SHA1_KEY_SIZE],
691                           void *mem_ctx)
692 {
693    if (cache == NULL)
694       cache = device->vk.mem_cache;
695 
696    return vk_pipeline_cache_lookup_nir(cache, sha1_key, SHA1_KEY_SIZE,
697                                        nir_options, NULL, mem_ctx);
698 }
699 
700 void
anv_device_upload_nir(struct anv_device * device,struct vk_pipeline_cache * cache,const struct nir_shader * nir,unsigned char sha1_key[SHA1_KEY_SIZE])701 anv_device_upload_nir(struct anv_device *device,
702                       struct vk_pipeline_cache *cache,
703                       const struct nir_shader *nir,
704                       unsigned char sha1_key[SHA1_KEY_SIZE])
705 {
706    if (cache == NULL)
707       cache = device->vk.mem_cache;
708 
709    vk_pipeline_cache_add_nir(cache, sha1_key, SHA1_KEY_SIZE, nir);
710 }
711 
/* Load (or build and cache) the SPIR-V fp64 emulation library as NIR and
 * stash it in device->fp64_nir.  The library is keyed in the internal
 * cache by the SHA1 of a fixed name, so spirv_to_nir and the lowering
 * passes below only run on the first device creation.
 */
void
anv_load_fp64_shader(struct anv_device *device)
{
   const nir_shader_compiler_options *nir_options =
      device->physical->compiler->nir_options[MESA_SHADER_VERTEX];

   const char* shader_name = "float64_spv_lib";
   struct mesa_sha1 sha1_ctx;
   uint8_t sha1[20];
   _mesa_sha1_init(&sha1_ctx);
   _mesa_sha1_update(&sha1_ctx, shader_name, strlen(shader_name));
   _mesa_sha1_final(&sha1_ctx, sha1);

   device->fp64_nir =
      anv_device_search_for_nir(device, device->internal_cache,
                                   nir_options, sha1, NULL);

   /* The shader found, no need to call spirv_to_nir() again. */
   if (device->fp64_nir)
      return;

   const struct spirv_capabilities spirv_caps = {
      .Addresses = true,
      .Float64 = true,
      .Int8 = true,
      .Int16 = true,
      .Int64 = true,
   };

   /* create_library: the module exports callable functions rather than a
    * single entry point.
    */
   struct spirv_to_nir_options spirv_options = {
      .capabilities = &spirv_caps,
      .environment = NIR_SPIRV_VULKAN,
      .create_library = true
   };

   /* float64_spv_source is a word (uint32) array, hence the /4. */
   nir_shader* nir =
      spirv_to_nir(float64_spv_source, sizeof(float64_spv_source) / 4,
                   NULL, 0, MESA_SHADER_VERTEX, "main",
                   &spirv_options, nir_options);

   assert(nir != NULL);

   nir_validate_shader(nir, "after spirv_to_nir");
   nir_validate_ssa_dominance(nir, "after spirv_to_nir");

   /* Inline everything and clean up so callers can link against a flat
    * library.  Pass order matters: returns must be lowered before
    * inlining, and vars-to-ssa/copy-prop/DCE run after.
    */
   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS_V(nir, nir_lower_returns);
   NIR_PASS_V(nir, nir_inline_functions);
   NIR_PASS_V(nir, nir_opt_deref);

   NIR_PASS_V(nir, nir_lower_vars_to_ssa);
   NIR_PASS_V(nir, nir_copy_prop);
   NIR_PASS_V(nir, nir_opt_dce);
   NIR_PASS_V(nir, nir_opt_cse);
   NIR_PASS_V(nir, nir_opt_gcm, true);
   NIR_PASS_V(nir, nir_opt_peephole_select, 1, false, false);
   NIR_PASS_V(nir, nir_opt_dce);

   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_function_temp,
              nir_address_format_62bit_generic);

   /* Cache for subsequent device creations. */
   anv_device_upload_nir(device, device->internal_cache,
                         nir, sha1);

   device->fp64_nir = nir;
}
778