1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "util/blob.h"
25 #include "util/hash_table.h"
26 #include "util/u_debug.h"
27 #include "util/disk_cache.h"
28 #include "util/mesa-sha1.h"
29 #include "nir/nir_serialize.h"
30 #include "anv_private.h"
31 #include "nir/nir_xfb_info.h"
32 #include "vk_util.h"
33 #include "compiler/spirv/nir_spirv.h"
34 #include "shaders/float64_spv.h"
35
36 /**
37 * Embedded sampler management.
38 */
39
40 static unsigned
embedded_sampler_key_hash(const void * key)41 embedded_sampler_key_hash(const void *key)
42 {
43 return _mesa_hash_data(key, sizeof(struct anv_embedded_sampler_key));
44 }
45
46 static bool
embedded_sampler_key_equal(const void * a,const void * b)47 embedded_sampler_key_equal(const void *a, const void *b)
48 {
49 return memcmp(a, b, sizeof(struct anv_embedded_sampler_key)) == 0;
50 }
51
/* Release the GPU state backing an embedded sampler and free the host
 * allocation.  Callers must have already removed the sampler from the
 * device's lookup table (or never inserted it) — this does not touch the
 * map or the ref count.
 */
static void
anv_embedded_sampler_free(struct anv_device *device,
                          struct anv_embedded_sampler *sampler)
{
   /* NOTE(review): both states are assumed to come from the dynamic state
    * pool; the allocation site is in genX code, not visible here — confirm.
    */
   anv_state_pool_free(&device->dynamic_state_pool, sampler->sampler_state);
   anv_state_pool_free(&device->dynamic_state_pool, sampler->border_color_state);
   vk_free(&device->vk.alloc, sampler);
}
60
/* Take a reference on an embedded sampler and return it for chaining.
 * The increment is not atomic: callers must hold
 * device->embedded_samplers.mutex (see
 * anv_shader_bin_get_embedded_samplers).
 */
static struct anv_embedded_sampler *
anv_embedded_sampler_ref(struct anv_embedded_sampler *sampler)
{
   sampler->ref_cnt++;
   return sampler;
}
67
/* Drop a reference; when the count reaches zero, remove the sampler from
 * the device table and free it.  Takes the embedded-samplers mutex itself,
 * so callers must NOT already hold it (simple_mtx is non-recursive).
 */
static void
anv_embedded_sampler_unref(struct anv_device *device,
                           struct anv_embedded_sampler *sampler)
{
   simple_mtx_lock(&device->embedded_samplers.mutex);
   if (--sampler->ref_cnt == 0) {
      /* The table entry is keyed on the sampler's own embedded key copy. */
      _mesa_hash_table_remove_key(device->embedded_samplers.map,
                                  &sampler->key);
      anv_embedded_sampler_free(device, sampler);
   }
   simple_mtx_unlock(&device->embedded_samplers.mutex);
}
80
81 void
anv_device_init_embedded_samplers(struct anv_device * device)82 anv_device_init_embedded_samplers(struct anv_device *device)
83 {
84 simple_mtx_init(&device->embedded_samplers.mutex, mtx_plain);
85 device->embedded_samplers.map =
86 _mesa_hash_table_create(NULL,
87 embedded_sampler_key_hash,
88 embedded_sampler_key_equal);
89 }
90
/* Tear down the embedded-sampler cache at device destruction.  Any sampler
 * still in the map is freed regardless of its remaining ref count; all
 * shaders holding references are expected to be gone by now.
 */
void
anv_device_finish_embedded_samplers(struct anv_device *device)
{
   hash_table_foreach(device->embedded_samplers.map, entry) {
      anv_embedded_sampler_free(device, entry->data);
   }
   ralloc_free(device->embedded_samplers.map);
   simple_mtx_destroy(&device->embedded_samplers.mutex);
}
100
/* Resolve every embedded-sampler binding of @bind_map into an
 * anv_embedded_sampler pointer in shader->embedded_samplers[].  Existing
 * samplers are looked up in the device-wide dedup map and referenced; new
 * ones are allocated, emitted via genX code, and inserted into the map.
 *
 * Returns VK_SUCCESS or VK_ERROR_OUT_OF_HOST_MEMORY.
 *
 * NOTE(review): on allocation failure partway through the loop, references
 * taken (and samplers created) in earlier iterations are not released here,
 * and the caller frees the shader without unref'ing them — looks like a
 * reference leak on this error path; confirm whether intended.
 */
static VkResult
anv_shader_bin_get_embedded_samplers(struct anv_device *device,
                                     struct anv_shader_bin *shader,
                                     const struct anv_pipeline_bind_map *bind_map)
{
   VkResult result = VK_SUCCESS;

   /* The mutex covers both the map lookup/insert and the ref-count bump
    * inside anv_embedded_sampler_ref().
    */
   simple_mtx_lock(&device->embedded_samplers.mutex);

   for (uint32_t i = 0; i < bind_map->embedded_sampler_count; i++) {
      struct hash_entry *entry =
         _mesa_hash_table_search(device->embedded_samplers.map,
                                 &bind_map->embedded_sampler_to_binding[i].key);
      if (entry == NULL) {
         shader->embedded_samplers[i] =
            vk_zalloc(&device->vk.alloc,
                      sizeof(struct anv_embedded_sampler), 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
         if (shader->embedded_samplers[i] == NULL) {
            result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
            goto err;
         }

         /* Emit the sampler state through the per-gen backend, then make
          * it discoverable for later shaders using the same key.
          */
         anv_genX(device->info, emit_embedded_sampler)(
            device, shader->embedded_samplers[i],
            &bind_map->embedded_sampler_to_binding[i]);
         _mesa_hash_table_insert(device->embedded_samplers.map,
                                 &shader->embedded_samplers[i]->key,
                                 shader->embedded_samplers[i]);
      } else {
         shader->embedded_samplers[i] = anv_embedded_sampler_ref(entry->data);
      }
   }

 err:
   simple_mtx_unlock(&device->embedded_samplers.mutex);
   return result;
}
139
140 /**
141 *
142 */
143
/* Forward declarations for the vk_pipeline_cache_object ops table below. */
static bool
anv_shader_bin_serialize(struct vk_pipeline_cache_object *object,
                         struct blob *blob);

struct vk_pipeline_cache_object *
anv_shader_bin_deserialize(struct vk_pipeline_cache *cache,
                           const void *key_data, size_t key_size,
                           struct blob_reader *blob);
152
/* vk_pipeline_cache_object destroy hook: releases everything a shader
 * binary owns — embedded sampler references, the kernel allocation in the
 * instruction pool, the cache-object bookkeeping, and finally the single
 * multialloc block holding the anv_shader_bin itself (all trailing arrays
 * live inside it).
 */
static void
anv_shader_bin_destroy(struct vk_device *_device,
                       struct vk_pipeline_cache_object *object)
{
   struct anv_device *device =
      container_of(_device, struct anv_device, vk);

   struct anv_shader_bin *shader =
      container_of(object, struct anv_shader_bin, base);

   for (uint32_t i = 0; i < shader->bind_map.embedded_sampler_count; i++)
      anv_embedded_sampler_unref(device, shader->embedded_samplers[i]);

   anv_state_pool_free(&device->instruction_state_pool, shader->kernel);
   vk_pipeline_cache_object_finish(&shader->base);
   vk_free(&device->vk.alloc, shader);
}
170
/* vtable hooking anv_shader_bin into the common vk_pipeline_cache. */
static const struct vk_pipeline_cache_object_ops anv_shader_bin_ops = {
   .serialize = anv_shader_bin_serialize,
   .deserialize = anv_shader_bin_deserialize,
   .destroy = anv_shader_bin_destroy,
};

/* NULL-terminated list of object ops accepted when importing pipeline
 * cache data from the application.
 */
const struct vk_pipeline_cache_object_ops *const anv_cache_import_ops[2] = {
   &anv_shader_bin_ops,
   NULL
};
181
182 static void
anv_shader_bin_rewrite_embedded_samplers(struct anv_device * device,struct anv_shader_bin * shader,const struct anv_pipeline_bind_map * bind_map,const struct brw_stage_prog_data * prog_data_in)183 anv_shader_bin_rewrite_embedded_samplers(struct anv_device *device,
184 struct anv_shader_bin *shader,
185 const struct anv_pipeline_bind_map *bind_map,
186 const struct brw_stage_prog_data *prog_data_in)
187 {
188 int rv_count = 0;
189 struct brw_shader_reloc_value reloc_values[BRW_MAX_EMBEDDED_SAMPLERS];
190
191 for (uint32_t i = 0; i < bind_map->embedded_sampler_count; i++) {
192 reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
193 .id = BRW_SHADER_RELOC_EMBEDDED_SAMPLER_HANDLE + i,
194 .value = shader->embedded_samplers[i]->sampler_state.offset,
195 };
196 }
197
198 brw_write_shader_relocs(&device->physical->compiler->isa,
199 shader->kernel.map, prog_data_in,
200 reloc_values, rv_count);
201 }
202
203 static uint32_t
brw_stage_prog_data_printf_num_args(const struct brw_stage_prog_data * prog_data)204 brw_stage_prog_data_printf_num_args(const struct brw_stage_prog_data *prog_data)
205 {
206 uint32_t count = 0;
207 for (unsigned i = 0; i < prog_data->printf_info_count; i++)
208 count += prog_data->printf_info[i].num_args;
209 return count;
210 }
211
212 static uint32_t
brw_stage_prog_data_printf_string_size(const struct brw_stage_prog_data * prog_data)213 brw_stage_prog_data_printf_string_size(const struct brw_stage_prog_data *prog_data)
214 {
215 uint32_t size = 0;
216 for (unsigned i = 0; i < prog_data->printf_info_count; i++)
217 size += prog_data->printf_info[i].string_size;
218 return size;
219 }
220
/* Deep-copy printf metadata out of prog_data into caller-provided storage.
 *
 * out_infos receives printf_info_count u_printf_info entries; each entry's
 * arg_sizes and strings pointers are rebased to point into the
 * out_arg_sizes and out_strings buffers, which are packed back-to-back and
 * must be large enough (sized via brw_stage_prog_data_printf_num_args()
 * and brw_stage_prog_data_printf_string_size()).
 */
static void
copy_uprintf(u_printf_info *out_infos,
             unsigned *out_arg_sizes,
             char *out_strings,
             const struct brw_stage_prog_data *prog_data)
{
   for (unsigned i = 0; i < prog_data->printf_info_count; i++) {
      /* Struct copy first, then re-point the two owned arrays. */
      out_infos[i] = prog_data->printf_info[i];
      out_infos[i].arg_sizes = out_arg_sizes;
      memcpy(out_infos[i].arg_sizes,
             prog_data->printf_info[i].arg_sizes,
             sizeof(out_infos[i].arg_sizes[0]) * prog_data->printf_info[i].num_args);
      out_infos[i].strings = out_strings;
      memcpy(out_infos[i].strings,
             prog_data->printf_info[i].strings,
             prog_data->printf_info[i].string_size);

      /* Advance the packed cursors past this entry's data. */
      out_arg_sizes += prog_data->printf_info[i].num_args;
      out_strings += prog_data->printf_info[i].string_size;
   }
}
242
243 static struct anv_shader_bin *
anv_shader_bin_create(struct anv_device * device,gl_shader_stage stage,const void * key_data,uint32_t key_size,const void * kernel_data,uint32_t kernel_size,const struct brw_stage_prog_data * prog_data_in,uint32_t prog_data_size,const struct brw_compile_stats * stats,uint32_t num_stats,const nir_xfb_info * xfb_info_in,const struct anv_pipeline_bind_map * bind_map,const struct anv_push_descriptor_info * push_desc_info,enum anv_dynamic_push_bits dynamic_push_values)244 anv_shader_bin_create(struct anv_device *device,
245 gl_shader_stage stage,
246 const void *key_data, uint32_t key_size,
247 const void *kernel_data, uint32_t kernel_size,
248 const struct brw_stage_prog_data *prog_data_in,
249 uint32_t prog_data_size,
250 const struct brw_compile_stats *stats, uint32_t num_stats,
251 const nir_xfb_info *xfb_info_in,
252 const struct anv_pipeline_bind_map *bind_map,
253 const struct anv_push_descriptor_info *push_desc_info,
254 enum anv_dynamic_push_bits dynamic_push_values)
255 {
256 VK_MULTIALLOC(ma);
257 VK_MULTIALLOC_DECL(&ma, struct anv_shader_bin, shader, 1);
258 VK_MULTIALLOC_DECL_SIZE(&ma, void, obj_key_data, key_size);
259 VK_MULTIALLOC_DECL_SIZE(&ma, struct brw_stage_prog_data, prog_data,
260 prog_data_size);
261 VK_MULTIALLOC_DECL(&ma, struct brw_shader_reloc, prog_data_relocs,
262 prog_data_in->num_relocs);
263 VK_MULTIALLOC_DECL(&ma, uint32_t, prog_data_param, prog_data_in->nr_params);
264
265 VK_MULTIALLOC_DECL_SIZE(&ma, nir_xfb_info, xfb_info,
266 xfb_info_in == NULL ? 0 :
267 nir_xfb_info_size(xfb_info_in->output_count));
268
269 VK_MULTIALLOC_DECL(&ma, struct anv_pipeline_binding, surface_to_descriptor,
270 bind_map->surface_count);
271 VK_MULTIALLOC_DECL(&ma, struct anv_pipeline_binding, sampler_to_descriptor,
272 bind_map->sampler_count);
273 VK_MULTIALLOC_DECL(&ma, struct anv_pipeline_embedded_sampler_binding,
274 embedded_sampler_to_binding,
275 bind_map->embedded_sampler_count);
276 VK_MULTIALLOC_DECL(&ma, struct brw_kernel_arg_desc, kernel_args,
277 bind_map->kernel_arg_count);
278 VK_MULTIALLOC_DECL(&ma, struct anv_embedded_sampler *, embedded_samplers,
279 bind_map->embedded_sampler_count);
280 VK_MULTIALLOC_DECL(&ma, u_printf_info, printf_infos,
281 INTEL_DEBUG(DEBUG_SHADER_PRINT) ?
282 prog_data_in->printf_info_count : 0);
283 VK_MULTIALLOC_DECL(&ma, unsigned, arg_sizes,
284 INTEL_DEBUG(DEBUG_SHADER_PRINT) ?
285 brw_stage_prog_data_printf_num_args(prog_data_in) : 0);
286 VK_MULTIALLOC_DECL(&ma, char, strings,
287 INTEL_DEBUG(DEBUG_SHADER_PRINT) ?
288 brw_stage_prog_data_printf_string_size(prog_data_in) : 0);
289
290 if (!vk_multialloc_alloc(&ma, &device->vk.alloc,
291 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
292 return NULL;
293
294 memcpy(obj_key_data, key_data, key_size);
295 vk_pipeline_cache_object_init(&device->vk, &shader->base,
296 &anv_shader_bin_ops, obj_key_data, key_size);
297
298 shader->stage = stage;
299
300 shader->kernel =
301 anv_state_pool_alloc(&device->instruction_state_pool, kernel_size, 64);
302 memcpy(shader->kernel.map, kernel_data, kernel_size);
303 shader->kernel_size = kernel_size;
304
305 if (bind_map->embedded_sampler_count > 0) {
306 shader->embedded_samplers = embedded_samplers;
307 if (anv_shader_bin_get_embedded_samplers(device, shader, bind_map) != VK_SUCCESS) {
308 anv_state_pool_free(&device->instruction_state_pool, shader->kernel);
309 vk_free(&device->vk.alloc, shader);
310 return NULL;
311 }
312 }
313
314 uint64_t shader_data_addr =
315 device->physical->va.instruction_state_pool.addr +
316 shader->kernel.offset +
317 prog_data_in->const_data_offset;
318
319 int rv_count = 0;
320 struct brw_shader_reloc_value reloc_values[9];
321 assert((device->physical->va.dynamic_visible_pool.addr & 0xffffffff) == 0);
322 reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
323 .id = BRW_SHADER_RELOC_DESCRIPTORS_BUFFER_ADDR_HIGH,
324 .value = device->physical->va.dynamic_visible_pool.addr >> 32,
325 };
326 assert((device->physical->va.indirect_descriptor_pool.addr & 0xffffffff) == 0);
327 assert((device->physical->va.internal_surface_state_pool.addr & 0xffffffff) == 0);
328 reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
329 .id = BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH,
330 .value = device->physical->indirect_descriptors ?
331 (device->physical->va.indirect_descriptor_pool.addr >> 32) :
332 (device->physical->va.internal_surface_state_pool.addr >> 32),
333 };
334 assert((device->physical->va.instruction_state_pool.addr & 0xffffffff) == 0);
335 reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
336 .id = BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW,
337 .value = shader_data_addr,
338 };
339 assert((device->physical->va.instruction_state_pool.addr & 0xffffffff) == 0);
340 assert(shader_data_addr >> 32 == device->physical->va.instruction_state_pool.addr >> 32);
341 reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
342 .id = BRW_SHADER_RELOC_CONST_DATA_ADDR_HIGH,
343 .value = device->physical->va.instruction_state_pool.addr >> 32,
344 };
345 reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
346 .id = BRW_SHADER_RELOC_SHADER_START_OFFSET,
347 .value = shader->kernel.offset,
348 };
349 if (brw_shader_stage_is_bindless(stage)) {
350 const struct brw_bs_prog_data *bs_prog_data =
351 brw_bs_prog_data_const(prog_data_in);
352 uint64_t resume_sbt_addr =
353 device->physical->va.instruction_state_pool.addr +
354 shader->kernel.offset +
355 bs_prog_data->resume_sbt_offset;
356 reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
357 .id = BRW_SHADER_RELOC_RESUME_SBT_ADDR_LOW,
358 .value = resume_sbt_addr,
359 };
360 reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
361 .id = BRW_SHADER_RELOC_RESUME_SBT_ADDR_HIGH,
362 .value = resume_sbt_addr >> 32,
363 };
364 }
365
366 if (INTEL_DEBUG(DEBUG_SHADER_PRINT) && prog_data_in->printf_info_count > 0) {
367 assert(device->printf.bo != NULL);
368
369 copy_uprintf(printf_infos, arg_sizes, strings, prog_data_in);
370
371 simple_mtx_lock(&device->printf.mutex);
372
373 uint32_t base_printf_idx =
374 util_dynarray_num_elements(&device->printf.prints, u_printf_info*);
375 for (uint32_t i = 0; i < prog_data_in->printf_info_count; i++) {
376 util_dynarray_append(&device->printf.prints, u_printf_info *,
377 &printf_infos[i]);
378 }
379
380 simple_mtx_unlock(&device->printf.mutex);
381
382 /* u_printf expects the string IDs to start at 1. */
383 reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
384 .id = BRW_SHADER_RELOC_PRINTF_BASE_IDENTIFIER,
385 .value = base_printf_idx,
386 };
387 reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
388 .id = BRW_SHADER_RELOC_PRINTF_BUFFER_ADDR_LOW,
389 .value = device->printf.bo->offset & 0xffffffff,
390 };
391 reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
392 .id = BRW_SHADER_RELOC_PRINTF_BUFFER_ADDR_HIGH,
393 .value = device->printf.bo->offset >> 32,
394 };
395 } else if (prog_data_in->printf_info_count > 0) {
396 unreachable("shader with printf intrinsics requires INTEL_DEBUG=shader-print");
397 }
398
399 brw_write_shader_relocs(&device->physical->compiler->isa,
400 shader->kernel.map, prog_data_in,
401 reloc_values, rv_count);
402
403 anv_shader_bin_rewrite_embedded_samplers(device, shader, bind_map, prog_data_in);
404
405 memcpy(prog_data, prog_data_in, prog_data_size);
406 typed_memcpy(prog_data_relocs, prog_data_in->relocs,
407 prog_data_in->num_relocs);
408 prog_data->relocs = prog_data_relocs;
409 memset(prog_data_param, 0,
410 prog_data->nr_params * sizeof(*prog_data_param));
411 prog_data->param = prog_data_param;
412 prog_data->printf_info = printf_infos;
413 shader->prog_data = prog_data;
414 shader->prog_data_size = prog_data_size;
415
416 assert(num_stats <= ARRAY_SIZE(shader->stats));
417 typed_memcpy(shader->stats, stats, num_stats);
418 shader->num_stats = num_stats;
419
420 if (xfb_info_in) {
421 *xfb_info = *xfb_info_in;
422 typed_memcpy(xfb_info->outputs, xfb_info_in->outputs,
423 xfb_info_in->output_count);
424 shader->xfb_info = xfb_info;
425 } else {
426 shader->xfb_info = NULL;
427 }
428
429 shader->dynamic_push_values = dynamic_push_values;
430
431 typed_memcpy(&shader->push_desc_info, push_desc_info, 1);
432
433 shader->bind_map = *bind_map;
434
435 typed_memcpy(surface_to_descriptor, bind_map->surface_to_descriptor,
436 bind_map->surface_count);
437 shader->bind_map.surface_to_descriptor = surface_to_descriptor;
438
439 typed_memcpy(sampler_to_descriptor, bind_map->sampler_to_descriptor,
440 bind_map->sampler_count);
441 shader->bind_map.sampler_to_descriptor = sampler_to_descriptor;
442
443 typed_memcpy(embedded_sampler_to_binding, bind_map->embedded_sampler_to_binding,
444 bind_map->embedded_sampler_count);
445 shader->bind_map.embedded_sampler_to_binding = embedded_sampler_to_binding;
446
447 typed_memcpy(kernel_args, bind_map->kernel_args,
448 bind_map->kernel_arg_count);
449 shader->bind_map.kernel_args = kernel_args;
450
451 return shader;
452 }
453
/* Serialize a shader binary into a blob for the pipeline cache.
 *
 * Field order must mirror anv_shader_bin_deserialize exactly.  Host
 * pointers inside prog_data (relocs, param) are nulled in the snapshot;
 * their payloads are written separately and re-established on load.
 *
 * Returns false if the blob ran out of memory.
 */
static bool
anv_shader_bin_serialize(struct vk_pipeline_cache_object *object,
                         struct blob *blob)
{
   struct anv_shader_bin *shader =
      container_of(object, struct anv_shader_bin, base);

   blob_write_uint32(blob, shader->stage);

   /* Kernel binary, read back from the instruction pool mapping. */
   blob_write_uint32(blob, shader->kernel_size);
   blob_write_bytes(blob, shader->kernel.map, shader->kernel_size);

   blob_write_uint32(blob, shader->prog_data_size);

   /* Snapshot prog_data with pointer members stripped — they are
    * meaningless outside this process.
    */
   union brw_any_prog_data prog_data;
   assert(shader->prog_data_size <= sizeof(prog_data));
   memcpy(&prog_data, shader->prog_data, shader->prog_data_size);
   prog_data.base.relocs = NULL;
   prog_data.base.param = NULL;
   blob_write_bytes(blob, &prog_data, shader->prog_data_size);

   blob_write_bytes(blob, shader->prog_data->relocs,
                    shader->prog_data->num_relocs *
                    sizeof(shader->prog_data->relocs[0]));
   nir_serialize_printf_info(blob, shader->prog_data->printf_info,
                             shader->prog_data->printf_info_count);

   blob_write_uint32(blob, shader->num_stats);
   blob_write_bytes(blob, shader->stats,
                    shader->num_stats * sizeof(shader->stats[0]));

   /* xfb info is optional; a zero size marker means "none". */
   if (shader->xfb_info) {
      uint32_t xfb_info_size =
         nir_xfb_info_size(shader->xfb_info->output_count);
      blob_write_uint32(blob, xfb_info_size);
      blob_write_bytes(blob, shader->xfb_info, xfb_info_size);
   } else {
      blob_write_uint32(blob, 0);
   }

   blob_write_uint32(blob, shader->dynamic_push_values);

   blob_write_uint32(blob, shader->push_desc_info.used_descriptors);
   blob_write_uint32(blob, shader->push_desc_info.fully_promoted_ubo_descriptors);
   blob_write_uint8(blob, shader->push_desc_info.used_set_buffer);

   blob_write_bytes(blob, shader->bind_map.surface_sha1,
                    sizeof(shader->bind_map.surface_sha1));
   blob_write_bytes(blob, shader->bind_map.sampler_sha1,
                    sizeof(shader->bind_map.sampler_sha1));
   blob_write_bytes(blob, shader->bind_map.push_sha1,
                    sizeof(shader->bind_map.push_sha1));
   blob_write_uint32(blob, shader->bind_map.surface_count);
   blob_write_uint32(blob, shader->bind_map.sampler_count);
   blob_write_uint32(blob, shader->bind_map.embedded_sampler_count);
   if (shader->stage == MESA_SHADER_KERNEL) {
      /* Kernel-only fields: pack the two 16-bit values into one word. */
      uint32_t packed = (uint32_t)shader->bind_map.kernel_args_size << 16 |
                        (uint32_t)shader->bind_map.kernel_arg_count;
      blob_write_uint32(blob, packed);
   }
   blob_write_bytes(blob, shader->bind_map.surface_to_descriptor,
                    shader->bind_map.surface_count *
                    sizeof(*shader->bind_map.surface_to_descriptor));
   blob_write_bytes(blob, shader->bind_map.sampler_to_descriptor,
                    shader->bind_map.sampler_count *
                    sizeof(*shader->bind_map.sampler_to_descriptor));
   blob_write_bytes(blob, shader->bind_map.embedded_sampler_to_binding,
                    shader->bind_map.embedded_sampler_count *
                    sizeof(*shader->bind_map.embedded_sampler_to_binding));
   blob_write_bytes(blob, shader->bind_map.kernel_args,
                    shader->bind_map.kernel_arg_count *
                    sizeof(*shader->bind_map.kernel_args));
   blob_write_bytes(blob, shader->bind_map.push_ranges,
                    sizeof(shader->bind_map.push_ranges));

   return !blob->out_of_memory;
}
531
/* Rebuild a shader binary from a blob written by anv_shader_bin_serialize.
 * Read order must mirror the serializer exactly.
 *
 * Pointers returned by blob_read_bytes() alias the blob's own storage;
 * anv_shader_bin_create() deep-copies everything it keeps, so nothing here
 * outlives the call.  Returns NULL on a truncated/corrupt blob or
 * allocation failure.
 */
struct vk_pipeline_cache_object *
anv_shader_bin_deserialize(struct vk_pipeline_cache *cache,
                           const void *key_data, size_t key_size,
                           struct blob_reader *blob)
{
   struct anv_device *device =
      container_of(cache->base.device, struct anv_device, vk);

   gl_shader_stage stage = blob_read_uint32(blob);

   uint32_t kernel_size = blob_read_uint32(blob);
   const void *kernel_data = blob_read_bytes(blob, kernel_size);

   uint32_t prog_data_size = blob_read_uint32(blob);
   const void *prog_data_bytes = blob_read_bytes(blob, prog_data_size);
   /* Early overrun check: prog_data fields are consumed below, so we must
    * not proceed on garbage.
    */
   if (blob->overrun)
      return NULL;

   union brw_any_prog_data prog_data;
   memcpy(&prog_data, prog_data_bytes,
          MIN2(sizeof(prog_data), prog_data_size));
   prog_data.base.relocs =
      blob_read_bytes(blob, prog_data.base.num_relocs *
                            sizeof(prog_data.base.relocs[0]));

   /* printf info is ralloc'ed off a temporary context; create() copies it
    * into the shader's own allocation before we free mem_ctx.
    */
   void *mem_ctx = ralloc_context(NULL);
   prog_data.base.printf_info =
      nir_deserialize_printf_info(mem_ctx, blob,
                                  &prog_data.base.printf_info_count);

   uint32_t num_stats = blob_read_uint32(blob);
   const struct brw_compile_stats *stats =
      blob_read_bytes(blob, num_stats * sizeof(stats[0]));

   const nir_xfb_info *xfb_info = NULL;
   uint32_t xfb_size = blob_read_uint32(blob);
   if (xfb_size)
      xfb_info = blob_read_bytes(blob, xfb_size);

   enum anv_dynamic_push_bits dynamic_push_values = blob_read_uint32(blob);

   struct anv_push_descriptor_info push_desc_info = {};
   push_desc_info.used_descriptors = blob_read_uint32(blob);
   push_desc_info.fully_promoted_ubo_descriptors = blob_read_uint32(blob);
   push_desc_info.used_set_buffer = blob_read_uint8(blob);

   struct anv_pipeline_bind_map bind_map = {};
   blob_copy_bytes(blob, bind_map.surface_sha1, sizeof(bind_map.surface_sha1));
   blob_copy_bytes(blob, bind_map.sampler_sha1, sizeof(bind_map.sampler_sha1));
   blob_copy_bytes(blob, bind_map.push_sha1, sizeof(bind_map.push_sha1));
   bind_map.surface_count = blob_read_uint32(blob);
   bind_map.sampler_count = blob_read_uint32(blob);
   bind_map.embedded_sampler_count = blob_read_uint32(blob);
   if (stage == MESA_SHADER_KERNEL) {
      /* Unpack the two 16-bit kernel-arg fields (see serializer). */
      uint32_t packed = blob_read_uint32(blob);
      bind_map.kernel_args_size = (uint16_t)(packed >> 16);
      bind_map.kernel_arg_count = (uint16_t)packed;
   }
   bind_map.surface_to_descriptor = (void *)
      blob_read_bytes(blob, bind_map.surface_count *
                            sizeof(*bind_map.surface_to_descriptor));
   bind_map.sampler_to_descriptor = (void *)
      blob_read_bytes(blob, bind_map.sampler_count *
                            sizeof(*bind_map.sampler_to_descriptor));
   bind_map.embedded_sampler_to_binding = (void *)
      blob_read_bytes(blob, bind_map.embedded_sampler_count *
                            sizeof(*bind_map.embedded_sampler_to_binding));
   bind_map.kernel_args = (void *)
      blob_read_bytes(blob, bind_map.kernel_arg_count *
                            sizeof(*bind_map.kernel_args));
   blob_copy_bytes(blob, bind_map.push_ranges, sizeof(bind_map.push_ranges));

   /* Final overrun check before committing to a real allocation. */
   if (blob->overrun) {
      ralloc_free(mem_ctx);
      return NULL;
   }

   struct anv_shader_bin *shader =
      anv_shader_bin_create(device, stage,
                            key_data, key_size,
                            kernel_data, kernel_size,
                            &prog_data.base, prog_data_size,
                            stats, num_stats, xfb_info, &bind_map,
                            &push_desc_info,
                            dynamic_push_values);

   ralloc_free(mem_ctx);

   if (shader == NULL)
      return NULL;

   return &shader->base;
}
625
626 struct anv_shader_bin *
anv_device_search_for_kernel(struct anv_device * device,struct vk_pipeline_cache * cache,const void * key_data,uint32_t key_size,bool * user_cache_hit)627 anv_device_search_for_kernel(struct anv_device *device,
628 struct vk_pipeline_cache *cache,
629 const void *key_data, uint32_t key_size,
630 bool *user_cache_hit)
631 {
632 /* Use the default pipeline cache if none is specified */
633 if (cache == NULL)
634 cache = device->vk.mem_cache;
635
636 bool cache_hit = false;
637 struct vk_pipeline_cache_object *object =
638 vk_pipeline_cache_lookup_object(cache, key_data, key_size,
639 &anv_shader_bin_ops, &cache_hit);
640 if (user_cache_hit != NULL) {
641 *user_cache_hit = object != NULL && cache_hit &&
642 cache != device->vk.mem_cache;
643 }
644
645 if (object == NULL)
646 return NULL;
647
648 return container_of(object, struct anv_shader_bin, base);
649 }
650
651 struct anv_shader_bin *
anv_device_upload_kernel(struct anv_device * device,struct vk_pipeline_cache * cache,const struct anv_shader_upload_params * params)652 anv_device_upload_kernel(struct anv_device *device,
653 struct vk_pipeline_cache *cache,
654 const struct anv_shader_upload_params *params)
655 {
656 /* Use the default pipeline cache if none is specified */
657 if (cache == NULL)
658 cache = device->vk.mem_cache;
659
660 struct anv_shader_bin *shader =
661 anv_shader_bin_create(device,
662 params->stage,
663 params->key_data,
664 params->key_size,
665 params->kernel_data,
666 params->kernel_size,
667 params->prog_data,
668 params->prog_data_size,
669 params->stats,
670 params->num_stats,
671 params->xfb_info,
672 params->bind_map,
673 params->push_desc_info,
674 params->dynamic_push_values);
675 if (shader == NULL)
676 return NULL;
677
678 struct vk_pipeline_cache_object *cached =
679 vk_pipeline_cache_add_object(cache, &shader->base);
680
681 return container_of(cached, struct anv_shader_bin, base);
682 }
683
/* Byte length of the SHA-1 digests used as NIR cache keys. */
#define SHA1_KEY_SIZE 20

/* Look up a previously-uploaded NIR shader by SHA-1 key in @cache (the
 * device's in-memory cache when NULL).  The deserialized shader is
 * allocated from @mem_ctx (ralloc).  Returns NULL on a cache miss.
 */
struct nir_shader *
anv_device_search_for_nir(struct anv_device *device,
                          struct vk_pipeline_cache *cache,
                          const nir_shader_compiler_options *nir_options,
                          unsigned char sha1_key[SHA1_KEY_SIZE],
                          void *mem_ctx)
{
   if (cache == NULL)
      cache = device->vk.mem_cache;

   return vk_pipeline_cache_lookup_nir(cache, sha1_key, SHA1_KEY_SIZE,
                                       nir_options, NULL, mem_ctx);
}
699
700 void
anv_device_upload_nir(struct anv_device * device,struct vk_pipeline_cache * cache,const struct nir_shader * nir,unsigned char sha1_key[SHA1_KEY_SIZE])701 anv_device_upload_nir(struct anv_device *device,
702 struct vk_pipeline_cache *cache,
703 const struct nir_shader *nir,
704 unsigned char sha1_key[SHA1_KEY_SIZE])
705 {
706 if (cache == NULL)
707 cache = device->vk.mem_cache;
708
709 vk_pipeline_cache_add_nir(cache, sha1_key, SHA1_KEY_SIZE, nir);
710 }
711
/* Load (or build and cache) the soft-fp64 SPIR-V library as NIR and stash
 * it in device->fp64_nir for later linking into shaders that need doubles.
 *
 * The cache key is simply the SHA-1 of a constant name string, so there is
 * exactly one entry per internal cache.  The NIR is ralloc'ed off NULL
 * (mem_ctx == NULL) and owned by the device.
 */
void
anv_load_fp64_shader(struct anv_device *device)
{
   const nir_shader_compiler_options *nir_options =
      device->physical->compiler->nir_options[MESA_SHADER_VERTEX];

   const char* shader_name = "float64_spv_lib";
   struct mesa_sha1 sha1_ctx;
   uint8_t sha1[20];
   _mesa_sha1_init(&sha1_ctx);
   _mesa_sha1_update(&sha1_ctx, shader_name, strlen(shader_name));
   _mesa_sha1_final(&sha1_ctx, sha1);

   device->fp64_nir =
      anv_device_search_for_nir(device, device->internal_cache,
                                nir_options, sha1, NULL);

   /* Shader found in the cache — no need to run spirv_to_nir() again. */
   if (device->fp64_nir)
      return;

   /* Minimal capability set needed by the fp64 library. */
   const struct spirv_capabilities spirv_caps = {
      .Addresses = true,
      .Float64 = true,
      .Int8 = true,
      .Int16 = true,
      .Int64 = true,
   };

   struct spirv_to_nir_options spirv_options = {
      .capabilities = &spirv_caps,
      .environment = NIR_SPIRV_VULKAN,
      /* Library: exports functions instead of a single entry point. */
      .create_library = true
   };

   /* float64_spv_source is an embedded word array; size is in bytes. */
   nir_shader* nir =
      spirv_to_nir(float64_spv_source, sizeof(float64_spv_source) / 4,
                   NULL, 0, MESA_SHADER_VERTEX, "main",
                   &spirv_options, nir_options);

   assert(nir != NULL);

   nir_validate_shader(nir, "after spirv_to_nir");
   nir_validate_ssa_dominance(nir, "after spirv_to_nir");

   /* Standard lowering so callers can inline these functions directly. */
   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS_V(nir, nir_lower_returns);
   NIR_PASS_V(nir, nir_inline_functions);
   NIR_PASS_V(nir, nir_opt_deref);

   /* Basic cleanup/optimization round; order matters for the passes
    * below (e.g. copy-prop before DCE/CSE).
    */
   NIR_PASS_V(nir, nir_lower_vars_to_ssa);
   NIR_PASS_V(nir, nir_copy_prop);
   NIR_PASS_V(nir, nir_opt_dce);
   NIR_PASS_V(nir, nir_opt_cse);
   NIR_PASS_V(nir, nir_opt_gcm, true);
   NIR_PASS_V(nir, nir_opt_peephole_select, 1, false, false);
   NIR_PASS_V(nir, nir_opt_dce);

   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_function_temp,
              nir_address_format_62bit_generic);

   /* Cache the result so subsequent devices skip the SPIR-V translation. */
   anv_device_upload_nir(device, device->internal_cache,
                         nir, sha1);

   device->fp64_nir = nir;
}
778