1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29
30 #include "util/mesa-sha1.h"
31 #include "util/os_time.h"
32 #include "common/intel_compute_slm.h"
33 #include "common/intel_l3_config.h"
34 #include "common/intel_sample_positions.h"
35 #include "compiler/elk/elk_disasm.h"
36 #include "anv_private.h"
37 #include "compiler/elk/elk_nir.h"
38 #include "compiler/intel_nir.h"
39 #include "anv_nir.h"
40 #include "nir/nir_xfb_info.h"
41 #include "spirv/nir_spirv.h"
42 #include "vk_pipeline.h"
43 #include "vk_render_pass.h"
44 #include "vk_util.h"
45
46 /* Eventually, this will become part of anv_CreateShader. Unfortunately,
47 * we can't do that yet because we don't have the ability to copy nir.
48 */
49 static nir_shader *
50 anv_shader_stage_to_nir(struct anv_device *device,
51 VkPipelineCreateFlags2KHR pipeline_flags,
52 const VkPipelineShaderStageCreateInfo *stage_info,
53 enum elk_robustness_flags robust_flags,
54 void *mem_ctx)
55 {
56 const struct anv_physical_device *pdevice = device->physical;
57 const struct elk_compiler *compiler = pdevice->compiler;
58 gl_shader_stage stage = vk_to_mesa_shader_stage(stage_info->stage);
59 const nir_shader_compiler_options *nir_options =
60 compiler->nir_options[stage];
61
62 const struct spirv_to_nir_options spirv_options = {
63 .ubo_addr_format = anv_nir_ubo_addr_format(pdevice, robust_flags),
64 .ssbo_addr_format = anv_nir_ssbo_addr_format(pdevice, robust_flags),
65 .phys_ssbo_addr_format = nir_address_format_64bit_global,
66 .push_const_addr_format = nir_address_format_logical,
67
68 /* TODO: Consider changing this to an address format that has the NULL
69 * pointer equal to 0. That might be a better format to play nice
70 * with certain code / code generators.
71 */
72 .shared_addr_format = nir_address_format_32bit_offset,
73
74 .min_ubo_alignment = ANV_UBO_ALIGNMENT,
75 .min_ssbo_alignment = ANV_SSBO_ALIGNMENT,
76 };
77
78 nir_shader *nir;
79 VkResult result =
80 vk_pipeline_shader_stage_to_nir(&device->vk, pipeline_flags, stage_info,
81 &spirv_options, nir_options,
82 mem_ctx, &nir);
83 if (result != VK_SUCCESS)
84 return NULL;
85
86 if (INTEL_DEBUG(intel_debug_flag_for_shader_stage(stage))) {
87 fprintf(stderr, "NIR (from SPIR-V) for %s shader:\n",
88 gl_shader_stage_name(stage));
89 nir_print_shader(nir, stderr);
90 }
91
92 NIR_PASS_V(nir, nir_lower_io_to_temporaries,
93 nir_shader_get_entrypoint(nir), true, false);
94
95 const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
96 .point_coord = true,
97 };
98 NIR_PASS(_, nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);
99
100 const nir_opt_access_options opt_access_options = {
101 .is_vulkan = true,
102 };
103 NIR_PASS(_, nir, nir_opt_access, &opt_access_options);
104
105 /* Vulkan uses the separate-shader linking model */
106 nir->info.separate_shader = true;
107
108 struct elk_nir_compiler_opts opts = {};
109
110 elk_preprocess_nir(compiler, nir, &opts);
111
112 return nir;
113 }
114
115 VkResult
116 anv_pipeline_init(struct anv_pipeline *pipeline,
117 struct anv_device *device,
118 enum anv_pipeline_type type,
119 VkPipelineCreateFlags flags,
120 const VkAllocationCallbacks *pAllocator)
121 {
122 VkResult result;
123
124 memset(pipeline, 0, sizeof(*pipeline));
125
126 vk_object_base_init(&device->vk, &pipeline->base,
127 VK_OBJECT_TYPE_PIPELINE);
128 pipeline->device = device;
129
130 /* It's the job of the child class to provide actual backing storage for
131 * the batch by setting batch.start, batch.next, and batch.end.
132 */
133 pipeline->batch.alloc = pAllocator ? pAllocator : &device->vk.alloc;
134 pipeline->batch.relocs = &pipeline->batch_relocs;
135 pipeline->batch.status = VK_SUCCESS;
136
137 result = anv_reloc_list_init(&pipeline->batch_relocs,
138 pipeline->batch.alloc);
139 if (result != VK_SUCCESS)
140 return result;
141
142 pipeline->mem_ctx = ralloc_context(NULL);
143
144 pipeline->type = type;
145 pipeline->flags = flags;
146
147 util_dynarray_init(&pipeline->executables, pipeline->mem_ctx);
148
149 return VK_SUCCESS;
150 }
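/* Editor's sketch (not part of the driver): a caller is expected to point the
 * batch at real backing storage right after anv_pipeline_init(), the way
 * anv_compute_pipeline_create() does further down with its embedded
 * batch_data array:
 *
 *    result = anv_pipeline_init(&pipeline->base, device,
 *                               ANV_PIPELINE_COMPUTE, pCreateInfo->flags,
 *                               pAllocator);
 *    if (result != VK_SUCCESS)
 *       return result;
 *    anv_batch_set_storage(&pipeline->base.batch, ANV_NULL_ADDRESS,
 *                          pipeline->batch_data,
 *                          sizeof(pipeline->batch_data));
 *
 * The graphics pipeline is assumed to do the equivalent in its own creation
 * path, which lives outside this file.
 */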
151
152 void
153 anv_pipeline_finish(struct anv_pipeline *pipeline,
154 struct anv_device *device,
155 const VkAllocationCallbacks *pAllocator)
156 {
157 anv_reloc_list_finish(&pipeline->batch_relocs,
158 pAllocator ? pAllocator : &device->vk.alloc);
159 ralloc_free(pipeline->mem_ctx);
160 vk_object_base_finish(&pipeline->base);
161 }
162
163 void anv_DestroyPipeline(
164 VkDevice _device,
165 VkPipeline _pipeline,
166 const VkAllocationCallbacks* pAllocator)
167 {
168 ANV_FROM_HANDLE(anv_device, device, _device);
169 ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
170
171 if (!pipeline)
172 return;
173
174 switch (pipeline->type) {
175 case ANV_PIPELINE_GRAPHICS: {
176 struct anv_graphics_pipeline *gfx_pipeline =
177 anv_pipeline_to_graphics(pipeline);
178
179 for (unsigned s = 0; s < ARRAY_SIZE(gfx_pipeline->shaders); s++) {
180 if (gfx_pipeline->shaders[s])
181 anv_shader_bin_unref(device, gfx_pipeline->shaders[s]);
182 }
183 break;
184 }
185
186 case ANV_PIPELINE_COMPUTE: {
187 struct anv_compute_pipeline *compute_pipeline =
188 anv_pipeline_to_compute(pipeline);
189
190 if (compute_pipeline->cs)
191 anv_shader_bin_unref(device, compute_pipeline->cs);
192
193 break;
194 }
195
196 default:
197 unreachable("invalid pipeline type");
198 }
199
200 anv_pipeline_finish(pipeline, device, pAllocator);
201 vk_free2(&device->vk.alloc, pAllocator, pipeline);
202 }
203
204 static void
205 populate_sampler_prog_key(const struct intel_device_info *devinfo,
206 struct elk_sampler_prog_key_data *key)
207 {
208 /* XXX: Handle texture swizzle Pre-HSW */
209 }
210
211 static void
212 populate_base_prog_key(const struct anv_device *device,
213 enum elk_robustness_flags robust_flags,
214 struct elk_base_prog_key *key)
215 {
216 key->robust_flags = robust_flags;
217 key->limit_trig_input_range =
218 device->physical->instance->limit_trig_input_range;
219
220 populate_sampler_prog_key(device->info, &key->tex);
221 }
222
223 static void
224 populate_vs_prog_key(const struct anv_device *device,
225 enum elk_robustness_flags robust_flags,
226 struct elk_vs_prog_key *key)
227 {
228 memset(key, 0, sizeof(*key));
229
230 populate_base_prog_key(device, robust_flags, &key->base);
231
232 /* XXX: Handle vertex input work-arounds */
233
234 /* XXX: Handle sampler_prog_key */
235 }
236
237 static void
238 populate_tcs_prog_key(const struct anv_device *device,
239 enum elk_robustness_flags robust_flags,
240 unsigned input_vertices,
241 struct elk_tcs_prog_key *key)
242 {
243 memset(key, 0, sizeof(*key));
244
245 populate_base_prog_key(device, robust_flags, &key->base);
246
247 key->input_vertices = input_vertices;
248 }
249
250 static void
251 populate_tes_prog_key(const struct anv_device *device,
252 enum elk_robustness_flags robust_flags,
253 struct elk_tes_prog_key *key)
254 {
255 memset(key, 0, sizeof(*key));
256
257 populate_base_prog_key(device, robust_flags, &key->base);
258 }
259
260 static void
261 populate_gs_prog_key(const struct anv_device *device,
262 bool robust_flags,
263 struct elk_gs_prog_key *key)
264 {
265 memset(key, 0, sizeof(*key));
266
267 populate_base_prog_key(device, robust_flags, &key->base);
268 }
269
270 static void
271 populate_wm_prog_key(const struct anv_graphics_pipeline *pipeline,
272 enum elk_robustness_flags robust_flags,
273 const BITSET_WORD *dynamic,
274 const struct vk_multisample_state *ms,
275 const struct vk_render_pass_state *rp,
276 struct elk_wm_prog_key *key)
277 {
278 const struct anv_device *device = pipeline->base.device;
279
280 memset(key, 0, sizeof(*key));
281
282 populate_base_prog_key(device, robust_flags, &key->base);
283
284 /* We set this to 0 here and set to the actual value before we call
285 * elk_compile_fs.
286 */
287 key->input_slots_valid = 0;
288
289 /* XXX Vulkan doesn't appear to specify */
290 key->clamp_fragment_color = false;
291
292 key->ignore_sample_mask_out = false;
293
294 assert(rp->color_attachment_count <= MAX_RTS);
295 /* Consider all outputs as valid until we look at the NIR variables. */
296 key->color_outputs_valid = (1u << rp->color_attachment_count) - 1;
297 key->nr_color_regions = rp->color_attachment_count;
298
299 /* To reduce possible shader recompilations we would need to know whether
300 * there is a SampleMask output variable, so we could decide whether to
301 * emit code working around the hardware issue that disables alpha to
302 * coverage when a SampleMask output is present.
303 */
304 key->alpha_to_coverage = ms != NULL && ms->alpha_to_coverage_enable ?
305 ELK_ALWAYS : ELK_NEVER;
306
307 /* Vulkan doesn't support fixed-function alpha test */
308 key->alpha_test_replicate_alpha = false;
309
310 if (ms != NULL) {
311 /* We should probably pull this out of the shader, but it's fairly
312 * harmless to compute it and then let dead-code take care of it.
313 */
314 if (ms->rasterization_samples > 1) {
315 key->persample_interp =
316 (ms->sample_shading_enable &&
317 (ms->min_sample_shading * ms->rasterization_samples) > 1) ?
318 ELK_ALWAYS : ELK_NEVER;
319 key->multisample_fbo = ELK_ALWAYS;
320 }
321
322 if (device->physical->instance->sample_mask_out_opengl_behaviour)
323 key->ignore_sample_mask_out = !key->multisample_fbo;
324 }
325 }
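/* Worked example (editor's sketch): with VkPipelineMultisampleStateCreateInfo
 * of rasterizationSamples = 4, sampleShadingEnable = VK_TRUE and
 * minSampleShading = 0.5, the product 0.5 * 4 = 2 is > 1, so persample_interp
 * is ELK_ALWAYS and multisample_fbo is ELK_ALWAYS.  With minSampleShading =
 * 0.25 the product is 1, which is not > 1, so per-sample interpolation is not
 * forced even though sample shading is enabled.
 */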
326
327 static void
328 populate_cs_prog_key(const struct anv_device *device,
329 enum elk_robustness_flags robust_flags,
330 struct elk_cs_prog_key *key)
331 {
332 memset(key, 0, sizeof(*key));
333
334 populate_base_prog_key(device, robust_flags, &key->base);
335 }
336
337 struct anv_pipeline_stage {
338 gl_shader_stage stage;
339
340 VkPipelineCreateFlags2KHR pipeline_flags;
341 const VkPipelineShaderStageCreateInfo *info;
342
343 unsigned char shader_sha1[20];
344
345 union elk_any_prog_key key;
346
347 struct {
348 gl_shader_stage stage;
349 unsigned char sha1[20];
350 } cache_key;
351
352 nir_shader *nir;
353
354 struct anv_pipeline_binding surface_to_descriptor[256];
355 struct anv_pipeline_binding sampler_to_descriptor[256];
356 struct anv_pipeline_bind_map bind_map;
357
358 union elk_any_prog_data prog_data;
359
360 uint32_t num_stats;
361 struct elk_compile_stats stats[3];
362 char *disasm[3];
363
364 VkPipelineCreationFeedback feedback;
365
366 const unsigned *code;
367
368 struct anv_shader_bin *bin;
369 };
370
371 static void
372 anv_pipeline_hash_graphics(struct anv_graphics_pipeline *pipeline,
373 struct anv_pipeline_layout *layout,
374 struct anv_pipeline_stage *stages,
375 unsigned char *sha1_out)
376 {
377 struct mesa_sha1 ctx;
378 _mesa_sha1_init(&ctx);
379
380 _mesa_sha1_update(&ctx, &pipeline->view_mask,
381 sizeof(pipeline->view_mask));
382
383 if (layout)
384 _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
385
386 for (uint32_t s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
387 if (stages[s].info) {
388 _mesa_sha1_update(&ctx, stages[s].shader_sha1,
389 sizeof(stages[s].shader_sha1));
390 _mesa_sha1_update(&ctx, &stages[s].key, elk_prog_key_size(s));
391 }
392 }
393
394 _mesa_sha1_final(&ctx, sha1_out);
395 }
396
397 static void
398 anv_pipeline_hash_compute(struct anv_compute_pipeline *pipeline,
399 struct anv_pipeline_layout *layout,
400 struct anv_pipeline_stage *stage,
401 unsigned char *sha1_out)
402 {
403 struct mesa_sha1 ctx;
404 _mesa_sha1_init(&ctx);
405
406 if (layout)
407 _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
408
409 const struct anv_device *device = pipeline->base.device;
410
411 const bool rba = device->vk.enabled_features.robustBufferAccess;
412 _mesa_sha1_update(&ctx, &rba, sizeof(rba));
413
414 const uint8_t afs = device->physical->instance->assume_full_subgroups;
415 _mesa_sha1_update(&ctx, &afs, sizeof(afs));
416
417 _mesa_sha1_update(&ctx, stage->shader_sha1,
418 sizeof(stage->shader_sha1));
419 _mesa_sha1_update(&ctx, &stage->key.cs, sizeof(stage->key.cs));
420
421 _mesa_sha1_final(&ctx, sha1_out);
422 }
423
424 static nir_shader *
425 anv_pipeline_stage_get_nir(struct anv_pipeline *pipeline,
426 struct vk_pipeline_cache *cache,
427 void *mem_ctx,
428 struct anv_pipeline_stage *stage)
429 {
430 const struct elk_compiler *compiler =
431 pipeline->device->physical->compiler;
432 const nir_shader_compiler_options *nir_options =
433 compiler->nir_options[stage->stage];
434 nir_shader *nir;
435
436 nir = anv_device_search_for_nir(pipeline->device, cache,
437 nir_options,
438 stage->shader_sha1,
439 mem_ctx);
440 if (nir) {
441 assert(nir->info.stage == stage->stage);
442 return nir;
443 }
444
445 nir = anv_shader_stage_to_nir(pipeline->device,
446 stage->pipeline_flags, stage->info,
447 stage->key.base.robust_flags, mem_ctx);
448 if (nir) {
449 anv_device_upload_nir(pipeline->device, cache, nir, stage->shader_sha1);
450 return nir;
451 }
452
453 return NULL;
454 }
455
456 static void
457 shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
458 {
459 assert(glsl_type_is_vector_or_scalar(type));
460
461 uint32_t comp_size = glsl_type_is_boolean(type)
462 ? 4 : glsl_get_bit_size(type) / 8;
463 unsigned length = glsl_get_vector_elements(type);
464 *size = comp_size * length,
465 *align = comp_size * (length == 3 ? 4 : length);
466 }
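/* Worked example (editor's sketch): for a vec3 of 32-bit floats,
 * comp_size = 4 and length = 3, so *size = 12 while *align = 16 because a
 * 3-component vector is padded to 4 components for alignment purposes.
 * A boolean scalar reports comp_size = 4, matching the 32-bit representation
 * used for shared memory.
 */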
467
468 static void
469 anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
470 void *mem_ctx,
471 struct anv_pipeline_stage *stage,
472 struct anv_pipeline_layout *layout)
473 {
474 const struct anv_physical_device *pdevice = pipeline->device->physical;
475 const struct elk_compiler *compiler = pdevice->compiler;
476
477 struct elk_stage_prog_data *prog_data = &stage->prog_data.base;
478 nir_shader *nir = stage->nir;
479
480 if (nir->info.stage == MESA_SHADER_FRAGMENT) {
481 NIR_PASS(_, nir, nir_lower_wpos_center);
482 NIR_PASS(_, nir, nir_lower_input_attachments,
483 &(nir_input_attachment_options) {
484 .use_fragcoord_sysval = true,
485 .use_layer_id_sysval = true,
486 });
487 }
488
489 NIR_PASS(_, nir, anv_nir_lower_ycbcr_textures, layout);
490
491 if (pipeline->type == ANV_PIPELINE_GRAPHICS) {
492 struct anv_graphics_pipeline *gfx_pipeline =
493 anv_pipeline_to_graphics(pipeline);
494 NIR_PASS(_, nir, anv_nir_lower_multiview, gfx_pipeline->view_mask);
495 }
496
497 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
498
499 NIR_PASS(_, nir, elk_nir_lower_storage_image,
500 &(struct elk_nir_lower_storage_image_opts) {
501 .devinfo = compiler->devinfo,
502 .lower_loads = true,
503 .lower_stores = true,
504 .lower_atomics = true,
505 .lower_get_size = true,
506 });
507
508 NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_global,
509 nir_address_format_64bit_global);
510 NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_push_const,
511 nir_address_format_32bit_offset);
512
513 /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
514 NIR_PASS_V(nir, anv_nir_apply_pipeline_layout,
515 pdevice, stage->key.base.robust_flags,
516 layout, &stage->bind_map);
517
518 NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ubo,
519 anv_nir_ubo_addr_format(pdevice, stage->key.base.robust_flags));
520 NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ssbo,
521 anv_nir_ssbo_addr_format(pdevice, stage->key.base.robust_flags));
522
523 /* First run copy-prop to get rid of all of the vec() that address
524 * calculations often create and then constant-fold so that, when we
525 * get to anv_nir_lower_ubo_loads, we can detect constant offsets.
526 */
527 NIR_PASS(_, nir, nir_copy_prop);
528 NIR_PASS(_, nir, nir_opt_constant_folding);
529
530 NIR_PASS(_, nir, anv_nir_lower_ubo_loads);
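/* Editor's sketch of why the ordering above matters (shown at GLSL level,
 * not as actual NIR): a UBO access such as
 *
 *    layout(set = 0, binding = 0) uniform Block { vec4 v[8]; } b;
 *    ... = b.v[2].y;
 *
 * initially shows up as address math along the lines of base + 2 * 16 + 4.
 * Copy propagation plus constant folding collapse that into a single
 * immediate offset (36), which anv_nir_lower_ubo_loads can then recognize
 * and handle more efficiently than a fully variable offset.
 */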
531
532 enum nir_lower_non_uniform_access_type lower_non_uniform_access_types =
533 nir_lower_non_uniform_texture_access | nir_lower_non_uniform_image_access;
534
535 /* In practice, most shaders do not have non-uniform-qualified
536 * accesses (see
537 * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17558#note_1475069)
538 * thus a cheaper check, which is likely to fail, is run first.
539 */
540 if (nir_has_non_uniform_access(nir, lower_non_uniform_access_types)) {
541 NIR_PASS(_, nir, nir_opt_non_uniform_access);
542
543 /* We don't support non-uniform UBOs and non-uniform SSBO access is
544 * handled naturally by falling back to A64 messages.
545 */
546 NIR_PASS(_, nir, nir_lower_non_uniform_access,
547 &(nir_lower_non_uniform_access_options) {
548 .types = lower_non_uniform_access_types,
549 .callback = NULL,
550 });
551 }
552
553 NIR_PASS_V(nir, anv_nir_compute_push_layout,
554 pdevice, stage->key.base.robust_flags,
555 prog_data, &stage->bind_map, mem_ctx);
556
557 if (gl_shader_stage_uses_workgroup(nir->info.stage)) {
558 if (!nir->info.shared_memory_explicit_layout) {
559 NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
560 nir_var_mem_shared, shared_type_info);
561 }
562
563 NIR_PASS(_, nir, nir_lower_explicit_io,
564 nir_var_mem_shared, nir_address_format_32bit_offset);
565
566 if (nir->info.zero_initialize_shared_memory &&
567 nir->info.shared_size > 0) {
568 /* The effective Shared Local Memory size is at least 1024 bytes and
569 * is always rounded to a power of two, so it is OK to align the size
570 * used by the shader to chunk_size -- which does simplify the logic.
571 */
572 const unsigned chunk_size = 16;
573 const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
574 assert(shared_size <=
575 intel_compute_slm_calculate_size(compiler->devinfo->ver, nir->info.shared_size));
576
577 NIR_PASS(_, nir, nir_zero_initialize_shared_memory,
578 shared_size, chunk_size);
579 }
580 }
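/* Worked example (editor's sketch): a shader declaring 1000 bytes of shared
 * memory gets shared_size = ALIGN(1000, 16) = 1008, and the
 * zero-initialization pass clears it in 16-byte chunks.  Rounding up is safe
 * because, as noted above, the effective SLM allocation is at least 1024
 * bytes and always a power of two, so it is never smaller than the aligned
 * size.
 */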
581
582 if (gl_shader_stage_is_compute(nir->info.stage)) {
583 NIR_PASS(_, nir, elk_nir_lower_cs_intrinsics, compiler->devinfo,
584 &stage->prog_data.cs);
585 }
586
587 stage->nir = nir;
588 }
589
590 static void
591 anv_pipeline_link_vs(const struct elk_compiler *compiler,
592 struct anv_pipeline_stage *vs_stage,
593 struct anv_pipeline_stage *next_stage)
594 {
595 if (next_stage)
596 elk_nir_link_shaders(compiler, vs_stage->nir, next_stage->nir);
597 }
598
599 static void
600 anv_pipeline_compile_vs(const struct elk_compiler *compiler,
601 void *mem_ctx,
602 struct anv_graphics_pipeline *pipeline,
603 struct anv_pipeline_stage *vs_stage)
604 {
605 /* When using Primitive Replication for multiview, each view gets its own
606 * position slot.
607 */
608 uint32_t pos_slots =
609 (vs_stage->nir->info.per_view_outputs & VARYING_BIT_POS) ?
610 MAX2(1, util_bitcount(pipeline->view_mask)) : 1;
611
612 /* Only position is allowed to be per-view */
613 assert(!(vs_stage->nir->info.per_view_outputs & ~VARYING_BIT_POS));
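/* Worked example (editor's sketch): with view_mask = 0b101 (views 0 and 2
 * enabled) and a vertex shader whose position output is per-view, pos_slots =
 * util_bitcount(0b101) = 2, so the VUE map reserves one position slot per
 * view for Primitive Replication.  Without a per-view position, or with
 * multiview disabled (view_mask = 0), pos_slots stays at 1.
 */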
614
615 elk_compute_vue_map(compiler->devinfo,
616 &vs_stage->prog_data.vs.base.vue_map,
617 vs_stage->nir->info.outputs_written,
618 vs_stage->nir->info.separate_shader,
619 pos_slots);
620
621 vs_stage->num_stats = 1;
622
623 struct elk_compile_vs_params params = {
624 .base = {
625 .nir = vs_stage->nir,
626 .stats = vs_stage->stats,
627 .log_data = pipeline->base.device,
628 .mem_ctx = mem_ctx,
629 },
630 .key = &vs_stage->key.vs,
631 .prog_data = &vs_stage->prog_data.vs,
632 };
633
634 vs_stage->code = elk_compile_vs(compiler, &params);
635 }
636
637 static void
638 merge_tess_info(struct shader_info *tes_info,
639 const struct shader_info *tcs_info)
640 {
641 /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
642 *
643 * "PointMode. Controls generation of points rather than triangles
644 * or lines. This functionality defaults to disabled, and is
645 * enabled if either shader stage includes the execution mode.
646 *
647 * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
648 * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
649 * and OutputVertices, it says:
650 *
651 * "One mode must be set in at least one of the tessellation
652 * shader stages."
653 *
654 * So, the fields can be set in either the TCS or TES, but they must
655 * agree if set in both. Our backend looks at TES, so bitwise-or in
656 * the values from the TCS.
657 */
658 assert(tcs_info->tess.tcs_vertices_out == 0 ||
659 tes_info->tess.tcs_vertices_out == 0 ||
660 tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
661 tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;
662
663 assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
664 tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
665 tcs_info->tess.spacing == tes_info->tess.spacing);
666 tes_info->tess.spacing |= tcs_info->tess.spacing;
667
668 assert(tcs_info->tess._primitive_mode == 0 ||
669 tes_info->tess._primitive_mode == 0 ||
670 tcs_info->tess._primitive_mode == tes_info->tess._primitive_mode);
671 tes_info->tess._primitive_mode |= tcs_info->tess._primitive_mode;
672 tes_info->tess.ccw |= tcs_info->tess.ccw;
673 tes_info->tess.point_mode |= tcs_info->tess.point_mode;
674 }
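/* Example of the merge (editor's sketch): if the TCS declares only
 * OutputVertices = 3 while the TES declares Triangles, SpacingEqual and
 * VertexOrderCw, the merged TES shader_info ends up with tcs_vertices_out = 3,
 * _primitive_mode = TESS_PRIMITIVE_TRIANGLES and spacing = TESS_SPACING_EQUAL,
 * which is what the backend reads.  The asserts above guarantee the two
 * stages never disagree on a field they both set.
 */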
675
676 static void
677 anv_pipeline_link_tcs(const struct elk_compiler *compiler,
678 struct anv_pipeline_stage *tcs_stage,
679 struct anv_pipeline_stage *tes_stage)
680 {
681 assert(tes_stage && tes_stage->stage == MESA_SHADER_TESS_EVAL);
682
683 elk_nir_link_shaders(compiler, tcs_stage->nir, tes_stage->nir);
684
685 nir_lower_patch_vertices(tes_stage->nir,
686 tcs_stage->nir->info.tess.tcs_vertices_out,
687 NULL);
688
689 /* Copy TCS info into the TES info */
690 merge_tess_info(&tes_stage->nir->info, &tcs_stage->nir->info);
691
692 /* Whacking the key after cache lookup is a bit sketchy, but all of
693 * this comes from the SPIR-V, which is part of the hash used for the
694 * pipeline cache. So it should be safe.
695 */
696 tcs_stage->key.tcs._tes_primitive_mode =
697 tes_stage->nir->info.tess._primitive_mode;
698 tcs_stage->key.tcs.quads_workaround =
699 compiler->devinfo->ver < 9 &&
700 tes_stage->nir->info.tess._primitive_mode == TESS_PRIMITIVE_QUADS &&
701 tes_stage->nir->info.tess.spacing == TESS_SPACING_EQUAL;
702 }
703
704 static void
705 anv_pipeline_compile_tcs(const struct elk_compiler *compiler,
706 void *mem_ctx,
707 struct anv_device *device,
708 struct anv_pipeline_stage *tcs_stage,
709 struct anv_pipeline_stage *prev_stage)
710 {
711 tcs_stage->key.tcs.outputs_written =
712 tcs_stage->nir->info.outputs_written;
713 tcs_stage->key.tcs.patch_outputs_written =
714 tcs_stage->nir->info.patch_outputs_written;
715
716 tcs_stage->num_stats = 1;
717
718 struct elk_compile_tcs_params params = {
719 .base = {
720 .nir = tcs_stage->nir,
721 .stats = tcs_stage->stats,
722 .log_data = device,
723 .mem_ctx = mem_ctx,
724 },
725 .key = &tcs_stage->key.tcs,
726 .prog_data = &tcs_stage->prog_data.tcs,
727 };
728
729 tcs_stage->code = elk_compile_tcs(compiler, &params);
730 }
731
732 static void
733 anv_pipeline_link_tes(const struct elk_compiler *compiler,
734 struct anv_pipeline_stage *tes_stage,
735 struct anv_pipeline_stage *next_stage)
736 {
737 if (next_stage)
738 elk_nir_link_shaders(compiler, tes_stage->nir, next_stage->nir);
739 }
740
741 static void
742 anv_pipeline_compile_tes(const struct elk_compiler *compiler,
743 void *mem_ctx,
744 struct anv_device *device,
745 struct anv_pipeline_stage *tes_stage,
746 struct anv_pipeline_stage *tcs_stage)
747 {
748 tes_stage->key.tes.inputs_read =
749 tcs_stage->nir->info.outputs_written;
750 tes_stage->key.tes.patch_inputs_read =
751 tcs_stage->nir->info.patch_outputs_written;
752
753 tes_stage->num_stats = 1;
754
755 struct elk_compile_tes_params params = {
756 .base = {
757 .nir = tes_stage->nir,
758 .stats = tes_stage->stats,
759 .log_data = device,
760 .mem_ctx = mem_ctx,
761 },
762 .key = &tes_stage->key.tes,
763 .prog_data = &tes_stage->prog_data.tes,
764 .input_vue_map = &tcs_stage->prog_data.tcs.base.vue_map,
765 };
766
767 tes_stage->code = elk_compile_tes(compiler, &params);
768 }
769
770 static void
771 anv_pipeline_link_gs(const struct elk_compiler *compiler,
772 struct anv_pipeline_stage *gs_stage,
773 struct anv_pipeline_stage *next_stage)
774 {
775 if (next_stage)
776 elk_nir_link_shaders(compiler, gs_stage->nir, next_stage->nir);
777 }
778
779 static void
780 anv_pipeline_compile_gs(const struct elk_compiler *compiler,
781 void *mem_ctx,
782 struct anv_device *device,
783 struct anv_pipeline_stage *gs_stage,
784 struct anv_pipeline_stage *prev_stage)
785 {
786 elk_compute_vue_map(compiler->devinfo,
787 &gs_stage->prog_data.gs.base.vue_map,
788 gs_stage->nir->info.outputs_written,
789 gs_stage->nir->info.separate_shader, 1);
790
791 gs_stage->num_stats = 1;
792
793 struct elk_compile_gs_params params = {
794 .base = {
795 .nir = gs_stage->nir,
796 .stats = gs_stage->stats,
797 .log_data = device,
798 .mem_ctx = mem_ctx,
799 },
800 .key = &gs_stage->key.gs,
801 .prog_data = &gs_stage->prog_data.gs,
802 };
803
804 gs_stage->code = elk_compile_gs(compiler, &params);
805 }
806
807 static void
808 anv_pipeline_link_fs(const struct elk_compiler *compiler,
809 struct anv_pipeline_stage *stage,
810 const struct vk_render_pass_state *rp)
811 {
812 /* Initially the valid outputs value is set to treat all possible render
813 * targets as valid (see populate_wm_prog_key()), before we look at the
814 * shader variables. Here we look at the output variables of the shader
815 * and compute the correct number of render target outputs.
816 */
817 stage->key.wm.color_outputs_valid = 0;
818 nir_foreach_shader_out_variable_safe(var, stage->nir) {
819 if (var->data.location < FRAG_RESULT_DATA0)
820 continue;
821
822 const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
823 const unsigned array_len =
824 glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
825 assert(rt + array_len <= MAX_RTS);
826
827 stage->key.wm.color_outputs_valid |= BITFIELD_RANGE(rt, array_len);
828 }
829 stage->key.wm.color_outputs_valid &=
830 (1u << rp->color_attachment_count) - 1;
831 stage->key.wm.nr_color_regions =
832 util_last_bit(stage->key.wm.color_outputs_valid);
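/* Worked example (editor's sketch): a fragment shader writing only
 * FRAG_RESULT_DATA0 and FRAG_RESULT_DATA2 produces a mask of 0b101; with a
 * render pass that has two color attachments the mask is ANDed with
 * (1 << 2) - 1 = 0b011, leaving 0b001, and nr_color_regions =
 * util_last_bit(0b001) = 1.  Output 2 is dropped because there is no
 * attachment backing it.
 */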
833
834 unsigned num_rt_bindings;
835 struct anv_pipeline_binding rt_bindings[MAX_RTS];
836 if (stage->key.wm.nr_color_regions > 0) {
837 assert(stage->key.wm.nr_color_regions <= MAX_RTS);
838 for (unsigned rt = 0; rt < stage->key.wm.nr_color_regions; rt++) {
839 if (stage->key.wm.color_outputs_valid & BITFIELD_BIT(rt)) {
840 rt_bindings[rt] = (struct anv_pipeline_binding) {
841 .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
842 .index = rt,
843 };
844 } else {
845 /* Setup a null render target */
846 rt_bindings[rt] = (struct anv_pipeline_binding) {
847 .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
848 .index = UINT32_MAX,
849 };
850 }
851 }
852 num_rt_bindings = stage->key.wm.nr_color_regions;
853 } else {
854 /* Setup a null render target */
855 rt_bindings[0] = (struct anv_pipeline_binding) {
856 .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
857 .index = UINT32_MAX,
858 };
859 num_rt_bindings = 1;
860 }
861
862 assert(num_rt_bindings <= MAX_RTS);
863 assert(stage->bind_map.surface_count == 0);
864 typed_memcpy(stage->bind_map.surface_to_descriptor,
865 rt_bindings, num_rt_bindings);
866 stage->bind_map.surface_count += num_rt_bindings;
867 }
868
869 static void
870 anv_pipeline_compile_fs(const struct elk_compiler *compiler,
871 void *mem_ctx,
872 struct anv_device *device,
873 struct anv_pipeline_stage *fs_stage,
874 struct anv_pipeline_stage *prev_stage)
875 {
876 /* TODO: we could set this to 0 based on the information in nir_shader, but
877 * we need this before we call spirv_to_nir.
878 */
879 assert(prev_stage);
880
881 struct elk_compile_fs_params params = {
882 .base = {
883 .nir = fs_stage->nir,
884 .stats = fs_stage->stats,
885 .log_data = device,
886 .mem_ctx = mem_ctx,
887 },
888 .key = &fs_stage->key.wm,
889 .prog_data = &fs_stage->prog_data.wm,
890
891 .allow_spilling = true,
892 };
893
894 fs_stage->key.wm.input_slots_valid =
895 prev_stage->prog_data.vue.vue_map.slots_valid;
896
897 fs_stage->code = elk_compile_fs(compiler, &params);
898
899 fs_stage->num_stats = (uint32_t)fs_stage->prog_data.wm.dispatch_8 +
900 (uint32_t)fs_stage->prog_data.wm.dispatch_16 +
901 (uint32_t)fs_stage->prog_data.wm.dispatch_32;
902 }
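/* Editor's note: num_stats counts one entry per SIMD mode the backend
 * actually emitted.  For example, if elk_compile_fs() produced SIMD8 and
 * SIMD16 programs but no SIMD32, num_stats = 2, matching the per-SIMD
 * executables that anv_pipeline_add_executables() records for the
 * pipeline-executable queries.
 */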
903
904 static void
905 anv_pipeline_add_executable(struct anv_pipeline *pipeline,
906 struct anv_pipeline_stage *stage,
907 struct elk_compile_stats *stats,
908 uint32_t code_offset)
909 {
910 char *nir = NULL;
911 if (stage->nir &&
912 (pipeline->flags &
913 VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
914 nir = nir_shader_as_str(stage->nir, pipeline->mem_ctx);
915 }
916
917 char *disasm = NULL;
918 if (stage->code &&
919 (pipeline->flags &
920 VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
921 char *stream_data = NULL;
922 size_t stream_size = 0;
923 FILE *stream = open_memstream(&stream_data, &stream_size);
924
925 uint32_t push_size = 0;
926 for (unsigned i = 0; i < 4; i++)
927 push_size += stage->bind_map.push_ranges[i].length;
928 if (push_size > 0) {
929 fprintf(stream, "Push constant ranges:\n");
930 for (unsigned i = 0; i < 4; i++) {
931 if (stage->bind_map.push_ranges[i].length == 0)
932 continue;
933
934 fprintf(stream, " RANGE%d (%dB): ", i,
935 stage->bind_map.push_ranges[i].length * 32);
936
937 switch (stage->bind_map.push_ranges[i].set) {
938 case ANV_DESCRIPTOR_SET_NULL:
939 fprintf(stream, "NULL");
940 break;
941
942 case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS:
943 fprintf(stream, "Vulkan push constants and API params");
944 break;
945
946 case ANV_DESCRIPTOR_SET_DESCRIPTORS:
947 fprintf(stream, "Descriptor buffer for set %d (start=%dB)",
948 stage->bind_map.push_ranges[i].index,
949 stage->bind_map.push_ranges[i].start * 32);
950 break;
951
952 case ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS:
953 unreachable("gl_NumWorkgroups is never pushed");
954
955 case ANV_DESCRIPTOR_SET_SHADER_CONSTANTS:
956 fprintf(stream, "Inline shader constant data (start=%dB)",
957 stage->bind_map.push_ranges[i].start * 32);
958 break;
959
960 case ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS:
961 unreachable("Color attachments can't be pushed");
962
963 default:
964 fprintf(stream, "UBO (set=%d binding=%d start=%dB)",
965 stage->bind_map.push_ranges[i].set,
966 stage->bind_map.push_ranges[i].index,
967 stage->bind_map.push_ranges[i].start * 32);
968 break;
969 }
970 fprintf(stream, "\n");
971 }
972 fprintf(stream, "\n");
973 }
974
975 /* Creating this is far cheaper than it looks. It's perfectly fine to
976 * do it for every binary.
977 */
978 elk_disassemble_with_errors(&pipeline->device->physical->compiler->isa,
979 stage->code, code_offset, stream);
980
981 fclose(stream);
982
983 /* Copy it to a ralloc'd thing */
984 disasm = ralloc_size(pipeline->mem_ctx, stream_size + 1);
985 memcpy(disasm, stream_data, stream_size);
986 disasm[stream_size] = 0;
987
988 free(stream_data);
989 }
990
991 const struct anv_pipeline_executable exe = {
992 .stage = stage->stage,
993 .stats = *stats,
994 .nir = nir,
995 .disasm = disasm,
996 };
997 util_dynarray_append(&pipeline->executables,
998 struct anv_pipeline_executable, exe);
999 }
1000
1001 static void
1002 anv_pipeline_add_executables(struct anv_pipeline *pipeline,
1003 struct anv_pipeline_stage *stage,
1004 struct anv_shader_bin *bin)
1005 {
1006 if (stage->stage == MESA_SHADER_FRAGMENT) {
1007 /* We pull the prog data and stats out of the anv_shader_bin because
1008 * the anv_pipeline_stage may not be fully populated if we successfully
1009 * looked up the shader in a cache.
1010 */
1011 const struct elk_wm_prog_data *wm_prog_data =
1012 (const struct elk_wm_prog_data *)bin->prog_data;
1013 struct elk_compile_stats *stats = bin->stats;
1014
1015 if (wm_prog_data->dispatch_8) {
1016 anv_pipeline_add_executable(pipeline, stage, stats++, 0);
1017 }
1018
1019 if (wm_prog_data->dispatch_16) {
1020 anv_pipeline_add_executable(pipeline, stage, stats++,
1021 wm_prog_data->prog_offset_16);
1022 }
1023
1024 if (wm_prog_data->dispatch_32) {
1025 anv_pipeline_add_executable(pipeline, stage, stats++,
1026 wm_prog_data->prog_offset_32);
1027 }
1028 } else {
1029 anv_pipeline_add_executable(pipeline, stage, bin->stats, 0);
1030 }
1031 }
1032
1033 static enum elk_robustness_flags
1034 anv_device_get_robust_flags(const struct anv_device *device)
1035 {
1036 return device->robust_buffer_access ?
1037 (ELK_ROBUSTNESS_UBO | ELK_ROBUSTNESS_SSBO) : 0;
1038 }
1039
1040 static void
1041 anv_graphics_pipeline_init_keys(struct anv_graphics_pipeline *pipeline,
1042 const struct vk_graphics_pipeline_state *state,
1043 struct anv_pipeline_stage *stages)
1044 {
1045 for (uint32_t s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
1046 if (!stages[s].info)
1047 continue;
1048
1049 int64_t stage_start = os_time_get_nano();
1050
1051 vk_pipeline_hash_shader_stage(stages[s].pipeline_flags, stages[s].info,
1052 NULL, stages[s].shader_sha1);
1053
1054 const struct anv_device *device = pipeline->base.device;
1055 enum elk_robustness_flags robust_flags = anv_device_get_robust_flags(device);
1056 switch (stages[s].stage) {
1057 case MESA_SHADER_VERTEX:
1058 populate_vs_prog_key(device,
1059 robust_flags,
1060 &stages[s].key.vs);
1061 break;
1062 case MESA_SHADER_TESS_CTRL:
1063 populate_tcs_prog_key(device,
1064 robust_flags,
1065 state->ts->patch_control_points,
1066 &stages[s].key.tcs);
1067 break;
1068 case MESA_SHADER_TESS_EVAL:
1069 populate_tes_prog_key(device,
1070 robust_flags,
1071 &stages[s].key.tes);
1072 break;
1073 case MESA_SHADER_GEOMETRY:
1074 populate_gs_prog_key(device,
1075 robust_flags,
1076 &stages[s].key.gs);
1077 break;
1078 case MESA_SHADER_FRAGMENT: {
1079 populate_wm_prog_key(pipeline,
1080 robust_flags,
1081 state->dynamic, state->ms, state->rp,
1082 &stages[s].key.wm);
1083 break;
1084 }
1085 default:
1086 unreachable("Invalid graphics shader stage");
1087 }
1088
1089 stages[s].feedback.duration += os_time_get_nano() - stage_start;
1090 stages[s].feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
1091 }
1092
1093 assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);
1094 }
1095
1096 static bool
1097 anv_graphics_pipeline_load_cached_shaders(struct anv_graphics_pipeline *pipeline,
1098 struct vk_pipeline_cache *cache,
1099 struct anv_pipeline_stage *stages,
1100 VkPipelineCreationFeedback *pipeline_feedback)
1101 {
1102 unsigned found = 0;
1103 unsigned cache_hits = 0;
1104 for (unsigned s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
1105 if (!stages[s].info)
1106 continue;
1107
1108 int64_t stage_start = os_time_get_nano();
1109
1110 bool cache_hit;
1111 struct anv_shader_bin *bin =
1112 anv_device_search_for_kernel(pipeline->base.device, cache,
1113 &stages[s].cache_key,
1114 sizeof(stages[s].cache_key), &cache_hit);
1115 if (bin) {
1116 found++;
1117 pipeline->shaders[s] = bin;
1118 }
1119
1120 if (cache_hit) {
1121 cache_hits++;
1122 stages[s].feedback.flags |=
1123 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
1124 }
1125 stages[s].feedback.duration += os_time_get_nano() - stage_start;
1126 }
1127
1128 if (found == __builtin_popcount(pipeline->active_stages)) {
1129 if (cache_hits == found) {
1130 pipeline_feedback->flags |=
1131 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
1132 }
1133 /* We found all our shaders in the cache. We're done. */
1134 for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1135 if (!stages[s].info)
1136 continue;
1137
1138 anv_pipeline_add_executables(&pipeline->base, &stages[s],
1139 pipeline->shaders[s]);
1140 }
1141 return true;
1142 } else if (found > 0) {
1143 /* We found some but not all of our shaders. This shouldn't normally
1144 * happen, but it can if we have a partially populated pipeline
1145 * cache.
1146 */
1147 assert(found < __builtin_popcount(pipeline->active_stages));
1148
1149 vk_perf(VK_LOG_OBJS(cache ? &cache->base :
1150 &pipeline->base.device->vk.base),
1151 "Found a partial pipeline in the cache. This is "
1152 "most likely caused by an incomplete pipeline cache "
1153 "import or export");
1154
1155 /* We're going to have to recompile anyway, so just throw away our
1156 * references to the shaders in the cache. We'll get them out of the
1157 * cache again as part of the compilation process.
1158 */
1159 for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1160 stages[s].feedback.flags = 0;
1161 if (pipeline->shaders[s]) {
1162 anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]);
1163 pipeline->shaders[s] = NULL;
1164 }
1165 }
1166 }
1167
1168 return false;
1169 }
1170
1171 static const gl_shader_stage graphics_shader_order[] = {
1172 MESA_SHADER_VERTEX,
1173 MESA_SHADER_TESS_CTRL,
1174 MESA_SHADER_TESS_EVAL,
1175 MESA_SHADER_GEOMETRY,
1176
1177 MESA_SHADER_FRAGMENT,
1178 };
1179
1180 static VkResult
1181 anv_graphics_pipeline_load_nir(struct anv_graphics_pipeline *pipeline,
1182 struct vk_pipeline_cache *cache,
1183 struct anv_pipeline_stage *stages,
1184 void *pipeline_ctx)
1185 {
1186 for (unsigned i = 0; i < ARRAY_SIZE(graphics_shader_order); i++) {
1187 gl_shader_stage s = graphics_shader_order[i];
1188 if (!stages[s].info)
1189 continue;
1190
1191 int64_t stage_start = os_time_get_nano();
1192
1193 assert(stages[s].stage == s);
1194 assert(pipeline->shaders[s] == NULL);
1195
1196 stages[s].bind_map = (struct anv_pipeline_bind_map) {
1197 .surface_to_descriptor = stages[s].surface_to_descriptor,
1198 .sampler_to_descriptor = stages[s].sampler_to_descriptor
1199 };
1200
1201 stages[s].nir = anv_pipeline_stage_get_nir(&pipeline->base, cache,
1202 pipeline_ctx,
1203 &stages[s]);
1204 if (stages[s].nir == NULL) {
1205 return vk_error(pipeline, VK_ERROR_UNKNOWN);
1206 }
1207
1208 stages[s].feedback.duration += os_time_get_nano() - stage_start;
1209 }
1210
1211 return VK_SUCCESS;
1212 }
1213
1214 static VkResult
1215 anv_graphics_pipeline_compile(struct anv_graphics_pipeline *pipeline,
1216 struct vk_pipeline_cache *cache,
1217 const VkGraphicsPipelineCreateInfo *info,
1218 const struct vk_graphics_pipeline_state *state)
1219 {
1220 ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
1221 VkResult result;
1222
1223 const VkPipelineCreateFlags2KHR pipeline_flags =
1224 vk_graphics_pipeline_create_flags(info);
1225
1226 VkPipelineCreationFeedback pipeline_feedback = {
1227 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
1228 };
1229 int64_t pipeline_start = os_time_get_nano();
1230
1231 const struct elk_compiler *compiler = pipeline->base.device->physical->compiler;
1232 struct anv_pipeline_stage stages[ANV_GRAPHICS_SHADER_STAGE_COUNT] = {};
1233 for (uint32_t i = 0; i < info->stageCount; i++) {
1234 gl_shader_stage stage = vk_to_mesa_shader_stage(info->pStages[i].stage);
1235 stages[stage].stage = stage;
1236 stages[stage].pipeline_flags = pipeline_flags;
1237 stages[stage].info = &info->pStages[i];
1238 }
1239
1240 anv_graphics_pipeline_init_keys(pipeline, state, stages);
1241
1242 unsigned char sha1[20];
1243 anv_pipeline_hash_graphics(pipeline, layout, stages, sha1);
1244
1245 for (unsigned s = 0; s < ARRAY_SIZE(stages); s++) {
1246 if (!stages[s].info)
1247 continue;
1248
1249 stages[s].cache_key.stage = s;
1250 memcpy(stages[s].cache_key.sha1, sha1, sizeof(sha1));
1251 }
1252
1253 const bool skip_cache_lookup =
1254 (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
1255 if (!skip_cache_lookup) {
1256 bool found_all_shaders =
1257 anv_graphics_pipeline_load_cached_shaders(pipeline, cache, stages,
1258 &pipeline_feedback);
1259 if (found_all_shaders)
1260 goto done;
1261 }
1262
1263 if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)
1264 return VK_PIPELINE_COMPILE_REQUIRED;
1265
1266 void *pipeline_ctx = ralloc_context(NULL);
1267
1268 result = anv_graphics_pipeline_load_nir(pipeline, cache, stages,
1269 pipeline_ctx);
1270 if (result != VK_SUCCESS)
1271 goto fail;
1272
1273 /* Walk backwards to link */
1274 struct anv_pipeline_stage *next_stage = NULL;
1275 for (int i = ARRAY_SIZE(graphics_shader_order) - 1; i >= 0; i--) {
1276 gl_shader_stage s = graphics_shader_order[i];
1277 if (!stages[s].info)
1278 continue;
1279
1280 switch (s) {
1281 case MESA_SHADER_VERTEX:
1282 anv_pipeline_link_vs(compiler, &stages[s], next_stage);
1283 break;
1284 case MESA_SHADER_TESS_CTRL:
1285 anv_pipeline_link_tcs(compiler, &stages[s], next_stage);
1286 break;
1287 case MESA_SHADER_TESS_EVAL:
1288 anv_pipeline_link_tes(compiler, &stages[s], next_stage);
1289 break;
1290 case MESA_SHADER_GEOMETRY:
1291 anv_pipeline_link_gs(compiler, &stages[s], next_stage);
1292 break;
1293 case MESA_SHADER_FRAGMENT:
1294 anv_pipeline_link_fs(compiler, &stages[s], state->rp);
1295 break;
1296 default:
1297 unreachable("Invalid graphics shader stage");
1298 }
1299
1300 next_stage = &stages[s];
1301 }
1302
1303 struct anv_pipeline_stage *prev_stage = NULL;
1304 for (unsigned i = 0; i < ARRAY_SIZE(graphics_shader_order); i++) {
1305 gl_shader_stage s = graphics_shader_order[i];
1306 if (!stages[s].info)
1307 continue;
1308
1309 int64_t stage_start = os_time_get_nano();
1310
1311 void *stage_ctx = ralloc_context(NULL);
1312
1313 anv_pipeline_lower_nir(&pipeline->base, stage_ctx, &stages[s], layout);
1314
1315 if (prev_stage && compiler->nir_options[s]->unify_interfaces) {
1316 prev_stage->nir->info.outputs_written |= stages[s].nir->info.inputs_read &
1317 ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
1318 stages[s].nir->info.inputs_read |= prev_stage->nir->info.outputs_written &
1319 ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
1320 prev_stage->nir->info.patch_outputs_written |= stages[s].nir->info.patch_inputs_read;
1321 stages[s].nir->info.patch_inputs_read |= prev_stage->nir->info.patch_outputs_written;
1322 }
1323
1324 ralloc_free(stage_ctx);
1325
1326 stages[s].feedback.duration += os_time_get_nano() - stage_start;
1327
1328 prev_stage = &stages[s];
1329 }
1330
1331 prev_stage = NULL;
1332 for (unsigned i = 0; i < ARRAY_SIZE(graphics_shader_order); i++) {
1333 gl_shader_stage s = graphics_shader_order[i];
1334 if (!stages[s].info)
1335 continue;
1336
1337 int64_t stage_start = os_time_get_nano();
1338
1339 void *stage_ctx = ralloc_context(NULL);
1340
1341 switch (s) {
1342 case MESA_SHADER_VERTEX:
1343 anv_pipeline_compile_vs(compiler, stage_ctx, pipeline,
1344 &stages[s]);
1345 break;
1346 case MESA_SHADER_TESS_CTRL:
1347 anv_pipeline_compile_tcs(compiler, stage_ctx, pipeline->base.device,
1348 &stages[s], prev_stage);
1349 break;
1350 case MESA_SHADER_TESS_EVAL:
1351 anv_pipeline_compile_tes(compiler, stage_ctx, pipeline->base.device,
1352 &stages[s], prev_stage);
1353 break;
1354 case MESA_SHADER_GEOMETRY:
1355 anv_pipeline_compile_gs(compiler, stage_ctx, pipeline->base.device,
1356 &stages[s], prev_stage);
1357 break;
1358 case MESA_SHADER_FRAGMENT:
1359 anv_pipeline_compile_fs(compiler, stage_ctx, pipeline->base.device,
1360 &stages[s], prev_stage);
1361 break;
1362 default:
1363 unreachable("Invalid graphics shader stage");
1364 }
1365 if (stages[s].code == NULL) {
1366 ralloc_free(stage_ctx);
1367 result = vk_error(pipeline->base.device, VK_ERROR_OUT_OF_HOST_MEMORY);
1368 goto fail;
1369 }
1370
1371 anv_nir_validate_push_layout(&stages[s].prog_data.base,
1372 &stages[s].bind_map);
1373
1374 struct anv_shader_bin *bin =
1375 anv_device_upload_kernel(pipeline->base.device, cache, s,
1376 &stages[s].cache_key,
1377 sizeof(stages[s].cache_key),
1378 stages[s].code,
1379 stages[s].prog_data.base.program_size,
1380 &stages[s].prog_data.base,
1381 elk_prog_data_size(s),
1382 stages[s].stats, stages[s].num_stats,
1383 stages[s].nir->xfb_info,
1384 &stages[s].bind_map);
1385 if (!bin) {
1386 ralloc_free(stage_ctx);
1387 result = vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
1388 goto fail;
1389 }
1390
1391 anv_pipeline_add_executables(&pipeline->base, &stages[s], bin);
1392
1393 pipeline->shaders[s] = bin;
1394 ralloc_free(stage_ctx);
1395
1396 stages[s].feedback.duration += os_time_get_nano() - stage_start;
1397
1398 prev_stage = &stages[s];
1399 }
1400
1401 ralloc_free(pipeline_ctx);
1402
1403 done:
1404
1405 pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
1406
1407 const VkPipelineCreationFeedbackCreateInfo *create_feedback =
1408 vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
1409 if (create_feedback) {
1410 *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
1411
1412 uint32_t stage_count = create_feedback->pipelineStageCreationFeedbackCount;
1413 assert(stage_count == 0 || info->stageCount == stage_count);
1414 for (uint32_t i = 0; i < stage_count; i++) {
1415 gl_shader_stage s = vk_to_mesa_shader_stage(info->pStages[i].stage);
1416 create_feedback->pPipelineStageCreationFeedbacks[i] = stages[s].feedback;
1417 }
1418 }
1419
1420 return VK_SUCCESS;
1421
1422 fail:
1423 ralloc_free(pipeline_ctx);
1424
1425 for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1426 if (pipeline->shaders[s])
1427 anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]);
1428 }
1429
1430 return result;
1431 }
1432
1433 static VkResult
1434 anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
1435 struct vk_pipeline_cache *cache,
1436 const VkComputePipelineCreateInfo *info)
1437 {
1438 const VkPipelineShaderStageCreateInfo *sinfo = &info->stage;
1439 assert(sinfo->stage == VK_SHADER_STAGE_COMPUTE_BIT);
1440
1441 VkPipelineCreationFeedback pipeline_feedback = {
1442 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
1443 };
1444 int64_t pipeline_start = os_time_get_nano();
1445
1446 struct anv_device *device = pipeline->base.device;
1447 const struct elk_compiler *compiler = device->physical->compiler;
1448
1449 struct anv_pipeline_stage stage = {
1450 .stage = MESA_SHADER_COMPUTE,
1451 .pipeline_flags = vk_compute_pipeline_create_flags(info),
1452 .info = &info->stage,
1453 .cache_key = {
1454 .stage = MESA_SHADER_COMPUTE,
1455 },
1456 .feedback = {
1457 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
1458 },
1459 };
1460 vk_pipeline_hash_shader_stage(stage.pipeline_flags, &info->stage,
1461 NULL, stage.shader_sha1);
1462
1463 struct anv_shader_bin *bin = NULL;
1464
1465 populate_cs_prog_key(device,
1466 anv_device_get_robust_flags(device),
1467 &stage.key.cs);
1468
1469 ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
1470
1471 const bool skip_cache_lookup =
1472 (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
1473
1474 anv_pipeline_hash_compute(pipeline, layout, &stage, stage.cache_key.sha1);
1475
1476 bool cache_hit = false;
1477 if (!skip_cache_lookup) {
1478 bin = anv_device_search_for_kernel(device, cache,
1479 &stage.cache_key,
1480 sizeof(stage.cache_key),
1481 &cache_hit);
1482 }
1483
1484 if (bin == NULL &&
1485 (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT))
1486 return VK_PIPELINE_COMPILE_REQUIRED;
1487
1488 void *mem_ctx = ralloc_context(NULL);
1489 if (bin == NULL) {
1490 int64_t stage_start = os_time_get_nano();
1491
1492 stage.bind_map = (struct anv_pipeline_bind_map) {
1493 .surface_to_descriptor = stage.surface_to_descriptor,
1494 .sampler_to_descriptor = stage.sampler_to_descriptor
1495 };
1496
1497 /* Set up a binding for the gl_NumWorkGroups */
1498 stage.bind_map.surface_count = 1;
1499 stage.bind_map.surface_to_descriptor[0] = (struct anv_pipeline_binding) {
1500 .set = ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS,
1501 };
1502
1503 stage.nir = anv_pipeline_stage_get_nir(&pipeline->base, cache, mem_ctx, &stage);
1504 if (stage.nir == NULL) {
1505 ralloc_free(mem_ctx);
1506 return vk_error(pipeline, VK_ERROR_UNKNOWN);
1507 }
1508
1509 anv_pipeline_lower_nir(&pipeline->base, mem_ctx, &stage, layout);
1510
1511 unsigned local_size = stage.nir->info.workgroup_size[0] *
1512 stage.nir->info.workgroup_size[1] *
1513 stage.nir->info.workgroup_size[2];
1514
1515 /* Games don't always request full subgroups when they should,
1516 * which can cause bugs, as they may expect a bigger subgroup
1517 * size than the one we choose for the execution.
1518 */
1519 if (device->physical->instance->assume_full_subgroups &&
1520 stage.nir->info.uses_wide_subgroup_intrinsics &&
1521 stage.nir->info.subgroup_size == SUBGROUP_SIZE_API_CONSTANT &&
1522 local_size &&
1523 local_size % ELK_SUBGROUP_SIZE == 0)
1524 stage.nir->info.subgroup_size = SUBGROUP_SIZE_FULL_SUBGROUPS;
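/* Worked example (editor's sketch): with assume_full_subgroups enabled, a
 * compute shader using an 8x8x1 workgroup has local_size = 64, which is a
 * multiple of ELK_SUBGROUP_SIZE (32), so a SUBGROUP_SIZE_API_CONSTANT request
 * is promoted to SUBGROUP_SIZE_FULL_SUBGROUPS here, provided the shader also
 * uses wide subgroup intrinsics.  A 10x5x1 workgroup (local_size = 50) is
 * left alone, since 50 is not a multiple of the subgroup size and forcing
 * full subgroups could change observable subgroup behaviour.
 */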
1525
1526 /* If the client requests that we dispatch full subgroups but doesn't
1527 * allow us to pick a subgroup size, we have to smash it to the API
1528 * value of 32. Performance will likely be terrible in this case but
1529 * there's nothing we can do about that. The client should have chosen
1530 * a size.
1531 */
1532 if (stage.nir->info.subgroup_size == SUBGROUP_SIZE_FULL_SUBGROUPS)
1533 stage.nir->info.subgroup_size =
1534 device->physical->instance->assume_full_subgroups != 0 ?
1535 device->physical->instance->assume_full_subgroups : ELK_SUBGROUP_SIZE;
1536
1537 stage.num_stats = 1;
1538
1539 struct elk_compile_cs_params params = {
1540 .base = {
1541 .nir = stage.nir,
1542 .stats = stage.stats,
1543 .log_data = device,
1544 .mem_ctx = mem_ctx,
1545 },
1546 .key = &stage.key.cs,
1547 .prog_data = &stage.prog_data.cs,
1548 };
1549
1550 stage.code = elk_compile_cs(compiler, &params);
1551 if (stage.code == NULL) {
1552 ralloc_free(mem_ctx);
1553 return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
1554 }
1555
1556 anv_nir_validate_push_layout(&stage.prog_data.base, &stage.bind_map);
1557
1558 if (!stage.prog_data.cs.uses_num_work_groups) {
1559 assert(stage.bind_map.surface_to_descriptor[0].set ==
1560 ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS);
1561 stage.bind_map.surface_to_descriptor[0].set = ANV_DESCRIPTOR_SET_NULL;
1562 }
1563
1564 const unsigned code_size = stage.prog_data.base.program_size;
1565 bin = anv_device_upload_kernel(device, cache,
1566 MESA_SHADER_COMPUTE,
1567 &stage.cache_key, sizeof(stage.cache_key),
1568 stage.code, code_size,
1569 &stage.prog_data.base,
1570 sizeof(stage.prog_data.cs),
1571 stage.stats, stage.num_stats,
1572 NULL, &stage.bind_map);
1573 if (!bin) {
1574 ralloc_free(mem_ctx);
1575 return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
1576 }
1577
1578 stage.feedback.duration = os_time_get_nano() - stage_start;
1579 }
1580
1581 anv_pipeline_add_executables(&pipeline->base, &stage, bin);
1582
1583 ralloc_free(mem_ctx);
1584
1585 if (cache_hit) {
1586 stage.feedback.flags |=
1587 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
1588 pipeline_feedback.flags |=
1589 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
1590 }
1591 pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
1592
1593 const VkPipelineCreationFeedbackCreateInfo *create_feedback =
1594 vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
1595 if (create_feedback) {
1596 *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
1597
1598 if (create_feedback->pipelineStageCreationFeedbackCount) {
1599 assert(create_feedback->pipelineStageCreationFeedbackCount == 1);
1600 create_feedback->pPipelineStageCreationFeedbacks[0] = stage.feedback;
1601 }
1602 }
1603
1604 pipeline->cs = bin;
1605
1606 return VK_SUCCESS;
1607 }
1608
1609 static VkResult
1610 anv_compute_pipeline_create(struct anv_device *device,
1611 struct vk_pipeline_cache *cache,
1612 const VkComputePipelineCreateInfo *pCreateInfo,
1613 const VkAllocationCallbacks *pAllocator,
1614 VkPipeline *pPipeline)
1615 {
1616 struct anv_compute_pipeline *pipeline;
1617 VkResult result;
1618
1619 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO);
1620
1621 pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
1622 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1623 if (pipeline == NULL)
1624 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1625
1626 result = anv_pipeline_init(&pipeline->base, device,
1627 ANV_PIPELINE_COMPUTE, pCreateInfo->flags,
1628 pAllocator);
1629 if (result != VK_SUCCESS) {
1630 vk_free2(&device->vk.alloc, pAllocator, pipeline);
1631 return result;
1632 }
1633
1634 anv_batch_set_storage(&pipeline->base.batch, ANV_NULL_ADDRESS,
1635 pipeline->batch_data, sizeof(pipeline->batch_data));
1636
1637 result = anv_pipeline_compile_cs(pipeline, cache, pCreateInfo);
1638 if (result != VK_SUCCESS) {
1639 anv_pipeline_finish(&pipeline->base, device, pAllocator);
1640 vk_free2(&device->vk.alloc, pAllocator, pipeline);
1641 return result;
1642 }
1643
1644 anv_genX(device->info, compute_pipeline_emit)(pipeline);
1645
1646 *pPipeline = anv_pipeline_to_handle(&pipeline->base);
1647
1648 return pipeline->base.batch.status;
1649 }
1650
1651 VkResult anv_CreateComputePipelines(
1652 VkDevice _device,
1653 VkPipelineCache pipelineCache,
1654 uint32_t count,
1655 const VkComputePipelineCreateInfo* pCreateInfos,
1656 const VkAllocationCallbacks* pAllocator,
1657 VkPipeline* pPipelines)
1658 {
1659 ANV_FROM_HANDLE(anv_device, device, _device);
1660 ANV_FROM_HANDLE(vk_pipeline_cache, pipeline_cache, pipelineCache);
1661
1662 VkResult result = VK_SUCCESS;
1663
1664 unsigned i;
1665 for (i = 0; i < count; i++) {
1666 VkResult res = anv_compute_pipeline_create(device, pipeline_cache,
1667 &pCreateInfos[i],
1668 pAllocator, &pPipelines[i]);
1669
1670 if (res == VK_SUCCESS)
1671 continue;
1672
1673 /* Bail out on the first error other than VK_PIPELINE_COMPILE_REQUIRED,
1674 * since it is not obvious which error should be reported when two different failures occur.
1675 */
1676 result = res;
1677 if (res != VK_PIPELINE_COMPILE_REQUIRED)
1678 break;
1679
1680 pPipelines[i] = VK_NULL_HANDLE;
1681
1682 if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
1683 break;
1684 }
1685
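   /* Any entries that were never attempted because we bailed out above must
    * still be returned to the application as VK_NULL_HANDLE.
    */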
1686 for (; i < count; i++)
1687 pPipelines[i] = VK_NULL_HANDLE;
1688
1689 return result;
1690 }
1691
1692 /**
1693 * Calculate the desired L3 partitioning based on the current state of the
1694 * pipeline. For now this simply returns the conservative defaults calculated
1695 * by get_default_l3_weights(), but we could probably do better by gathering
1696 * more statistics from the pipeline state (e.g. guess of expected URB usage
1697 * and bound surfaces), or by using feed-back from performance counters.
1698 */
1699 void
1700 anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm)
1701 {
1702 const struct intel_device_info *devinfo = pipeline->device->info;
1703
1704 const struct intel_l3_weights w =
1705 intel_get_default_l3_weights(devinfo, true, needs_slm);
1706
1707 pipeline->l3_config = intel_get_l3_config(devinfo, w);
1708 }
1709
1710 static VkResult
1711 anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline,
1712 struct anv_device *device,
1713 struct vk_pipeline_cache *cache,
1714 const struct VkGraphicsPipelineCreateInfo *pCreateInfo,
1715 const struct vk_graphics_pipeline_state *state,
1716 const VkAllocationCallbacks *alloc)
1717 {
1718 VkResult result;
1719
1720 result = anv_pipeline_init(&pipeline->base, device,
1721 ANV_PIPELINE_GRAPHICS, pCreateInfo->flags,
1722 alloc);
1723 if (result != VK_SUCCESS)
1724 return result;
1725
1726 anv_batch_set_storage(&pipeline->base.batch, ANV_NULL_ADDRESS,
1727 pipeline->batch_data, sizeof(pipeline->batch_data));
1728
1729 pipeline->active_stages = 0;
1730 for (uint32_t i = 0; i < pCreateInfo->stageCount; i++)
1731 pipeline->active_stages |= pCreateInfo->pStages[i].stage;
1732
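   /* A tessellation evaluation shader never appears without a tessellation
    * control shader, so treat the control stage as active as well.
    */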
1733 if (pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
1734 pipeline->active_stages |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
1735
1736 pipeline->dynamic_state.ms.sample_locations = &pipeline->sample_locations;
1737 vk_dynamic_graphics_state_fill(&pipeline->dynamic_state, state);
1738
1739 pipeline->depth_clamp_enable = state->rs->depth_clamp_enable;
1740 pipeline->depth_clip_enable =
1741 vk_rasterization_state_depth_clip_enable(state->rs);
1742 pipeline->view_mask = state->rp->view_mask;
1743
1744 result = anv_graphics_pipeline_compile(pipeline, cache, pCreateInfo, state);
1745 if (result != VK_SUCCESS) {
1746 anv_pipeline_finish(&pipeline->base, device, alloc);
1747 return result;
1748 }
1749
1750 anv_pipeline_setup_l3_config(&pipeline->base, false);
1751
1752 const uint64_t inputs_read = get_vs_prog_data(pipeline)->inputs_read;
1753
1754 u_foreach_bit(a, state->vi->attributes_valid) {
1755 if (inputs_read & BITFIELD64_BIT(VERT_ATTRIB_GENERIC0 + a))
1756 pipeline->vb_used |= BITFIELD64_BIT(state->vi->attributes[a].binding);
1757 }
1758
1759 u_foreach_bit(b, state->vi->bindings_valid) {
1760 pipeline->vb[b].stride = state->vi->bindings[b].stride;
1761 pipeline->vb[b].instanced = state->vi->bindings[b].input_rate ==
1762 VK_VERTEX_INPUT_RATE_INSTANCE;
1763 pipeline->vb[b].instance_divisor = state->vi->bindings[b].divisor;
1764 }
1765
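   /* With multiview, each instance is replicated once per enabled view, so
    * the effective instance count is scaled by the number of bits set in
    * the view mask.
    */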
1766 pipeline->instance_multiplier = 1;
1767 if (pipeline->view_mask)
1768 pipeline->instance_multiplier = util_bitcount(pipeline->view_mask);
1769
1770 pipeline->negative_one_to_one =
1771 state->vp != NULL && state->vp->depth_clip_negative_one_to_one;
1772
1773 /* Store line mode, polygon mode and rasterization samples, these are used
1774 * for dynamic primitive topology.
1775 */
1776 pipeline->polygon_mode = state->rs->polygon_mode;
1777 pipeline->rasterization_samples =
1778 state->ms != NULL ? state->ms->rasterization_samples : 1;
1779 pipeline->line_mode = state->rs->line.mode;
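   /* If the application left the line mode at DEFAULT, resolve it here:
    * rectangular lines when rasterizing with multisampling, Bresenham lines
    * otherwise.
    */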
1780 if (pipeline->line_mode == VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT) {
1781 if (pipeline->rasterization_samples > 1) {
1782 pipeline->line_mode = VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT;
1783 } else {
1784 pipeline->line_mode = VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT;
1785 }
1786 }
1787 pipeline->patch_control_points =
1788 state->ts != NULL ? state->ts->patch_control_points : 0;
1789
1790 /* Store the color write masks, to be merged with color write enable if
1791 * dynamic.
1792 */
1793 if (state->cb != NULL) {
1794 for (unsigned i = 0; i < state->cb->attachment_count; i++)
1795 pipeline->color_comp_writes[i] = state->cb->attachments[i].write_mask;
1796 }
1797
1798 return VK_SUCCESS;
1799 }
1800
1801 static VkResult
1802 anv_graphics_pipeline_create(struct anv_device *device,
1803 struct vk_pipeline_cache *cache,
1804 const VkGraphicsPipelineCreateInfo *pCreateInfo,
1805 const VkAllocationCallbacks *pAllocator,
1806 VkPipeline *pPipeline)
1807 {
1808 struct anv_graphics_pipeline *pipeline;
1809 VkResult result;
1810
1811 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
1812
1813 pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
1814 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1815 if (pipeline == NULL)
1816 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1817
1818 struct vk_graphics_pipeline_all_state all;
1819 struct vk_graphics_pipeline_state state = { };
1820 result = vk_graphics_pipeline_state_fill(&device->vk, &state, pCreateInfo,
1821 NULL /* driver_rp */,
1822 0 /* driver_rp_flags */,
1823 &all, NULL, 0, NULL);
1824 if (result != VK_SUCCESS) {
1825 vk_free2(&device->vk.alloc, pAllocator, pipeline);
1826 return result;
1827 }
1828
1829 result = anv_graphics_pipeline_init(pipeline, device, cache,
1830 pCreateInfo, &state, pAllocator);
1831 if (result != VK_SUCCESS) {
1832 vk_free2(&device->vk.alloc, pAllocator, pipeline);
1833 return result;
1834 }
1835
1836 anv_genX(device->info, graphics_pipeline_emit)(pipeline, &state);
1837
1838 *pPipeline = anv_pipeline_to_handle(&pipeline->base);
1839
1840 return pipeline->base.batch.status;
1841 }
1842
1843 VkResult anv_CreateGraphicsPipelines(
1844 VkDevice _device,
1845 VkPipelineCache pipelineCache,
1846 uint32_t count,
1847 const VkGraphicsPipelineCreateInfo* pCreateInfos,
1848 const VkAllocationCallbacks* pAllocator,
1849 VkPipeline* pPipelines)
1850 {
1851 ANV_FROM_HANDLE(anv_device, device, _device);
1852 ANV_FROM_HANDLE(vk_pipeline_cache, pipeline_cache, pipelineCache);
1853
1854 VkResult result = VK_SUCCESS;
1855
1856 unsigned i;
1857 for (i = 0; i < count; i++) {
1858 VkResult res = anv_graphics_pipeline_create(device,
1859 pipeline_cache,
1860 &pCreateInfos[i],
1861 pAllocator, &pPipelines[i]);
1862
1863 if (res == VK_SUCCESS)
1864 continue;
1865
1866 /* Bail out on the first error other than VK_PIPELINE_COMPILE_REQUIRED,
1867 * since it is not obvious which error should be reported when two different failures occur.
1868 */
1869 result = res;
1870 if (res != VK_PIPELINE_COMPILE_REQUIRED)
1871 break;
1872
1873 pPipelines[i] = VK_NULL_HANDLE;
1874
1875 if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
1876 break;
1877 }
1878
1879 for (; i < count; i++)
1880 pPipelines[i] = VK_NULL_HANDLE;
1881
1882 return result;
1883 }
1884
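/* Write a formatted, NUL-terminated string into a fixed-size char array,
 * asserting (in debug builds) that the result was not truncated.
 */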
1885 #define WRITE_STR(field, ...) ({ \
1886 memset(field, 0, sizeof(field)); \
1887 UNUSED int i = snprintf(field, sizeof(field), __VA_ARGS__); \
1888 assert(i > 0 && i < sizeof(field)); \
1889 })
1890
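/* Applications consume VK_KHR_pipeline_executable_properties with the usual
 * two-call idiom. A rough sketch of the application side (not driver code;
 * `info` stands for a filled-in VkPipelineInfoKHR):
 *
 *    uint32_t count = 0;
 *    vkGetPipelineExecutablePropertiesKHR(device, &info, &count, NULL);
 *    VkPipelineExecutablePropertiesKHR *props = calloc(count, sizeof(*props));
 *    for (uint32_t i = 0; i < count; i++)
 *       props[i].sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_PROPERTIES_KHR;
 *    vkGetPipelineExecutablePropertiesKHR(device, &info, &count, props);
 *
 * The VK_OUTARRAY_* helpers below implement the driver side of that
 * protocol.
 */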
1891 VkResult anv_GetPipelineExecutablePropertiesKHR(
1892 VkDevice device,
1893 const VkPipelineInfoKHR* pPipelineInfo,
1894 uint32_t* pExecutableCount,
1895 VkPipelineExecutablePropertiesKHR* pProperties)
1896 {
1897 ANV_FROM_HANDLE(anv_pipeline, pipeline, pPipelineInfo->pipeline);
1898 VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutablePropertiesKHR, out,
1899 pProperties, pExecutableCount);
1900
1901 util_dynarray_foreach (&pipeline->executables, struct anv_pipeline_executable, exe) {
1902 vk_outarray_append_typed(VkPipelineExecutablePropertiesKHR, &out, props) {
1903 gl_shader_stage stage = exe->stage;
1904 props->stages = mesa_to_vk_shader_stage(stage);
1905
1906 unsigned simd_width = exe->stats.dispatch_width;
1907 if (stage == MESA_SHADER_FRAGMENT) {
1908 WRITE_STR(props->name, "%s%d %s",
1909 simd_width ? "SIMD" : "vec",
1910 simd_width ? simd_width : 4,
1911 _mesa_shader_stage_to_string(stage));
1912 } else {
1913 WRITE_STR(props->name, "%s", _mesa_shader_stage_to_string(stage));
1914 }
1915 WRITE_STR(props->description, "%s%d %s shader",
1916 simd_width ? "SIMD" : "vec",
1917 simd_width ? simd_width : 4,
1918 _mesa_shader_stage_to_string(stage));
1919
1920 /* The compiler gives us a dispatch width of 0 for vec4 but Vulkan
1921 * wants a subgroup size of 1.
1922 */
1923 props->subgroupSize = MAX2(simd_width, 1);
1924 }
1925 }
1926
1927 return vk_outarray_status(&out);
1928 }
1929
1930 static const struct anv_pipeline_executable *
1931 anv_pipeline_get_executable(struct anv_pipeline *pipeline, uint32_t index)
1932 {
1933 assert(index < util_dynarray_num_elements(&pipeline->executables,
1934 struct anv_pipeline_executable));
1935 return util_dynarray_element(
1936 &pipeline->executables, struct anv_pipeline_executable, index);
1937 }
1938
1939 VkResult anv_GetPipelineExecutableStatisticsKHR(
1940 VkDevice device,
1941 const VkPipelineExecutableInfoKHR* pExecutableInfo,
1942 uint32_t* pStatisticCount,
1943 VkPipelineExecutableStatisticKHR* pStatistics)
1944 {
1945 ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline);
1946 VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableStatisticKHR, out,
1947 pStatistics, pStatisticCount);
1948
1949 const struct anv_pipeline_executable *exe =
1950 anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
1951
1952 const struct elk_stage_prog_data *prog_data;
1953 switch (pipeline->type) {
1954 case ANV_PIPELINE_GRAPHICS: {
1955 prog_data = anv_pipeline_to_graphics(pipeline)->shaders[exe->stage]->prog_data;
1956 break;
1957 }
1958 case ANV_PIPELINE_COMPUTE: {
1959 prog_data = anv_pipeline_to_compute(pipeline)->cs->prog_data;
1960 break;
1961 }
1962 default:
1963 unreachable("invalid pipeline type");
1964 }
1965
1966 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
1967 WRITE_STR(stat->name, "Instruction Count");
1968 WRITE_STR(stat->description,
1969 "Number of GEN instructions in the final generated "
1970 "shader executable.");
1971 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
1972 stat->value.u64 = exe->stats.instructions;
1973 }
1974
1975 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
1976 WRITE_STR(stat->name, "SEND Count");
1977 WRITE_STR(stat->description,
1978 "Number of instructions in the final generated shader "
1979 "executable which access external units such as the "
1980 "constant cache or the sampler.");
1981 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
1982 stat->value.u64 = exe->stats.sends;
1983 }
1984
1985 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
1986 WRITE_STR(stat->name, "Loop Count");
1987 WRITE_STR(stat->description,
1988 "Number of loops (not unrolled) in the final generated "
1989 "shader executable.");
1990 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
1991 stat->value.u64 = exe->stats.loops;
1992 }
1993
1994 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
1995 WRITE_STR(stat->name, "Cycle Count");
1996 WRITE_STR(stat->description,
1997 "Estimate of the number of EU cycles required to execute "
1998 "the final generated executable. This is an estimate only "
1999 "and may vary greatly from actual run-time performance.");
2000 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2001 stat->value.u64 = exe->stats.cycles;
2002 }
2003
2004 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
2005 WRITE_STR(stat->name, "Spill Count");
2006 WRITE_STR(stat->description,
2007 "Number of scratch spill operations. This gives a rough "
2008 "estimate of the cost incurred due to spilling temporary "
2009 "values to memory. If this is non-zero, you may want to "
2010 "adjust your shader to reduce register pressure.");
2011 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2012 stat->value.u64 = exe->stats.spills;
2013 }
2014
2015 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
2016 WRITE_STR(stat->name, "Fill Count");
2017 WRITE_STR(stat->description,
2018 "Number of scratch fill operations. This gives a rough "
2019 "estimate of the cost incurred due to spilling temporary "
2020 "values to memory. If this is non-zero, you may want to "
2021 "adjust your shader to reduce register pressure.");
2022 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2023 stat->value.u64 = exe->stats.fills;
2024 }
2025
2026 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
2027 WRITE_STR(stat->name, "Scratch Memory Size");
2028 WRITE_STR(stat->description,
2029 "Number of bytes of scratch memory required by the "
2030 "generated shader executable. If this is non-zero, you "
2031 "may want to adjust your shader to reduce register "
2032 "pressure.");
2033 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2034 stat->value.u64 = prog_data->total_scratch;
2035 }
2036
2037 if (gl_shader_stage_uses_workgroup(exe->stage)) {
2038 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
2039 WRITE_STR(stat->name, "Workgroup Memory Size");
2040 WRITE_STR(stat->description,
2041 "Number of bytes of workgroup shared memory used by this "
2042 "shader including any padding.");
2043 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2044 stat->value.u64 = prog_data->total_shared;
2045 }
2046 }
2047
2048 return vk_outarray_status(&out);
2049 }
2050
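/* Copy a NUL-terminated IR string into the caller-provided buffer following
 * the usual Vulkan size-query convention: with pData == NULL only the
 * required size is returned; otherwise the string is copied (possibly
 * truncated) and false is returned if it did not fit.
 */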
2051 static bool
2052 write_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir,
2053 const char *data)
2054 {
2055 ir->isText = VK_TRUE;
2056
2057 size_t data_len = strlen(data) + 1;
2058
2059 if (ir->pData == NULL) {
2060 ir->dataSize = data_len;
2061 return true;
2062 }
2063
2064 strncpy(ir->pData, data, ir->dataSize);
2065 if (ir->dataSize < data_len)
2066 return false;
2067
2068 ir->dataSize = data_len;
2069 return true;
2070 }
2071
2072 VkResult anv_GetPipelineExecutableInternalRepresentationsKHR(
2073 VkDevice device,
2074 const VkPipelineExecutableInfoKHR* pExecutableInfo,
2075 uint32_t* pInternalRepresentationCount,
2076 VkPipelineExecutableInternalRepresentationKHR* pInternalRepresentations)
2077 {
2078 ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline);
2079 VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableInternalRepresentationKHR, out,
2080 pInternalRepresentations, pInternalRepresentationCount);
2081 bool incomplete_text = false;
2082
2083 const struct anv_pipeline_executable *exe =
2084 anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
2085
2086 if (exe->nir) {
2087 vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR, &out, ir) {
2088 WRITE_STR(ir->name, "Final NIR");
2089 WRITE_STR(ir->description,
2090 "Final NIR before going into the back-end compiler");
2091
2092 if (!write_ir_text(ir, exe->nir))
2093 incomplete_text = true;
2094 }
2095 }
2096
2097 if (exe->disasm) {
2098 vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR, &out, ir) {
2099 WRITE_STR(ir->name, "GEN Assembly");
2100 WRITE_STR(ir->description,
2101 "Final GEN assembly for the generated shader binary");
2102
2103 if (!write_ir_text(ir, exe->disasm))
2104 incomplete_text = true;
2105 }
2106 }
2107
2108 return incomplete_text ? VK_INCOMPLETE : vk_outarray_status(&out);
2109 }
2110