xref: /aosp_15_r20/external/mesa3d/src/gallium/frontends/lavapipe/lvp_pipeline.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2019 Red Hat.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "lvp_private.h"
25 #include "vk_nir_convert_ycbcr.h"
26 #include "vk_pipeline.h"
27 #include "vk_render_pass.h"
28 #include "vk_util.h"
29 #include "glsl_types.h"
30 #include "util/os_time.h"
31 #include "spirv/nir_spirv.h"
32 #include "nir/nir_builder.h"
33 #include "nir/nir_serialize.h"
34 #include "lvp_lower_vulkan_resource.h"
35 #include "pipe/p_state.h"
36 #include "pipe/p_context.h"
37 #include "nir/nir_xfb_info.h"
38 
39 #define SPIR_V_MAGIC_NUMBER 0x07230203
40 
41 #define MAX_DYNAMIC_STATES 72
42 
43 typedef void (*cso_destroy_func)(struct pipe_context*, void*);
44 
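/* Tear down a shader's compiled state: delete every inlined-uniform variant
 * CSO, the base CSO and the tess_ccw CSO through the queue context (taking
 * the queue lock unless the caller already holds it), then drop the NIR
 * references.
 */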
45 static void
46 shader_destroy(struct lvp_device *device, struct lvp_shader *shader, bool locked)
47 {
48    if (!shader->pipeline_nir)
49       return;
50    gl_shader_stage stage = shader->pipeline_nir->nir->info.stage;
51    cso_destroy_func destroy[] = {
52       device->queue.ctx->delete_vs_state,
53       device->queue.ctx->delete_tcs_state,
54       device->queue.ctx->delete_tes_state,
55       device->queue.ctx->delete_gs_state,
56       device->queue.ctx->delete_fs_state,
57       device->queue.ctx->delete_compute_state,
58       device->queue.ctx->delete_ts_state,
59       device->queue.ctx->delete_ms_state,
60    };
61 
62    if (!locked)
63       simple_mtx_lock(&device->queue.lock);
64 
65    set_foreach(&shader->inlines.variants, entry) {
66       struct lvp_inline_variant *variant = (void*)entry->key;
67       destroy[stage](device->queue.ctx, variant->cso);
68       free(variant);
69    }
70    ralloc_free(shader->inlines.variants.table);
71 
72    if (shader->shader_cso)
73       destroy[stage](device->queue.ctx, shader->shader_cso);
74    if (shader->tess_ccw_cso)
75       destroy[stage](device->queue.ctx, shader->tess_ccw_cso);
76 
77    if (!locked)
78       simple_mtx_unlock(&device->queue.lock);
79 
80    lvp_pipeline_nir_ref(&shader->pipeline_nir, NULL);
81    lvp_pipeline_nir_ref(&shader->tess_ccw, NULL);
82 }
83 
84 void
85 lvp_pipeline_destroy(struct lvp_device *device, struct lvp_pipeline *pipeline, bool locked)
86 {
87    lvp_forall_stage(i)
88       shader_destroy(device, &pipeline->shaders[i], locked);
89 
90    if (pipeline->layout)
91       vk_pipeline_layout_unref(&device->vk, &pipeline->layout->vk);
92 
93    for (unsigned i = 0; i < pipeline->num_groups; i++) {
94       LVP_FROM_HANDLE(lvp_pipeline, p, pipeline->groups[i]);
95       lvp_pipeline_destroy(device, p, locked);
96    }
97 
98    if (pipeline->rt.stages) {
99       for (uint32_t i = 0; i < pipeline->rt.stage_count; i++)
100          lvp_pipeline_nir_ref(pipeline->rt.stages + i, NULL);
101    }
102 
103    free(pipeline->rt.stages);
104    free(pipeline->rt.groups);
105 
106    vk_free(&device->vk.alloc, pipeline->state_data);
107    vk_object_base_finish(&pipeline->base);
108    vk_free(&device->vk.alloc, pipeline);
109 }
110 
111 VKAPI_ATTR void VKAPI_CALL lvp_DestroyPipeline(
112    VkDevice                                    _device,
113    VkPipeline                                  _pipeline,
114    const VkAllocationCallbacks*                pAllocator)
115 {
116    LVP_FROM_HANDLE(lvp_device, device, _device);
117    LVP_FROM_HANDLE(lvp_pipeline, pipeline, _pipeline);
118 
119    if (!_pipeline)
120       return;
121 
122    if (pipeline->used) {
123       simple_mtx_lock(&device->queue.lock);
124       util_dynarray_append(&device->queue.pipeline_destroys, struct lvp_pipeline*, pipeline);
125       simple_mtx_unlock(&device->queue.lock);
126    } else {
127       lvp_pipeline_destroy(device, pipeline, false);
128    }
129 }
130 
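/* Size/alignment callback for nir_lower_vars_to_explicit_types() on shared
 * and task-payload variables; booleans are counted as 4 bytes per component.
 */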
131 static void
132 shared_var_info(const struct glsl_type *type, unsigned *size, unsigned *align)
133 {
134    assert(glsl_type_is_vector_or_scalar(type));
135 
136    uint32_t comp_size = glsl_type_is_boolean(type)
137       ? 4 : glsl_get_bit_size(type) / 8;
138    unsigned length = glsl_get_vector_elements(type);
139    *size = comp_size * length;
140    *align = comp_size;
141 }
142 
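/* Strip nir_intrinsic_barrier instructions: for compute-like stages
 * (data != NULL), barriers that have an execution scope or a
 * workgroup/device/queue-family memory scope are kept; everything else is
 * removed.
 */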
143 static bool
144 remove_barriers_impl(nir_builder *b, nir_intrinsic_instr *intr, void *data)
145 {
146    if (intr->intrinsic != nir_intrinsic_barrier)
147       return false;
148    if (data) {
149       if (nir_intrinsic_execution_scope(intr) != SCOPE_NONE)
150          return false;
151 
152       if (nir_intrinsic_memory_scope(intr) == SCOPE_WORKGROUP ||
153           nir_intrinsic_memory_scope(intr) == SCOPE_DEVICE ||
154           nir_intrinsic_memory_scope(intr) == SCOPE_QUEUE_FAMILY)
155          return false;
156    }
157    nir_instr_remove(&intr->instr);
158    return true;
159 }
160 
161 static bool
162 remove_barriers(nir_shader *nir, bool is_compute)
163 {
164    return nir_shader_intrinsics_pass(nir, remove_barriers_impl,
165                                      nir_metadata_dominance,
166                                      (void*)is_compute);
167 }
168 
169 static bool
170 lower_demote_impl(nir_builder *b, nir_intrinsic_instr *intr, void *data)
171 {
172    if (intr->intrinsic == nir_intrinsic_demote) {
173       intr->intrinsic = nir_intrinsic_terminate;
174       return true;
175    }
176    if (intr->intrinsic == nir_intrinsic_demote_if) {
177       intr->intrinsic = nir_intrinsic_terminate_if;
178       return true;
179    }
180    return false;
181 }
182 
183 static bool
184 lower_demote(nir_shader *nir)
185 {
186    return nir_shader_intrinsics_pass(nir, lower_demote_impl,
187                                      nir_metadata_dominance, NULL);
188 }
189 
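/* Fold a constant nir_tex_src_texture_offset into tex->texture_index and
 * drop the offset source; non-constant offsets are left untouched.
 */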
190 static bool
191 find_tex(const nir_instr *instr, const void *data_cb)
192 {
193    if (instr->type == nir_instr_type_tex)
194       return true;
195    return false;
196 }
197 
198 static nir_def *
199 fixup_tex_instr(struct nir_builder *b, nir_instr *instr, void *data_cb)
200 {
201    nir_tex_instr *tex_instr = nir_instr_as_tex(instr);
202    unsigned offset = 0;
203 
204    int idx = nir_tex_instr_src_index(tex_instr, nir_tex_src_texture_offset);
205    if (idx == -1)
206       return NULL;
207 
208    if (!nir_src_is_const(tex_instr->src[idx].src))
209       return NULL;
210    offset = nir_src_comp_as_uint(tex_instr->src[idx].src, 0);
211 
212    nir_tex_instr_remove_src(tex_instr, idx);
213    tex_instr->texture_index += offset;
214    return NIR_LOWER_INSTR_PROGRESS;
215 }
216 
217 static bool
218 lvp_nir_fixup_indirect_tex(nir_shader *shader)
219 {
220    return nir_shader_lower_instructions(shader, find_tex, fixup_tex_instr, NULL);
221 }
222 
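/* Core NIR optimization loop; repeated until no pass reports progress. */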
223 static void
224 optimize(nir_shader *nir)
225 {
226    bool progress = false;
227    do {
228       progress = false;
229 
230       NIR_PASS(progress, nir, nir_lower_flrp, 32|64, true);
231       NIR_PASS(progress, nir, nir_split_array_vars, nir_var_function_temp);
232       NIR_PASS(progress, nir, nir_shrink_vec_array_vars, nir_var_function_temp);
233       NIR_PASS(progress, nir, nir_opt_deref);
234       NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
235 
236       NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
237 
238       NIR_PASS(progress, nir, nir_copy_prop);
239       NIR_PASS(progress, nir, nir_opt_dce);
240       NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
241 
242       NIR_PASS(progress, nir, nir_opt_algebraic);
243       NIR_PASS(progress, nir, nir_opt_constant_folding);
244 
245       NIR_PASS(progress, nir, nir_opt_remove_phis);
246       bool loop = false;
247       NIR_PASS(loop, nir, nir_opt_loop);
248       progress |= loop;
249       if (loop) {
250          /* If nir_opt_loop makes progress, then we need to clean
251           * things up if we want any hope of nir_opt_if or nir_opt_loop_unroll
252           * to make progress.
253           */
254          NIR_PASS(progress, nir, nir_copy_prop);
255          NIR_PASS(progress, nir, nir_opt_dce);
256          NIR_PASS(progress, nir, nir_opt_remove_phis);
257       }
258       NIR_PASS(progress, nir, nir_opt_if, nir_opt_if_optimize_phi_true_false);
259       NIR_PASS(progress, nir, nir_opt_dead_cf);
260       NIR_PASS(progress, nir, nir_opt_conditional_discard);
261       NIR_PASS(progress, nir, nir_opt_remove_phis);
262       NIR_PASS(progress, nir, nir_opt_cse);
263       NIR_PASS(progress, nir, nir_opt_undef);
264 
265       NIR_PASS(progress, nir, nir_opt_deref);
266       NIR_PASS(progress, nir, nir_lower_alu_to_scalar, NULL, NULL);
267       NIR_PASS(progress, nir, nir_opt_loop_unroll);
268       NIR_PASS(progress, nir, lvp_nir_fixup_indirect_tex);
269    } while (progress);
270 }
271 
272 void
273 lvp_shader_optimize(nir_shader *nir)
274 {
275    optimize(nir);
276    NIR_PASS_V(nir, nir_lower_var_copies);
277    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
278    NIR_PASS_V(nir, nir_opt_dce);
279    nir_sweep(nir);
280 }
281 
282 struct lvp_pipeline_nir *
283 lvp_create_pipeline_nir(nir_shader *nir)
284 {
285    struct lvp_pipeline_nir *pipeline_nir = ralloc(NULL, struct lvp_pipeline_nir);
286    pipeline_nir->nir = nir;
287    pipeline_nir->ref_cnt = 1;
288    return pipeline_nir;
289 }
290 
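/* Translate one stage's SPIR-V into NIR via vk_pipeline_shader_stage_to_nir()
 * using lavapipe's address formats (vec2 index + 32-bit offset for UBO/SSBO,
 * 64-bit global for physical storage buffers and constants).
 */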
291 static VkResult
292 compile_spirv(struct lvp_device *pdevice,
293               VkPipelineCreateFlags2KHR pipeline_flags,
294               const VkPipelineShaderStageCreateInfo *sinfo,
295               nir_shader **nir)
296 {
297    gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
298    assert(stage <= LVP_SHADER_STAGES && stage != MESA_SHADER_NONE);
299    VkResult result;
300 
301 #ifdef VK_ENABLE_BETA_EXTENSIONS
302    const VkPipelineShaderStageNodeCreateInfoAMDX *node_info = vk_find_struct_const(
303       sinfo->pNext, PIPELINE_SHADER_STAGE_NODE_CREATE_INFO_AMDX);
304 #endif
305 
306    const struct spirv_to_nir_options spirv_options = {
307       .environment = NIR_SPIRV_VULKAN,
308       .ubo_addr_format = nir_address_format_vec2_index_32bit_offset,
309       .ssbo_addr_format = nir_address_format_vec2_index_32bit_offset,
310       .phys_ssbo_addr_format = nir_address_format_64bit_global,
311       .push_const_addr_format = nir_address_format_logical,
312       .shared_addr_format = nir_address_format_32bit_offset,
313       .constant_addr_format = nir_address_format_64bit_global,
314 #ifdef VK_ENABLE_BETA_EXTENSIONS
315       .shader_index = node_info ? node_info->index : 0,
316 #endif
317    };
318 
319    result = vk_pipeline_shader_stage_to_nir(&pdevice->vk, pipeline_flags, sinfo,
320                                             &spirv_options, pdevice->physical_device->drv_options[stage],
321                                             NULL, nir);
322    return result;
323 }
324 
325 static bool
326 inline_variant_equals(const void *a, const void *b)
327 {
328    const struct lvp_inline_variant *av = a, *bv = b;
329    assert(av->mask == bv->mask);
330    u_foreach_bit(slot, av->mask) {
331       if (memcmp(av->vals[slot], bv->vals[slot], sizeof(av->vals[slot])))
332          return false;
333    }
334    return true;
335 }
336 
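/* Callback for nir_vk_lower_ycbcr_tex(): return the YCbCr conversion state of
 * the immutable sampler at the given set/binding/array index, or NULL when the
 * binding has no immutable samplers or no conversion attached.
 */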
337 static const struct vk_ycbcr_conversion_state *
338 lvp_ycbcr_conversion_lookup(const void *data, uint32_t set, uint32_t binding, uint32_t array_index)
339 {
340    const struct lvp_pipeline_layout *layout = data;
341 
342    const struct lvp_descriptor_set_layout *set_layout = container_of(layout->vk.set_layouts[set], struct lvp_descriptor_set_layout, vk);
343    const struct lvp_descriptor_set_binding_layout *binding_layout = &set_layout->binding[binding];
344    if (!binding_layout->immutable_samplers)
345       return NULL;
346 
347    struct vk_ycbcr_conversion *ycbcr_conversion = binding_layout->immutable_samplers[array_index]->vk.ycbcr_conversion;
348    return ycbcr_conversion ? &ycbcr_conversion->state : NULL;
349 }
350 
351 /* pipeline is NULL for shader objects. */
352 static void
353 lvp_shader_lower(struct lvp_device *pdevice, struct lvp_pipeline *pipeline, nir_shader *nir, struct lvp_pipeline_layout *layout)
354 {
355    if (nir->info.stage != MESA_SHADER_TESS_CTRL)
356       NIR_PASS_V(nir, remove_barriers, nir->info.stage == MESA_SHADER_COMPUTE || nir->info.stage == MESA_SHADER_MESH || nir->info.stage == MESA_SHADER_TASK);
357 
358    const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
359       .frag_coord = true,
360       .point_coord = true,
361    };
362    NIR_PASS_V(nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);
363 
364    struct nir_lower_subgroups_options subgroup_opts = {0};
365    subgroup_opts.lower_quad = true;
366    subgroup_opts.ballot_components = 1;
367    subgroup_opts.ballot_bit_size = 32;
368    subgroup_opts.lower_inverse_ballot = true;
369    NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_opts);
370 
371    if (nir->info.stage == MESA_SHADER_FRAGMENT)
372       lvp_lower_input_attachments(nir, false);
373    NIR_PASS_V(nir, nir_lower_system_values);
374    NIR_PASS_V(nir, nir_lower_is_helper_invocation);
375    NIR_PASS_V(nir, lower_demote);
376 
377    const struct nir_lower_compute_system_values_options compute_system_values = {0};
378    NIR_PASS_V(nir, nir_lower_compute_system_values, &compute_system_values);
379 
380    NIR_PASS_V(nir, nir_remove_dead_variables,
381               nir_var_uniform | nir_var_image, NULL);
382 
383    optimize(nir);
384    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
385 
386    NIR_PASS_V(nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir), true, true);
387    NIR_PASS_V(nir, nir_split_var_copies);
388    NIR_PASS_V(nir, nir_lower_global_vars_to_local);
389 
390    NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_push_const,
391               nir_address_format_32bit_offset);
392 
393    NIR_PASS_V(nir, nir_lower_explicit_io,
394               nir_var_mem_ubo | nir_var_mem_ssbo,
395               nir_address_format_vec2_index_32bit_offset);
396 
397    NIR_PASS_V(nir, nir_lower_explicit_io,
398               nir_var_mem_global | nir_var_mem_constant,
399               nir_address_format_64bit_global);
400 
401    if (nir->info.stage == MESA_SHADER_COMPUTE)
402       lvp_lower_exec_graph(pipeline, nir);
403 
404    NIR_PASS(_, nir, nir_vk_lower_ycbcr_tex, lvp_ycbcr_conversion_lookup, layout);
405 
406    nir_lower_non_uniform_access_options options = {
407       .types = nir_lower_non_uniform_ubo_access | nir_lower_non_uniform_texture_access | nir_lower_non_uniform_image_access,
408    };
409    NIR_PASS(_, nir, nir_lower_non_uniform_access, &options);
410 
411    lvp_lower_pipeline_layout(pdevice, layout, nir);
412 
413    NIR_PASS(_, nir, lvp_nir_lower_ray_queries);
414 
415    if (nir->info.stage == MESA_SHADER_COMPUTE ||
416        nir->info.stage == MESA_SHADER_TASK ||
417        nir->info.stage == MESA_SHADER_MESH) {
418       NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_mem_shared, shared_var_info);
419       NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_shared, nir_address_format_32bit_offset);
420    }
421 
422    if (nir->info.stage == MESA_SHADER_TASK ||
423        nir->info.stage == MESA_SHADER_MESH) {
424       NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_mem_task_payload, shared_var_info);
425       NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_task_payload, nir_address_format_32bit_offset);
426    }
427 
428    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
429 
430    if (nir->info.stage == MESA_SHADER_VERTEX ||
431        nir->info.stage == MESA_SHADER_GEOMETRY) {
432       NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
433    } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
434       NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true);
435    }
436 
437    /* TODO: also optimize the tex srcs. see radeonSI for reference */
438    /* Skip if there are potentially conflicting rounding modes */
439    struct nir_opt_16bit_tex_image_options opt_16bit_options = {
440       .rounding_mode = nir_rounding_mode_undef,
441       .opt_tex_dest_types = nir_type_float | nir_type_uint | nir_type_int,
442    };
443    NIR_PASS_V(nir, nir_opt_16bit_tex_image, &opt_16bit_options);
444 
445    /* Lower texture OPs llvmpipe supports to reduce the amount of sample
446     * functions that need to be pre-compiled.
447     */
448    const nir_lower_tex_options tex_options = {
449       /* lower_tg4_offsets can introduce new sparse residency intrinsics
450        * which is why we have to lower everything before calling
451        * lvp_nir_lower_sparse_residency.
452        */
453       .lower_tg4_offsets = true,
454       .lower_txd = true,
455    };
456    NIR_PASS(_, nir, nir_lower_tex, &tex_options);
457 
458    NIR_PASS(_, nir, lvp_nir_lower_sparse_residency);
459 
460    lvp_shader_optimize(nir);
461 
462    if (nir->info.stage != MESA_SHADER_VERTEX)
463       nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, nir->info.stage);
464    else {
465       nir->num_inputs = util_last_bit64(nir->info.inputs_read);
466       nir_foreach_shader_in_variable(var, nir) {
467          var->data.driver_location = var->data.location - VERT_ATTRIB_GENERIC0;
468       }
469    }
470    nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
471                                nir->info.stage);
472 }
473 
474 VkResult
475 lvp_spirv_to_nir(struct lvp_pipeline *pipeline, const VkPipelineShaderStageCreateInfo *sinfo,
476                  nir_shader **out_nir)
477 {
478    VkResult result = compile_spirv(pipeline->device, pipeline->flags, sinfo, out_nir);
479    if (result == VK_SUCCESS)
480       lvp_shader_lower(pipeline->device, pipeline, *out_nir, pipeline->layout);
481 
482    return result;
483 }
484 
485 void
486 lvp_shader_init(struct lvp_shader *shader, nir_shader *nir)
487 {
488    nir_function_impl *impl = nir_shader_get_entrypoint(nir);
489    if (impl->ssa_alloc > 100) //skip for small shaders
490       shader->inlines.must_inline = lvp_find_inlinable_uniforms(shader, nir);
491    shader->pipeline_nir = lvp_create_pipeline_nir(nir);
492    if (shader->inlines.can_inline)
493       _mesa_set_init(&shader->inlines.variants, NULL, NULL, inline_variant_equals);
494 }
495 
496 static VkResult
497 lvp_shader_compile_to_ir(struct lvp_pipeline *pipeline,
498                          const VkPipelineShaderStageCreateInfo *sinfo)
499 {
500    gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
501    assert(stage <= LVP_SHADER_STAGES && stage != MESA_SHADER_NONE);
502    nir_shader *nir;
503    VkResult result = lvp_spirv_to_nir(pipeline, sinfo, &nir);
504    if (result == VK_SUCCESS) {
505       struct lvp_shader *shader = &pipeline->shaders[stage];
506       lvp_shader_init(shader, nir);
507    }
508    return result;
509 }
510 
511 static void
512 merge_tess_info(struct shader_info *tes_info,
513                 const struct shader_info *tcs_info)
514 {
515    /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
516     *
517     *    "PointMode. Controls generation of points rather than triangles
518     *     or lines. This functionality defaults to disabled, and is
519     *     enabled if either shader stage includes the execution mode."
520     *
521     * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
522     * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
523     * and OutputVertices, it says:
524     *
525     *    "One mode must be set in at least one of the tessellation
526     *     shader stages."
527     *
528     * So, the fields can be set in either the TCS or TES, but they must
529     * agree if set in both.  Our backend looks at TES, so bitwise-or in
530     * the values from the TCS.
531     */
532    assert(tcs_info->tess.tcs_vertices_out == 0 ||
533           tes_info->tess.tcs_vertices_out == 0 ||
534           tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
535    tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;
536 
537    assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
538           tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
539           tcs_info->tess.spacing == tes_info->tess.spacing);
540    tes_info->tess.spacing |= tcs_info->tess.spacing;
541 
542    assert(tcs_info->tess._primitive_mode == 0 ||
543           tes_info->tess._primitive_mode == 0 ||
544           tcs_info->tess._primitive_mode == tes_info->tess._primitive_mode);
545    tes_info->tess._primitive_mode |= tcs_info->tess._primitive_mode;
546    tes_info->tess.ccw |= tcs_info->tess.ccw;
547    tes_info->tess.point_mode |= tcs_info->tess.point_mode;
548 }
549 
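/* Convert the shader's nir_xfb_info into the gallium stream-output state,
 * remapping varying slots to driver locations for register_index.
 */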
550 static void
551 lvp_shader_xfb_init(struct lvp_shader *shader)
552 {
553    nir_xfb_info *xfb_info = shader->pipeline_nir->nir->xfb_info;
554    if (xfb_info) {
555       uint8_t output_mapping[VARYING_SLOT_TESS_MAX];
556       memset(output_mapping, 0, sizeof(output_mapping));
557 
558       nir_foreach_shader_out_variable(var, shader->pipeline_nir->nir) {
559          unsigned slots = nir_variable_count_slots(var, var->type);
560          for (unsigned i = 0; i < slots; i++)
561             output_mapping[var->data.location + i] = var->data.driver_location + i;
562       }
563 
564       shader->stream_output.num_outputs = xfb_info->output_count;
565       for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
566          if (xfb_info->buffers_written & (1 << i)) {
567             shader->stream_output.stride[i] = xfb_info->buffers[i].stride / 4;
568          }
569       }
570       for (unsigned i = 0; i < xfb_info->output_count; i++) {
571          shader->stream_output.output[i].output_buffer = xfb_info->outputs[i].buffer;
572          shader->stream_output.output[i].dst_offset = xfb_info->outputs[i].offset / 4;
573          shader->stream_output.output[i].register_index = output_mapping[xfb_info->outputs[i].location];
574          shader->stream_output.output[i].num_components = util_bitcount(xfb_info->outputs[i].component_mask);
575          shader->stream_output.output[i].start_component = xfb_info->outputs[i].component_offset;
576          shader->stream_output.output[i].stream = xfb_info->buffer_to_stream[xfb_info->outputs[i].buffer];
577       }
578 
579    }
580 }
581 
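/* Pick the last pre-fragment geometry stage (geometry, then tess eval, then
 * mesh, falling back to vertex), record it as pipeline->last_vertex and set up
 * its stream output state.
 */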
582 static void
583 lvp_pipeline_xfb_init(struct lvp_pipeline *pipeline)
584 {
585    gl_shader_stage stage = MESA_SHADER_VERTEX;
586    if (pipeline->shaders[MESA_SHADER_GEOMETRY].pipeline_nir)
587       stage = MESA_SHADER_GEOMETRY;
588    else if (pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir)
589       stage = MESA_SHADER_TESS_EVAL;
590    else if (pipeline->shaders[MESA_SHADER_MESH].pipeline_nir)
591       stage = MESA_SHADER_MESH;
592    pipeline->last_vertex = stage;
593    lvp_shader_xfb_init(&pipeline->shaders[stage]);
594 }
595 
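/* Create the gallium CSO for a finalized NIR shader through the queue
 * context: compute uses pipe_compute_state, all other stages use
 * pipe_shader_state with the shader's stream output copied in.
 */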
596 static void *
597 lvp_shader_compile_stage(struct lvp_device *device, struct lvp_shader *shader, nir_shader *nir)
598 {
599    if (nir->info.stage == MESA_SHADER_COMPUTE) {
600       struct pipe_compute_state shstate = {0};
601       shstate.prog = nir;
602       shstate.ir_type = PIPE_SHADER_IR_NIR;
603       shstate.static_shared_mem = nir->info.shared_size;
604       return device->queue.ctx->create_compute_state(device->queue.ctx, &shstate);
605    } else {
606       struct pipe_shader_state shstate = {0};
607       shstate.type = PIPE_SHADER_IR_NIR;
608       shstate.ir.nir = nir;
609       memcpy(&shstate.stream_output, &shader->stream_output, sizeof(shstate.stream_output));
610 
611       switch (nir->info.stage) {
612       case MESA_SHADER_FRAGMENT:
613          return device->queue.ctx->create_fs_state(device->queue.ctx, &shstate);
614       case MESA_SHADER_VERTEX:
615          return device->queue.ctx->create_vs_state(device->queue.ctx, &shstate);
616       case MESA_SHADER_GEOMETRY:
617          return device->queue.ctx->create_gs_state(device->queue.ctx, &shstate);
618       case MESA_SHADER_TESS_CTRL:
619          return device->queue.ctx->create_tcs_state(device->queue.ctx, &shstate);
620       case MESA_SHADER_TESS_EVAL:
621          return device->queue.ctx->create_tes_state(device->queue.ctx, &shstate);
622       case MESA_SHADER_TASK:
623          return device->queue.ctx->create_ts_state(device->queue.ctx, &shstate);
624       case MESA_SHADER_MESH:
625          return device->queue.ctx->create_ms_state(device->queue.ctx, &shstate);
626       default:
627          unreachable("illegal shader");
628          break;
629       }
630    }
631    return NULL;
632 }
633 
634 void *
635 lvp_shader_compile(struct lvp_device *device, struct lvp_shader *shader, nir_shader *nir, bool locked)
636 {
637    device->physical_device->pscreen->finalize_nir(device->physical_device->pscreen, nir);
638 
639    if (!locked)
640       simple_mtx_lock(&device->queue.lock);
641 
642    void *state = lvp_shader_compile_stage(device, shader, nir);
643 
644    if (!locked)
645       simple_mtx_unlock(&device->queue.lock);
646 
647    return state;
648 }
649 
650 #ifndef NDEBUG
651 static bool
652 layouts_equal(const struct lvp_descriptor_set_layout *a, const struct lvp_descriptor_set_layout *b)
653 {
654    const uint8_t *pa = (const uint8_t*)a, *pb = (const uint8_t*)b;
655    uint32_t hash_start_offset = sizeof(struct vk_descriptor_set_layout);
656    uint32_t binding_offset = offsetof(struct lvp_descriptor_set_layout, binding);
657    /* base equal */
658    if (memcmp(pa + hash_start_offset, pb + hash_start_offset, binding_offset - hash_start_offset))
659       return false;
660 
661    /* bindings equal */
662    if (a->binding_count != b->binding_count)
663       return false;
664    size_t binding_size = a->binding_count * sizeof(struct lvp_descriptor_set_binding_layout);
665    const struct lvp_descriptor_set_binding_layout *la = a->binding;
666    const struct lvp_descriptor_set_binding_layout *lb = b->binding;
667    if (memcmp(la, lb, binding_size)) {
668       for (unsigned i = 0; i < a->binding_count; i++) {
669          if (memcmp(&la[i], &lb[i], offsetof(struct lvp_descriptor_set_binding_layout, immutable_samplers)))
670             return false;
671       }
672    }
673 
674    /* immutable sampler equal */
675    if (a->immutable_sampler_count != b->immutable_sampler_count)
676       return false;
677    if (a->immutable_sampler_count) {
678       size_t sampler_size = a->immutable_sampler_count * sizeof(struct lvp_sampler *);
679       if (memcmp(pa + binding_offset + binding_size, pb + binding_offset + binding_size, sampler_size)) {
680          struct lvp_sampler **sa = (struct lvp_sampler **)(pa + binding_offset);
681          struct lvp_sampler **sb = (struct lvp_sampler **)(pb + binding_offset);
682          for (unsigned i = 0; i < a->immutable_sampler_count; i++) {
683             if (memcmp(sa[i], sb[i], sizeof(struct lvp_sampler)))
684                return false;
685          }
686       }
687    }
688    return true;
689 }
690 #endif
691 
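/* Merge a (possibly partial) pipeline layout into dst->layout: take a
 * reference when dst has no layout yet or the layouts are identical,
 * otherwise clone dst->layout and pull in the set layouts and push-constant
 * state that only 'src' provides, without modifying the original object.
 */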
692 static void
693 merge_layouts(struct vk_device *device, struct lvp_pipeline *dst, struct lvp_pipeline_layout *src)
694 {
695    if (!src)
696       return;
697    if (dst->layout) {
698       /* these must match */
699       ASSERTED VkPipelineCreateFlags src_flag = src->vk.create_flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT;
700       ASSERTED VkPipelineCreateFlags dst_flag = dst->layout->vk.create_flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT;
701       assert(src_flag == dst_flag);
702    }
703    /* always try to reuse existing layout: independent sets bit doesn't guarantee independent sets */
704    if (!dst->layout) {
705       dst->layout = (struct lvp_pipeline_layout*)vk_pipeline_layout_ref(&src->vk);
706       return;
707    }
708    /* this is a big optimization when hit */
709    if (dst->layout == src)
710       return;
711 #ifndef NDEBUG
712    /* verify that layouts match */
713    const struct lvp_pipeline_layout *smaller = dst->layout->vk.set_count < src->vk.set_count ? dst->layout : src;
714    const struct lvp_pipeline_layout *bigger = smaller == dst->layout ? src : dst->layout;
715    for (unsigned i = 0; i < smaller->vk.set_count; i++) {
716       if (!smaller->vk.set_layouts[i] || !bigger->vk.set_layouts[i] ||
717           smaller->vk.set_layouts[i] == bigger->vk.set_layouts[i])
718          continue;
719 
720       const struct lvp_descriptor_set_layout *smaller_set_layout =
721          vk_to_lvp_descriptor_set_layout(smaller->vk.set_layouts[i]);
722       const struct lvp_descriptor_set_layout *bigger_set_layout =
723          vk_to_lvp_descriptor_set_layout(bigger->vk.set_layouts[i]);
724 
725       assert(!smaller_set_layout->binding_count ||
726              !bigger_set_layout->binding_count ||
727              layouts_equal(smaller_set_layout, bigger_set_layout));
728    }
729 #endif
730    /* must be independent sets with different layouts: reallocate to avoid modifying original layout */
731    struct lvp_pipeline_layout *old_layout = dst->layout;
732    dst->layout = vk_zalloc(&device->alloc, sizeof(struct lvp_pipeline_layout), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
733    memcpy(dst->layout, old_layout, sizeof(struct lvp_pipeline_layout));
734    dst->layout->vk.ref_cnt = 1;
735    for (unsigned i = 0; i < dst->layout->vk.set_count; i++) {
736       if (dst->layout->vk.set_layouts[i])
737          vk_descriptor_set_layout_ref(dst->layout->vk.set_layouts[i]);
738    }
739    vk_pipeline_layout_unref(device, &old_layout->vk);
740 
741    for (unsigned i = 0; i < src->vk.set_count; i++) {
742       if (!dst->layout->vk.set_layouts[i]) {
743          dst->layout->vk.set_layouts[i] = src->vk.set_layouts[i];
744          if (dst->layout->vk.set_layouts[i])
745             vk_descriptor_set_layout_ref(src->vk.set_layouts[i]);
746       }
747    }
748    dst->layout->vk.set_count = MAX2(dst->layout->vk.set_count,
749                                     src->vk.set_count);
750    dst->layout->push_constant_size += src->push_constant_size;
751    dst->layout->push_constant_stages |= src->push_constant_stages;
752 }
753 
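/* Copy a shader from a pipeline library while clearing the fields the
 * destination pipeline must own itself (NIR references, CSOs, inline variant
 * set).
 */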
754 static void
755 copy_shader_sanitized(struct lvp_shader *dst, const struct lvp_shader *src)
756 {
757    *dst = *src;
758    dst->pipeline_nir = NULL; //this gets handled later
759    dst->tess_ccw = NULL; //this gets handled later
760    assert(!dst->shader_cso);
761    assert(!dst->tess_ccw_cso);
762    if (src->inlines.can_inline)
763       _mesa_set_init(&dst->inlines.variants, NULL, NULL, inline_variant_equals);
764 }
765 
766 static VkResult
767 lvp_graphics_pipeline_init(struct lvp_pipeline *pipeline,
768                            struct lvp_device *device,
769                            struct lvp_pipeline_cache *cache,
770                            const VkGraphicsPipelineCreateInfo *pCreateInfo,
771                            VkPipelineCreateFlagBits2KHR flags)
772 {
773    pipeline->type = LVP_PIPELINE_GRAPHICS;
774    pipeline->flags = flags;
775 
776    VkResult result;
777 
778    const VkGraphicsPipelineLibraryCreateInfoEXT *libinfo = vk_find_struct_const(pCreateInfo,
779                                                                                 GRAPHICS_PIPELINE_LIBRARY_CREATE_INFO_EXT);
780    const VkPipelineLibraryCreateInfoKHR *libstate = vk_find_struct_const(pCreateInfo,
781                                                                          PIPELINE_LIBRARY_CREATE_INFO_KHR);
782    const VkGraphicsPipelineLibraryFlagsEXT layout_stages = VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT |
783                                                            VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT;
784    if (libinfo)
785       pipeline->stages = libinfo->flags;
786    else if (!libstate)
787       pipeline->stages = VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT |
788                          VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT |
789                          VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT |
790                          VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT;
791 
792    if (flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR)
793       pipeline->library = true;
794 
795    struct lvp_pipeline_layout *layout = lvp_pipeline_layout_from_handle(pCreateInfo->layout);
796 
797    if (!layout || !(layout->vk.create_flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT))
798       /* this is a regular pipeline with no partials: directly reuse */
799       pipeline->layout = layout ? (void*)vk_pipeline_layout_ref(&layout->vk) : NULL;
800    else if (pipeline->stages & layout_stages) {
801       if ((pipeline->stages & layout_stages) == layout_stages)
802          /* this has all the layout stages: directly reuse */
803          pipeline->layout = (void*)vk_pipeline_layout_ref(&layout->vk);
804       else {
805          /* this is a partial: copy for later merging to avoid modifying another layout */
806          merge_layouts(&device->vk, pipeline, layout);
807       }
808    }
809 
810    if (libstate) {
811       for (unsigned i = 0; i < libstate->libraryCount; i++) {
812          LVP_FROM_HANDLE(lvp_pipeline, p, libstate->pLibraries[i]);
813          vk_graphics_pipeline_state_merge(&pipeline->graphics_state,
814                                           &p->graphics_state);
815          if (p->stages & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) {
816             pipeline->line_smooth = p->line_smooth;
817             pipeline->disable_multisample = p->disable_multisample;
818             pipeline->line_rectangular = p->line_rectangular;
819             memcpy(pipeline->shaders, p->shaders, sizeof(struct lvp_shader) * 4);
820             memcpy(&pipeline->shaders[MESA_SHADER_TASK], &p->shaders[MESA_SHADER_TASK], sizeof(struct lvp_shader) * 2);
821             lvp_forall_gfx_stage(i) {
822                if (i == MESA_SHADER_FRAGMENT)
823                   continue;
824                copy_shader_sanitized(&pipeline->shaders[i], &p->shaders[i]);
825             }
826          }
827          if (p->stages & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) {
828             pipeline->force_min_sample = p->force_min_sample;
829             copy_shader_sanitized(&pipeline->shaders[MESA_SHADER_FRAGMENT], &p->shaders[MESA_SHADER_FRAGMENT]);
830          }
831          if (p->stages & layout_stages) {
832             if (!layout || (layout->vk.create_flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT))
833                merge_layouts(&device->vk, pipeline, p->layout);
834          }
835          pipeline->stages |= p->stages;
836       }
837    }
838 
839    result = vk_graphics_pipeline_state_fill(&device->vk,
840                                             &pipeline->graphics_state,
841                                             pCreateInfo, NULL, 0, NULL, NULL,
842                                             VK_SYSTEM_ALLOCATION_SCOPE_OBJECT,
843                                             &pipeline->state_data);
844    if (result != VK_SUCCESS)
845       return result;
846 
847    assert(pipeline->library || pipeline->stages & (VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT |
848                                                    VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT |
849                                                    VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT));
850 
851    pipeline->device = device;
852 
853    for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
854       const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];
855       gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
856       if (stage == MESA_SHADER_FRAGMENT) {
857          if (!(pipeline->stages & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT))
858             continue;
859       } else {
860          if (!(pipeline->stages & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT))
861             continue;
862       }
863       result = lvp_shader_compile_to_ir(pipeline, sinfo);
864       if (result != VK_SUCCESS)
865          goto fail;
866 
867       switch (stage) {
868       case MESA_SHADER_FRAGMENT:
869          if (pipeline->shaders[MESA_SHADER_FRAGMENT].pipeline_nir->nir->info.fs.uses_sample_shading)
870             pipeline->force_min_sample = true;
871          break;
872       default: break;
873       }
874    }
875    if (pCreateInfo->stageCount && pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir) {
876       nir_lower_patch_vertices(pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir->nir, pipeline->shaders[MESA_SHADER_TESS_CTRL].pipeline_nir->nir->info.tess.tcs_vertices_out, NULL);
877       merge_tess_info(&pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir->nir->info, &pipeline->shaders[MESA_SHADER_TESS_CTRL].pipeline_nir->nir->info);
878       if (BITSET_TEST(pipeline->graphics_state.dynamic,
879                       MESA_VK_DYNAMIC_TS_DOMAIN_ORIGIN)) {
880          pipeline->shaders[MESA_SHADER_TESS_EVAL].tess_ccw = lvp_create_pipeline_nir(nir_shader_clone(NULL, pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir->nir));
881          pipeline->shaders[MESA_SHADER_TESS_EVAL].tess_ccw->nir->info.tess.ccw = !pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir->nir->info.tess.ccw;
882       } else if (pipeline->graphics_state.ts &&
883                  pipeline->graphics_state.ts->domain_origin == VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT) {
884          pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir->nir->info.tess.ccw = !pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir->nir->info.tess.ccw;
885       }
886    }
887    if (libstate) {
888        for (unsigned i = 0; i < libstate->libraryCount; i++) {
889           LVP_FROM_HANDLE(lvp_pipeline, p, libstate->pLibraries[i]);
890           if (p->stages & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) {
891              if (p->shaders[MESA_SHADER_FRAGMENT].pipeline_nir)
892                 lvp_pipeline_nir_ref(&pipeline->shaders[MESA_SHADER_FRAGMENT].pipeline_nir, p->shaders[MESA_SHADER_FRAGMENT].pipeline_nir);
893           }
894           if (p->stages & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) {
895              lvp_forall_gfx_stage(j) {
896                 if (j == MESA_SHADER_FRAGMENT)
897                    continue;
898                 if (p->shaders[j].pipeline_nir)
899                    lvp_pipeline_nir_ref(&pipeline->shaders[j].pipeline_nir, p->shaders[j].pipeline_nir);
900              }
901              if (p->shaders[MESA_SHADER_TESS_EVAL].tess_ccw)
902                 lvp_pipeline_nir_ref(&pipeline->shaders[MESA_SHADER_TESS_EVAL].tess_ccw, p->shaders[MESA_SHADER_TESS_EVAL].tess_ccw);
903           }
904        }
905    } else if (pipeline->stages & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) {
906       const struct vk_rasterization_state *rs = pipeline->graphics_state.rs;
907       if (rs) {
908          /* always draw bresenham if !smooth */
909          pipeline->line_smooth = rs->line.mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_KHR;
910          pipeline->disable_multisample = rs->line.mode == VK_LINE_RASTERIZATION_MODE_BRESENHAM_KHR ||
911                                          rs->line.mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_KHR;
912          pipeline->line_rectangular = rs->line.mode != VK_LINE_RASTERIZATION_MODE_BRESENHAM_KHR;
913       } else
914          pipeline->line_rectangular = true;
915       lvp_pipeline_xfb_init(pipeline);
916    }
917    if (!libstate && !pipeline->library)
918       lvp_pipeline_shaders_compile(pipeline, false);
919 
920    return VK_SUCCESS;
921 
922 fail:
923    for (unsigned i = 0; i < ARRAY_SIZE(pipeline->shaders); i++) {
924       lvp_pipeline_nir_ref(&pipeline->shaders[i].pipeline_nir, NULL);
925    }
926    vk_free(&device->vk.alloc, pipeline->state_data);
927 
928    return result;
929 }
930 
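/* Compile the gallium CSOs for every stage that has NIR attached, skipping
 * shaders that rely on uniform inlining (their variant CSOs are compiled
 * separately).
 */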
931 void
932 lvp_pipeline_shaders_compile(struct lvp_pipeline *pipeline, bool locked)
933 {
934    if (pipeline->compiled)
935       return;
936    for (uint32_t i = 0; i < ARRAY_SIZE(pipeline->shaders); i++) {
937       if (!pipeline->shaders[i].pipeline_nir)
938          continue;
939 
940       gl_shader_stage stage = i;
941       assert(stage == pipeline->shaders[i].pipeline_nir->nir->info.stage);
942 
943       if (!pipeline->shaders[stage].inlines.can_inline) {
944          pipeline->shaders[stage].shader_cso = lvp_shader_compile(pipeline->device, &pipeline->shaders[stage],
945             nir_shader_clone(NULL, pipeline->shaders[stage].pipeline_nir->nir), locked);
946          if (pipeline->shaders[MESA_SHADER_TESS_EVAL].tess_ccw)
947             pipeline->shaders[MESA_SHADER_TESS_EVAL].tess_ccw_cso = lvp_shader_compile(pipeline->device, &pipeline->shaders[stage],
948                nir_shader_clone(NULL, pipeline->shaders[MESA_SHADER_TESS_EVAL].tess_ccw->nir), locked);
949       }
950    }
951    pipeline->compiled = true;
952 }
953 
954 static VkResult
955 lvp_graphics_pipeline_create(
956    VkDevice _device,
957    VkPipelineCache _cache,
958    const VkGraphicsPipelineCreateInfo *pCreateInfo,
959    VkPipelineCreateFlagBits2KHR flags,
960    VkPipeline *pPipeline,
961    bool group)
962 {
963    LVP_FROM_HANDLE(lvp_device, device, _device);
964    LVP_FROM_HANDLE(lvp_pipeline_cache, cache, _cache);
965    struct lvp_pipeline *pipeline;
966    VkResult result;
967 
968    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
969 
970    size_t size = 0;
971    const VkGraphicsPipelineShaderGroupsCreateInfoNV *groupinfo = vk_find_struct_const(pCreateInfo, GRAPHICS_PIPELINE_SHADER_GROUPS_CREATE_INFO_NV);
972    if (!group && groupinfo)
973       size += (groupinfo->groupCount + groupinfo->pipelineCount) * sizeof(VkPipeline);
974 
975    pipeline = vk_zalloc(&device->vk.alloc, sizeof(*pipeline) + size, 8,
976                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
977    if (pipeline == NULL)
978       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
979 
980    vk_object_base_init(&device->vk, &pipeline->base,
981                        VK_OBJECT_TYPE_PIPELINE);
982    uint64_t t0 = os_time_get_nano();
983    result = lvp_graphics_pipeline_init(pipeline, device, cache, pCreateInfo, flags);
984    if (result != VK_SUCCESS) {
985       vk_free(&device->vk.alloc, pipeline);
986       return result;
987    }
988    if (!group && groupinfo) {
989       VkGraphicsPipelineCreateInfo pci = *pCreateInfo;
990       for (unsigned i = 0; i < groupinfo->groupCount; i++) {
991          const VkGraphicsShaderGroupCreateInfoNV *g = &groupinfo->pGroups[i];
992          pci.pVertexInputState = g->pVertexInputState;
993          pci.pTessellationState = g->pTessellationState;
994          pci.pStages = g->pStages;
995          pci.stageCount = g->stageCount;
996          result = lvp_graphics_pipeline_create(_device, _cache, &pci, flags, &pipeline->groups[i], true);
997          if (result != VK_SUCCESS) {
998             lvp_pipeline_destroy(device, pipeline, false);
999             return result;
1000          }
1001          pipeline->num_groups++;
1002       }
1003       for (unsigned i = 0; i < groupinfo->pipelineCount; i++)
1004          pipeline->groups[pipeline->num_groups + i] = groupinfo->pPipelines[i];
1005       pipeline->num_groups_total = groupinfo->groupCount + groupinfo->pipelineCount;
1006    }
1007 
1008    VkPipelineCreationFeedbackCreateInfo *feedback = (void*)vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
1009    if (feedback && !group) {
1010       feedback->pPipelineCreationFeedback->duration = os_time_get_nano() - t0;
1011       feedback->pPipelineCreationFeedback->flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
1012       memset(feedback->pPipelineStageCreationFeedbacks, 0, sizeof(VkPipelineCreationFeedback) * feedback->pipelineStageCreationFeedbackCount);
1013    }
1014 
1015    *pPipeline = lvp_pipeline_to_handle(pipeline);
1016 
1017    return VK_SUCCESS;
1018 }
1019 
1020 VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateGraphicsPipelines(
1021    VkDevice                                    _device,
1022    VkPipelineCache                             pipelineCache,
1023    uint32_t                                    count,
1024    const VkGraphicsPipelineCreateInfo*         pCreateInfos,
1025    const VkAllocationCallbacks*                pAllocator,
1026    VkPipeline*                                 pPipelines)
1027 {
1028    VkResult result = VK_SUCCESS;
1029    unsigned i = 0;
1030 
1031    for (; i < count; i++) {
1032       VkResult r = VK_PIPELINE_COMPILE_REQUIRED;
1033       VkPipelineCreateFlagBits2KHR flags = vk_graphics_pipeline_create_flags(&pCreateInfos[i]);
1034 
1035       if (!(flags & VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR))
1036          r = lvp_graphics_pipeline_create(_device,
1037                                           pipelineCache,
1038                                           &pCreateInfos[i],
1039                                           flags,
1040                                           &pPipelines[i],
1041                                           false);
1042       if (r != VK_SUCCESS) {
1043          result = r;
1044          pPipelines[i] = VK_NULL_HANDLE;
1045          if (flags & VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT_KHR)
1046             break;
1047       }
1048    }
1049    if (result != VK_SUCCESS) {
1050       for (; i < count; i++)
1051          pPipelines[i] = VK_NULL_HANDLE;
1052    }
1053 
1054    return result;
1055 }
1056 
1057 static VkResult
1058 lvp_compute_pipeline_init(struct lvp_pipeline *pipeline,
1059                           struct lvp_device *device,
1060                           struct lvp_pipeline_cache *cache,
1061                           const VkComputePipelineCreateInfo *pCreateInfo,
1062                           VkPipelineCreateFlagBits2KHR flags)
1063 {
1064    pipeline->flags = flags;
1065    pipeline->device = device;
1066    pipeline->layout = lvp_pipeline_layout_from_handle(pCreateInfo->layout);
1067    vk_pipeline_layout_ref(&pipeline->layout->vk);
1068    pipeline->force_min_sample = false;
1069 
1070    pipeline->type = LVP_PIPELINE_COMPUTE;
1071 
1072    VkResult result = lvp_shader_compile_to_ir(pipeline, &pCreateInfo->stage);
1073    if (result != VK_SUCCESS)
1074       return result;
1075 
1076    struct lvp_shader *shader = &pipeline->shaders[MESA_SHADER_COMPUTE];
1077    if (!shader->inlines.can_inline)
1078       shader->shader_cso = lvp_shader_compile(pipeline->device, shader, nir_shader_clone(NULL, shader->pipeline_nir->nir), false);
1079    pipeline->compiled = true;
1080    return VK_SUCCESS;
1081 }
1082 
1083 static VkResult
1084 lvp_compute_pipeline_create(
1085    VkDevice _device,
1086    VkPipelineCache _cache,
1087    const VkComputePipelineCreateInfo *pCreateInfo,
1088    VkPipelineCreateFlagBits2KHR flags,
1089    VkPipeline *pPipeline)
1090 {
1091    LVP_FROM_HANDLE(lvp_device, device, _device);
1092    LVP_FROM_HANDLE(lvp_pipeline_cache, cache, _cache);
1093    struct lvp_pipeline *pipeline;
1094    VkResult result;
1095 
1096    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO);
1097 
1098    pipeline = vk_zalloc(&device->vk.alloc, sizeof(*pipeline), 8,
1099                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1100    if (pipeline == NULL)
1101       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1102 
1103    vk_object_base_init(&device->vk, &pipeline->base,
1104                        VK_OBJECT_TYPE_PIPELINE);
1105    uint64_t t0 = os_time_get_nano();
1106    result = lvp_compute_pipeline_init(pipeline, device, cache, pCreateInfo, flags);
1107    if (result != VK_SUCCESS) {
1108       vk_free(&device->vk.alloc, pipeline);
1109       return result;
1110    }
1111 
1112    const VkPipelineCreationFeedbackCreateInfo *feedback = (void*)vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
1113    if (feedback) {
1114       feedback->pPipelineCreationFeedback->duration = os_time_get_nano() - t0;
1115       feedback->pPipelineCreationFeedback->flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
1116       memset(feedback->pPipelineStageCreationFeedbacks, 0, sizeof(VkPipelineCreationFeedback) * feedback->pipelineStageCreationFeedbackCount);
1117    }
1118 
1119    *pPipeline = lvp_pipeline_to_handle(pipeline);
1120 
1121    return VK_SUCCESS;
1122 }
1123 
1124 VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateComputePipelines(
1125    VkDevice                                    _device,
1126    VkPipelineCache                             pipelineCache,
1127    uint32_t                                    count,
1128    const VkComputePipelineCreateInfo*          pCreateInfos,
1129    const VkAllocationCallbacks*                pAllocator,
1130    VkPipeline*                                 pPipelines)
1131 {
1132    VkResult result = VK_SUCCESS;
1133    unsigned i = 0;
1134 
1135    for (; i < count; i++) {
1136       VkResult r = VK_PIPELINE_COMPILE_REQUIRED;
1137       VkPipelineCreateFlagBits2KHR flags = vk_compute_pipeline_create_flags(&pCreateInfos[i]);
1138 
1139       if (!(flags & VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR))
1140          r = lvp_compute_pipeline_create(_device,
1141                                          pipelineCache,
1142                                          &pCreateInfos[i],
1143                                          flags,
1144                                          &pPipelines[i]);
1145       if (r != VK_SUCCESS) {
1146          result = r;
1147          pPipelines[i] = VK_NULL_HANDLE;
1148          if (flags & VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT_KHR)
1149             break;
1150       }
1151    }
1152    if (result != VK_SUCCESS) {
1153       for (; i < count; i++)
1154          pPipelines[i] = VK_NULL_HANDLE;
1155    }
1156 
1157 
1158    return result;
1159 }
1160 
1161 VKAPI_ATTR void VKAPI_CALL lvp_DestroyShaderEXT(
1162     VkDevice                                    _device,
1163     VkShaderEXT                                 _shader,
1164     const VkAllocationCallbacks*                pAllocator)
1165 {
1166    LVP_FROM_HANDLE(lvp_device, device, _device);
1167    LVP_FROM_HANDLE(lvp_shader, shader, _shader);
1168 
1169    if (!shader)
1170       return;
1171    shader_destroy(device, shader, false);
1172 
1173    vk_pipeline_layout_unref(&device->vk, &shader->layout->vk);
1174    blob_finish(&shader->blob);
1175    vk_object_base_finish(&shader->base);
1176    vk_free2(&device->vk.alloc, pAllocator, shader);
1177 }
1178 
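/* Create a VkShaderEXT from either SPIR-V (compiled and lowered here) or a
 * previously exported binary (validated against the cache UUID and SHA1, then
 * deserialized), then build the shader's CSO and keep a serialized copy of
 * the NIR.
 */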
1179 static VkShaderEXT
1180 create_shader_object(struct lvp_device *device, const VkShaderCreateInfoEXT *pCreateInfo, const VkAllocationCallbacks *pAllocator)
1181 {
1182    nir_shader *nir = NULL;
1183    gl_shader_stage stage = vk_to_mesa_shader_stage(pCreateInfo->stage);
1184    assert(stage <= LVP_SHADER_STAGES && stage != MESA_SHADER_NONE);
1185    if (pCreateInfo->codeType == VK_SHADER_CODE_TYPE_SPIRV_EXT) {
1186       VkShaderModuleCreateInfo minfo = {
1187          VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
1188          NULL,
1189          0,
1190          pCreateInfo->codeSize,
1191          pCreateInfo->pCode,
1192       };
1193       VkPipelineShaderStageCreateFlagBits flags = 0;
1194       if (pCreateInfo->flags & VK_SHADER_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT)
1195          flags |= VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT;
1196       if (pCreateInfo->flags & VK_SHADER_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT)
1197          flags |= VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT;
1198       VkPipelineShaderStageCreateInfo sinfo = {
1199          VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1200          &minfo,
1201          flags,
1202          pCreateInfo->stage,
1203          VK_NULL_HANDLE,
1204          pCreateInfo->pName,
1205          pCreateInfo->pSpecializationInfo,
1206       };
1207       VkResult result = compile_spirv(device, 0, &sinfo, &nir);
1208       if (result != VK_SUCCESS)
1209          goto fail;
1210       nir->info.separate_shader = true;
1211    } else {
1212       assert(pCreateInfo->codeType == VK_SHADER_CODE_TYPE_BINARY_EXT);
1213       if (pCreateInfo->codeSize < SHA1_DIGEST_LENGTH + VK_UUID_SIZE + 1)
1214          return VK_NULL_HANDLE;
1215       struct blob_reader blob;
1216       const uint8_t *data = pCreateInfo->pCode;
1217       uint8_t uuid[VK_UUID_SIZE];
1218       lvp_device_get_cache_uuid(uuid);
1219       if (memcmp(uuid, data, VK_UUID_SIZE))
1220          return VK_NULL_HANDLE;
1221       size_t size = pCreateInfo->codeSize - SHA1_DIGEST_LENGTH - VK_UUID_SIZE;
1222       unsigned char sha1[20];
1223 
1224       struct mesa_sha1 sctx;
1225       _mesa_sha1_init(&sctx);
1226       _mesa_sha1_update(&sctx, data + SHA1_DIGEST_LENGTH + VK_UUID_SIZE, size);
1227       _mesa_sha1_final(&sctx, sha1);
1228       if (memcmp(sha1, data + VK_UUID_SIZE, SHA1_DIGEST_LENGTH))
1229          return VK_NULL_HANDLE;
1230 
1231       blob_reader_init(&blob, data + SHA1_DIGEST_LENGTH + VK_UUID_SIZE, size);
1232       nir = nir_deserialize(NULL, device->pscreen->get_compiler_options(device->pscreen, PIPE_SHADER_IR_NIR, stage), &blob);
1233       if (!nir)
1234          goto fail;
1235    }
1236    if (!nir_shader_get_entrypoint(nir))
1237       goto fail;
1238    struct lvp_shader *shader = vk_object_zalloc(&device->vk, pAllocator, sizeof(struct lvp_shader), VK_OBJECT_TYPE_SHADER_EXT);
1239    if (!shader)
1240       goto fail;
1241    blob_init(&shader->blob);
1242    VkPipelineLayoutCreateInfo pci = {
1243       VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1244       NULL,
1245       0,
1246       pCreateInfo->setLayoutCount,
1247       pCreateInfo->pSetLayouts,
1248       pCreateInfo->pushConstantRangeCount,
1249       pCreateInfo->pPushConstantRanges,
1250    };
1251    shader->layout = lvp_pipeline_layout_create(device, &pci, pAllocator);
1252 
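   /* Binary shaders were serialized after lowering, so only freshly compiled
    * SPIR-V needs to be lowered against the new layout here.
    */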
   if (pCreateInfo->codeType == VK_SHADER_CODE_TYPE_SPIRV_EXT)
      lvp_shader_lower(device, NULL, nir, shader->layout);

   lvp_shader_init(shader, nir);

   lvp_shader_xfb_init(shader);
   if (stage == MESA_SHADER_TESS_EVAL) {
      /* spec requires that all tess modes are set in both shaders */
      nir_lower_patch_vertices(shader->pipeline_nir->nir, shader->pipeline_nir->nir->info.tess.tcs_vertices_out, NULL);
      shader->tess_ccw = lvp_create_pipeline_nir(nir_shader_clone(NULL, shader->pipeline_nir->nir));
      shader->tess_ccw->nir->info.tess.ccw = !shader->pipeline_nir->nir->info.tess.ccw;
      shader->tess_ccw_cso = lvp_shader_compile(device, shader, nir_shader_clone(NULL, shader->tess_ccw->nir), false);
   } else if (stage == MESA_SHADER_FRAGMENT && nir->info.fs.uses_fbfetch_output) {
      /* this is (currently) illegal */
      assert(!nir->info.fs.uses_fbfetch_output);
      shader_destroy(device, shader, false);

      vk_object_base_finish(&shader->base);
      vk_free2(&device->vk.alloc, pAllocator, shader);
      return VK_NULL_HANDLE;
   }
   nir_serialize(&shader->blob, nir, true);
   shader->shader_cso = lvp_shader_compile(device, shader, nir_shader_clone(NULL, nir), false);
   return lvp_shader_to_handle(shader);
fail:
   ralloc_free(nir);
   return VK_NULL_HANDLE;
}

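/* Creates each shader object in order.  A rejected pre-built binary zeroes the
 * remaining handles and maps to VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT; any
 * other failure returns VK_ERROR_OUT_OF_HOST_MEMORY.
 */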
VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateShadersEXT(
    VkDevice                                    _device,
    uint32_t                                    createInfoCount,
    const VkShaderCreateInfoEXT*                pCreateInfos,
    const VkAllocationCallbacks*                pAllocator,
    VkShaderEXT*                                pShaders)
{
   LVP_FROM_HANDLE(lvp_device, device, _device);
   unsigned i;
   for (i = 0; i < createInfoCount; i++) {
      pShaders[i] = create_shader_object(device, &pCreateInfos[i], pAllocator);
      if (!pShaders[i]) {
         if (pCreateInfos[i].codeType == VK_SHADER_CODE_TYPE_BINARY_EXT) {
            if (i < createInfoCount - 1)
               memset(&pShaders[i + 1], 0, (createInfoCount - i - 1) * sizeof(VkShaderEXT));
            return vk_error(device, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT);
         }
         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      }
   }
   return VK_SUCCESS;
}

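/* Standard Vulkan two-call idiom: with pData == NULL, report the required
 * size; otherwise write [device cache UUID | SHA1 | serialized NIR] so the
 * blob can be validated and re-imported by create_shader_object.
 */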
VKAPI_ATTR VkResult VKAPI_CALL lvp_GetShaderBinaryDataEXT(
    VkDevice                                    device,
    VkShaderEXT                                 _shader,
    size_t*                                     pDataSize,
    void*                                       pData)
{
   LVP_FROM_HANDLE(lvp_shader, shader, _shader);
   VkResult ret = VK_SUCCESS;
   if (pData) {
      if (*pDataSize < shader->blob.size + SHA1_DIGEST_LENGTH + VK_UUID_SIZE) {
         ret = VK_INCOMPLETE;
         *pDataSize = 0;
      } else {
         *pDataSize = MIN2(*pDataSize, shader->blob.size + SHA1_DIGEST_LENGTH + VK_UUID_SIZE);
         uint8_t *data = pData;
         lvp_device_get_cache_uuid(data);
         struct mesa_sha1 sctx;
         _mesa_sha1_init(&sctx);
         _mesa_sha1_update(&sctx, shader->blob.data, shader->blob.size);
         _mesa_sha1_final(&sctx, data + VK_UUID_SIZE);
         memcpy(data + SHA1_DIGEST_LENGTH + VK_UUID_SIZE, shader->blob.data, shader->blob.size);
      }
   } else {
      *pDataSize = shader->blob.size + SHA1_DIGEST_LENGTH + VK_UUID_SIZE;
   }
   return ret;
}

#ifdef VK_ENABLE_BETA_EXTENSIONS
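/* An execution graph pipeline is modeled as an array of compute pipelines:
 * one per stage in the create info plus every group imported from libraries.
 * The graph's scratch size is the maximum of the per-node estimates.
 */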
static VkResult
lvp_exec_graph_pipeline_create(VkDevice _device, VkPipelineCache _cache,
                               const VkExecutionGraphPipelineCreateInfoAMDX *create_info,
                               VkPipelineCreateFlagBits2KHR flags,
                               VkPipeline *out_pipeline)
{
   LVP_FROM_HANDLE(lvp_device, device, _device);
   struct lvp_pipeline *pipeline;
   VkResult result;

   assert(create_info->sType == VK_STRUCTURE_TYPE_EXECUTION_GRAPH_PIPELINE_CREATE_INFO_AMDX);

   uint32_t stage_count = create_info->stageCount;
   if (create_info->pLibraryInfo) {
      for (uint32_t i = 0; i < create_info->pLibraryInfo->libraryCount; i++) {
         VK_FROM_HANDLE(lvp_pipeline, library, create_info->pLibraryInfo->pLibraries[i]);
         stage_count += library->num_groups;
      }
   }

   pipeline = vk_zalloc(&device->vk.alloc, sizeof(*pipeline) + stage_count * sizeof(VkPipeline), 8,
                         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!pipeline)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &pipeline->base,
                       VK_OBJECT_TYPE_PIPELINE);

   uint64_t t0 = os_time_get_nano();

   pipeline->type = LVP_PIPELINE_EXEC_GRAPH;
   pipeline->flags = vk_graph_pipeline_create_flags(create_info);
   pipeline->layout = lvp_pipeline_layout_from_handle(create_info->layout);

   pipeline->exec_graph.scratch_size = 0;
   pipeline->num_groups = stage_count;

   uint32_t stage_index = 0;
   for (uint32_t i = 0; i < create_info->stageCount; i++) {
      const VkPipelineShaderStageNodeCreateInfoAMDX *node_info = vk_find_struct_const(
         create_info->pStages[i].pNext, PIPELINE_SHADER_STAGE_NODE_CREATE_INFO_AMDX);

      VkComputePipelineCreateInfo stage_create_info = {
         .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
         .flags = create_info->flags,
         .stage = create_info->pStages[i],
         .layout = create_info->layout,
      };

      result = lvp_compute_pipeline_create(_device, _cache, &stage_create_info, flags, &pipeline->groups[i]);
      if (result != VK_SUCCESS)
         goto fail;

      VK_FROM_HANDLE(lvp_pipeline, stage, pipeline->groups[i]);
      nir_shader *nir = stage->shaders[MESA_SHADER_COMPUTE].pipeline_nir->nir;

      if (node_info) {
         stage->exec_graph.name = node_info->pName;
         stage->exec_graph.index = node_info->index;
      }

      /* TODO: Add a shader info NIR pass to figure out how many payloads the shader creates. */
      stage->exec_graph.scratch_size = nir->info.cs.node_payloads_size * 256;
      pipeline->exec_graph.scratch_size = MAX2(pipeline->exec_graph.scratch_size, stage->exec_graph.scratch_size);

      stage_index++;
   }

   if (create_info->pLibraryInfo) {
      for (uint32_t i = 0; i < create_info->pLibraryInfo->libraryCount; i++) {
         VK_FROM_HANDLE(lvp_pipeline, library, create_info->pLibraryInfo->pLibraries[i]);
         for (uint32_t j = 0; j < library->num_groups; j++) {
            /* TODO: Do we need reference counting? */
            pipeline->groups[stage_index] = library->groups[j];
            stage_index++;
         }
         pipeline->exec_graph.scratch_size = MAX2(pipeline->exec_graph.scratch_size, library->exec_graph.scratch_size);
      }
   }

   const VkPipelineCreationFeedbackCreateInfo *feedback = (void*)vk_find_struct_const(create_info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
   if (feedback) {
      feedback->pPipelineCreationFeedback->duration = os_time_get_nano() - t0;
      feedback->pPipelineCreationFeedback->flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
      memset(feedback->pPipelineStageCreationFeedbacks, 0, sizeof(VkPipelineCreationFeedback) * feedback->pipelineStageCreationFeedbackCount);
   }

   *out_pipeline = lvp_pipeline_to_handle(pipeline);

   return VK_SUCCESS;

fail:
   for (uint32_t i = 0; i < stage_count; i++)
      lvp_DestroyPipeline(_device, pipeline->groups[i], NULL);

   vk_free(&device->vk.alloc, pipeline);

   return result;
}

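/* Mirrors lvp_CreateComputePipelines: honor FAIL_ON_PIPELINE_COMPILE_REQUIRED
 * and EARLY_RETURN_ON_FAILURE, and null out any handles that were not created.
 */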
VKAPI_ATTR VkResult VKAPI_CALL
lvp_CreateExecutionGraphPipelinesAMDX(VkDevice device, VkPipelineCache pipelineCache,
                                      uint32_t createInfoCount,
                                      const VkExecutionGraphPipelineCreateInfoAMDX *pCreateInfos,
                                      const VkAllocationCallbacks *pAllocator,
                                      VkPipeline *pPipelines)
{
   VkResult result = VK_SUCCESS;
   uint32_t i = 0;

   for (; i < createInfoCount; i++) {
      VkPipelineCreateFlagBits2KHR flags = vk_graph_pipeline_create_flags(&pCreateInfos[i]);

      VkResult r = VK_PIPELINE_COMPILE_REQUIRED;
      if (!(flags & VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR))
         r = lvp_exec_graph_pipeline_create(device, pipelineCache, &pCreateInfos[i], flags, &pPipelines[i]);
      if (r != VK_SUCCESS) {
         result = r;
         pPipelines[i] = VK_NULL_HANDLE;
         if (flags & VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT_KHR)
            break;
      }
   }
   if (result != VK_SUCCESS) {
      for (; i < createInfoCount; i++)
         pPipelines[i] = VK_NULL_HANDLE;
   }

   return result;
}

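/* Scratch reported to the app: the largest per-node estimate scaled by 32,
 * clamped so a non-zero size is always returned.
 */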
VKAPI_ATTR VkResult VKAPI_CALL
lvp_GetExecutionGraphPipelineScratchSizeAMDX(VkDevice device, VkPipeline executionGraph,
                                             VkExecutionGraphPipelineScratchSizeAMDX *pSizeInfo)
{
   VK_FROM_HANDLE(lvp_pipeline, pipeline, executionGraph);
   pSizeInfo->size = MAX2(pipeline->exec_graph.scratch_size * 32, 16);
   return VK_SUCCESS;
}

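/* Linear search over the pipeline's groups, matching nodes by (name, index). */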
VKAPI_ATTR VkResult VKAPI_CALL
lvp_GetExecutionGraphPipelineNodeIndexAMDX(VkDevice device, VkPipeline executionGraph,
                                           const VkPipelineShaderStageNodeCreateInfoAMDX *pNodeInfo,
                                           uint32_t *pNodeIndex)
{
   VK_FROM_HANDLE(lvp_pipeline, pipeline, executionGraph);

   for (uint32_t i = 0; i < pipeline->num_groups; i++) {
      VK_FROM_HANDLE(lvp_pipeline, stage, pipeline->groups[i]);
      if (stage->exec_graph.index == pNodeInfo->index &&
          !strcmp(stage->exec_graph.name, pNodeInfo->pName)) {
         *pNodeIndex = i;
         return VK_SUCCESS;
      }
   }

   return VK_ERROR_OUT_OF_HOST_MEMORY;
}
#endif