xref: /aosp_15_r20/external/mesa3d/src/broadcom/vulkan/v3dv_pipeline.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2019 Raspberry Pi Ltd
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "vk_util.h"
25 
26 #include "v3dv_debug.h"
27 #include "v3dv_private.h"
28 
29 #include "common/v3d_debug.h"
30 #include "qpu/qpu_disasm.h"
31 
32 #include "compiler/nir/nir_builder.h"
33 #include "nir/nir_serialize.h"
34 
35 #include "util/u_atomic.h"
36 #include "util/os_time.h"
37 
38 #include "vk_format.h"
39 #include "vk_nir_convert_ycbcr.h"
40 #include "vk_pipeline.h"
41 
42 static VkResult
43 compute_vpm_config(struct v3dv_pipeline *pipeline);
44 
45 void
46 v3dv_print_v3d_key(struct v3d_key *key,
47                    uint32_t v3d_key_size)
48 {
49    struct mesa_sha1 ctx;
50    unsigned char sha1[20];
51    char sha1buf[41];
52 
53    _mesa_sha1_init(&ctx);
54 
55    _mesa_sha1_update(&ctx, key, v3d_key_size);
56 
57    _mesa_sha1_final(&ctx, sha1);
58    _mesa_sha1_format(sha1buf, sha1);
59 
60    fprintf(stderr, "key %p: %s\n", key, sha1buf);
61 }
62 
63 static void
64 pipeline_compute_sha1_from_nir(struct v3dv_pipeline_stage *p_stage)
65 {
66    VkPipelineShaderStageCreateInfo info = {
67       .module = vk_shader_module_handle_from_nir(p_stage->nir),
68       .pName = p_stage->entrypoint,
69       .stage = mesa_to_vk_shader_stage(p_stage->nir->info.stage),
70    };
71 
72    vk_pipeline_hash_shader_stage(0, &info, NULL, p_stage->shader_sha1);
73 }
74 
75 void
76 v3dv_shader_variant_destroy(struct v3dv_device *device,
77                             struct v3dv_shader_variant *variant)
78 {
79    /* The assembly BO is shared by all variants in the pipeline, so it can't
80     * be freed here and should be freed with the pipeline
81     */
82    if (variant->qpu_insts) {
83       free(variant->qpu_insts);
84       variant->qpu_insts = NULL;
85    }
86    ralloc_free(variant->prog_data.base);
87    vk_free(&device->vk.alloc, variant);
88 }
89 
90 static void
91 destroy_pipeline_stage(struct v3dv_device *device,
92                        struct v3dv_pipeline_stage *p_stage,
93                        const VkAllocationCallbacks *pAllocator)
94 {
95    if (!p_stage)
96       return;
97 
98    ralloc_free(p_stage->nir);
99    vk_free2(&device->vk.alloc, pAllocator, p_stage);
100 }
101 
102 static void
103 pipeline_free_stages(struct v3dv_device *device,
104                      struct v3dv_pipeline *pipeline,
105                      const VkAllocationCallbacks *pAllocator)
106 {
107    assert(pipeline);
108 
109    for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
110       destroy_pipeline_stage(device, pipeline->stages[stage], pAllocator);
111       pipeline->stages[stage] = NULL;
112    }
113 }
114 
115 static void
116 v3dv_destroy_pipeline(struct v3dv_pipeline *pipeline,
117                       struct v3dv_device *device,
118                       const VkAllocationCallbacks *pAllocator)
119 {
120    if (!pipeline)
121       return;
122 
123    pipeline_free_stages(device, pipeline, pAllocator);
124 
125    if (pipeline->shared_data) {
126       v3dv_pipeline_shared_data_unref(device, pipeline->shared_data);
127       pipeline->shared_data = NULL;
128    }
129 
130    if (pipeline->spill.bo) {
131       assert(pipeline->spill.size_per_thread > 0);
132       v3dv_bo_free(device, pipeline->spill.bo);
133    }
134 
135    if (pipeline->default_attribute_values) {
136       v3dv_bo_free(device, pipeline->default_attribute_values);
137       pipeline->default_attribute_values = NULL;
138    }
139 
140    if (pipeline->executables.mem_ctx)
141       ralloc_free(pipeline->executables.mem_ctx);
142 
143    if (pipeline->layout)
144       v3dv_pipeline_layout_unref(device, pipeline->layout, pAllocator);
145 
146    vk_object_free(&device->vk, pAllocator, pipeline);
147 }
148 
149 VKAPI_ATTR void VKAPI_CALL
150 v3dv_DestroyPipeline(VkDevice _device,
151                      VkPipeline _pipeline,
152                      const VkAllocationCallbacks *pAllocator)
153 {
154    V3DV_FROM_HANDLE(v3dv_device, device, _device);
155    V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, _pipeline);
156 
157    if (!pipeline)
158       return;
159 
160    v3dv_destroy_pipeline(pipeline, device, pAllocator);
161 }
162 
163 static const struct spirv_to_nir_options default_spirv_options =  {
164    .ubo_addr_format = nir_address_format_32bit_index_offset,
165    .ssbo_addr_format = nir_address_format_32bit_index_offset,
166    .phys_ssbo_addr_format = nir_address_format_2x32bit_global,
167    .push_const_addr_format = nir_address_format_logical,
168    .shared_addr_format = nir_address_format_32bit_offset,
169 };
170 
171 const nir_shader_compiler_options *
172 v3dv_pipeline_get_nir_options(const struct v3d_device_info *devinfo)
173 {
174    static bool initialized = false;
175    static nir_shader_compiler_options options = {
176       .lower_uadd_sat = true,
177       .lower_usub_sat = true,
178       .lower_iadd_sat = true,
179       .lower_all_io_to_temps = true,
180       .lower_extract_byte = true,
181       .lower_extract_word = true,
182       .lower_insert_byte = true,
183       .lower_insert_word = true,
184       .lower_bitfield_insert = true,
185       .lower_bitfield_extract = true,
186       .lower_bitfield_reverse = true,
187       .lower_bit_count = true,
188       .lower_cs_local_id_to_index = true,
189       .lower_ffract = true,
190       .lower_fmod = true,
191       .lower_pack_unorm_2x16 = true,
192       .lower_pack_snorm_2x16 = true,
193       .lower_unpack_unorm_2x16 = true,
194       .lower_unpack_snorm_2x16 = true,
195       .lower_pack_unorm_4x8 = true,
196       .lower_pack_snorm_4x8 = true,
197       .lower_unpack_unorm_4x8 = true,
198       .lower_unpack_snorm_4x8 = true,
199       .lower_pack_half_2x16 = true,
200       .lower_unpack_half_2x16 = true,
201       .lower_pack_32_2x16 = true,
202       .lower_pack_32_2x16_split = true,
203       .lower_unpack_32_2x16_split = true,
204       .lower_mul_2x32_64 = true,
205       .lower_fdiv = true,
206       .lower_find_lsb = true,
207       .lower_ffma16 = true,
208       .lower_ffma32 = true,
209       .lower_ffma64 = true,
210       .lower_flrp32 = true,
211       .lower_fpow = true,
212       .lower_fsqrt = true,
213       .lower_ifind_msb = true,
214       .lower_isign = true,
215       .lower_ldexp = true,
216       .lower_mul_high = true,
217       .lower_wpos_pntc = false,
218       .lower_to_scalar = true,
219       .lower_device_index_to_zero = true,
220       .lower_fquantize2f16 = true,
221       .lower_ufind_msb = true,
222       .has_fsub = true,
223       .has_isub = true,
224       .has_uclz = true,
225       .vertex_id_zero_based = false, /* FIXME: to set this to true, the intrinsic
226                                       * needs to be supported */
227       .lower_interpolate_at = true,
228       .max_unroll_iterations = 16,
229       .force_indirect_unrolling = (nir_var_shader_in | nir_var_function_temp),
230       .divergence_analysis_options =
231          nir_divergence_multiple_workgroup_per_compute_subgroup,
232       .discard_is_demote = true,
233       .has_ddx_intrinsics = true,
234       .scalarize_ddx = true,
235    };
236 
237    if (!initialized) {
238       options.lower_fsat = devinfo->ver < 71;
239       initialized = true;
240     }
241 
242    return &options;
243 }
244 
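/* Resolves the YCbCr conversion state for a (set, binding, array_index) by
 * looking at the immutable samplers in the pipeline layout; returns NULL if
 * the binding has no immutable samplers or the sampler has no conversion.
 */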
245 static const struct vk_ycbcr_conversion_state *
246 lookup_ycbcr_conversion(const void *_pipeline_layout, uint32_t set,
247                         uint32_t binding, uint32_t array_index)
248 {
249    struct v3dv_pipeline_layout *pipeline_layout =
250       (struct v3dv_pipeline_layout *) _pipeline_layout;
251 
252    assert(set < pipeline_layout->num_sets);
253    struct v3dv_descriptor_set_layout *set_layout =
254       pipeline_layout->set[set].layout;
255 
256    assert(binding < set_layout->binding_count);
257    struct v3dv_descriptor_set_binding_layout *bind_layout =
258       &set_layout->binding[binding];
259 
260    if (bind_layout->immutable_samplers_offset) {
261       const struct v3dv_sampler *immutable_samplers =
262          v3dv_immutable_samplers(set_layout, bind_layout);
263       const struct v3dv_sampler *sampler = &immutable_samplers[array_index];
264       return sampler->conversion ? &sampler->conversion->state : NULL;
265    } else {
266       return NULL;
267    }
268 }
269 
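/* Stage-independent NIR preprocessing run right after SPIR-V translation:
 * sysval-to-varying lowering, I/O to temporaries, explicit I/O using the
 * address formats declared in default_spirv_options, indirect deref
 * lowering, and a couple of v3d_optimize_nir rounds.
 */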
270 static void
271 preprocess_nir(nir_shader *nir)
272 {
273    const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
274       .frag_coord = true,
275       .point_coord = true,
276    };
277    NIR_PASS(_, nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);
278 
279    /* Vulkan uses the separate-shader linking model */
280    nir->info.separate_shader = true;
281 
282    /* Make sure we lower variable initializers on output variables so that
283     * nir_remove_dead_variables below sees the corresponding stores
284     */
285    NIR_PASS(_, nir, nir_lower_variable_initializers, nir_var_shader_out);
286 
287    if (nir->info.stage == MESA_SHADER_FRAGMENT)
288       NIR_PASS(_, nir, nir_lower_io_to_vector, nir_var_shader_out);
289    if (nir->info.stage == MESA_SHADER_FRAGMENT) {
290       NIR_PASS(_, nir, nir_lower_input_attachments,
291                  &(nir_input_attachment_options) {
292                     .use_fragcoord_sysval = false,
293                        });
294    }
295 
296    NIR_PASS_V(nir, nir_lower_io_to_temporaries,
297               nir_shader_get_entrypoint(nir), true, false);
298 
299    NIR_PASS(_, nir, nir_lower_system_values);
300 
301    NIR_PASS(_, nir, nir_lower_alu_to_scalar, NULL, NULL);
302 
303    NIR_PASS(_, nir, nir_normalize_cubemap_coords);
304 
305    NIR_PASS(_, nir, nir_lower_global_vars_to_local);
306 
307    NIR_PASS(_, nir, nir_split_var_copies);
308    NIR_PASS(_, nir, nir_split_struct_vars, nir_var_function_temp);
309 
310    v3d_optimize_nir(NULL, nir);
311 
312    NIR_PASS(_, nir, nir_lower_explicit_io,
313             nir_var_mem_push_const,
314             nir_address_format_32bit_offset);
315 
316    NIR_PASS(_, nir, nir_lower_explicit_io,
317             nir_var_mem_ubo | nir_var_mem_ssbo,
318             nir_address_format_32bit_index_offset);
319 
320    NIR_PASS(_, nir, nir_lower_explicit_io,
321             nir_var_mem_global,
322             nir_address_format_2x32bit_global);
323 
324    NIR_PASS(_, nir, nir_lower_load_const_to_scalar);
325 
326    /* Lower a bunch of stuff */
327    NIR_PASS(_, nir, nir_lower_var_copies);
328 
329    NIR_PASS(_, nir, nir_lower_indirect_derefs, nir_var_shader_in, UINT32_MAX);
330 
331    NIR_PASS(_, nir, nir_lower_indirect_derefs,
332             nir_var_function_temp, 2);
333 
334    NIR_PASS(_, nir, nir_lower_array_deref_of_vec,
335             nir_var_mem_ubo | nir_var_mem_ssbo, NULL,
336             nir_lower_direct_array_deref_of_vec_load);
337 
338    NIR_PASS(_, nir, nir_lower_frexp);
339 
340    /* Get rid of split copies */
341    v3d_optimize_nir(NULL, nir);
342 }
343 
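/* Translates the SPIR-V (or already-built NIR) of a pipeline stage into NIR
 * with our compiler options and runs preprocess_nir on it. Returns NULL on
 * failure.
 */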
344 static nir_shader *
345 shader_module_compile_to_nir(struct v3dv_device *device,
346                              struct v3dv_pipeline_stage *stage)
347 {
348    assert(stage->module || stage->module_info);
349 
350    nir_shader *nir;
351    const nir_shader_compiler_options *nir_options =
352       v3dv_pipeline_get_nir_options(&device->devinfo);
353 
354    gl_shader_stage gl_stage = broadcom_shader_stage_to_gl(stage->stage);
355 
356    const VkPipelineShaderStageCreateInfo stage_info = {
357       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
358       .pNext = !stage->module ? stage->module_info : NULL,
359       .stage = mesa_to_vk_shader_stage(gl_stage),
360       .module = vk_shader_module_to_handle((struct vk_shader_module *)stage->module),
361       .pName = stage->entrypoint,
362       .pSpecializationInfo = stage->spec_info,
363    };
364 
365    /* vk_pipeline_shader_stage_to_nir also handles internal shaders when
366     * module->nir != NULL. It also calls nir_validate_shader in both cases
367     * so we don't have to call it here.
368     */
369    VkResult result = vk_pipeline_shader_stage_to_nir(&device->vk,
370                                                      stage->pipeline->flags,
371                                                      &stage_info,
372                                                      &default_spirv_options,
373                                                      nir_options,
374                                                      NULL, &nir);
375    if (result != VK_SUCCESS)
376       return NULL;
377    assert(nir->info.stage == gl_stage);
378 
379    if (V3D_DBG(SHADERDB) && (!stage->module || stage->module->nir == NULL)) {
380       char sha1buf[41];
381       _mesa_sha1_format(sha1buf, stage->pipeline->sha1);
382       nir->info.name = ralloc_strdup(nir, sha1buf);
383    }
384 
385    if (V3D_DBG(NIR) || v3d_debug_flag_for_shader_stage(gl_stage)) {
386       fprintf(stderr, "NIR after vk_pipeline_shader_stage_to_nir: %s prog %d NIR:\n",
387               broadcom_shader_stage_name(stage->stage),
388               stage->program_id);
389       nir_print_shader(nir, stderr);
390       fprintf(stderr, "\n");
391    }
392 
393    preprocess_nir(nir);
394 
395    return nir;
396 }
397 
398 static int
399 type_size_vec4(const struct glsl_type *type, bool bindless)
400 {
401    return glsl_count_attribute_slots(type, false);
402 }
403 
404 /* FIXME: the number of parameters for this method is somewhat big. Perhaps
405  * rethink.
406  */
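/* Returns the slot in the descriptor map for (set, binding, array_index,
 * plane), reusing an existing entry when possible. If an existing entry was
 * added with a different return_size, it is promoted to 32 bit so the most
 * precise user wins.
 */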
407 static unsigned
408 descriptor_map_add(struct v3dv_descriptor_map *map,
409                    int set,
410                    int binding,
411                    int array_index,
412                    int array_size,
413                    int start_index,
414                    uint8_t return_size,
415                    uint8_t plane)
416 {
417    assert(array_index < array_size);
418    assert(return_size == 16 || return_size == 32);
419 
420    unsigned index = start_index;
421    for (; index < map->num_desc; index++) {
422       if (map->used[index] &&
423           set == map->set[index] &&
424           binding == map->binding[index] &&
425           array_index == map->array_index[index] &&
426           plane == map->plane[index]) {
427          assert(array_size == map->array_size[index]);
428          if (return_size != map->return_size[index]) {
429             /* If the return_size is different it means that the same sampler
430              * was used for operations with different precision
431              * requirements. In this case we need to ensure that we use the
432              * larger one.
433              */
434             map->return_size[index] = 32;
435          }
436          return index;
437       } else if (!map->used[index]) {
438          break;
439       }
440    }
441 
442    assert(index < DESCRIPTOR_MAP_SIZE);
443    assert(!map->used[index]);
444 
445    map->used[index] = true;
446    map->set[index] = set;
447    map->binding[index] = binding;
448    map->array_index[index] = array_index;
449    map->array_size[index] = array_size;
450    map->return_size[index] = return_size;
451    map->plane[index] = plane;
452    map->num_desc = MAX2(map->num_desc, index + 1);
453 
454    return index;
455 }
456 
457 struct lower_pipeline_layout_state {
458    struct v3dv_pipeline *pipeline;
459    const struct v3dv_pipeline_layout *layout;
460    bool needs_default_sampler_state;
461 };
462 
463 
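/* Push constants are fed to the shader as regular uniforms on v3d, so this
 * lowering just retypes the intrinsic and leaves its offset source untouched.
 */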
464 static void
465 lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *instr,
466                          struct lower_pipeline_layout_state *state)
467 {
468    assert(instr->intrinsic == nir_intrinsic_load_push_constant);
469    instr->intrinsic = nir_intrinsic_load_uniform;
470 }
471 
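/* Returns the per-stage descriptor map (sampler, texture, UBO or SSBO) that
 * backs a given Vulkan descriptor type. Combined image/samplers resolve to
 * either the sampler or the texture map depending on is_sampler.
 */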
472 static struct v3dv_descriptor_map*
473 pipeline_get_descriptor_map(struct v3dv_pipeline *pipeline,
474                             VkDescriptorType desc_type,
475                             gl_shader_stage gl_stage,
476                             bool is_sampler)
477 {
478    enum broadcom_shader_stage broadcom_stage =
479       gl_shader_stage_to_broadcom(gl_stage);
480 
481    assert(pipeline->shared_data &&
482           pipeline->shared_data->maps[broadcom_stage]);
483 
484    switch(desc_type) {
485    case VK_DESCRIPTOR_TYPE_SAMPLER:
486       return &pipeline->shared_data->maps[broadcom_stage]->sampler_map;
487    case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
488    case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
489    case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
490    case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
491    case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
492       return &pipeline->shared_data->maps[broadcom_stage]->texture_map;
493    case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
494       return is_sampler ?
495          &pipeline->shared_data->maps[broadcom_stage]->sampler_map :
496          &pipeline->shared_data->maps[broadcom_stage]->texture_map;
497    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
498    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
499    case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK:
500       return &pipeline->shared_data->maps[broadcom_stage]->ubo_map;
501    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
502    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
503       return &pipeline->shared_data->maps[broadcom_stage]->ssbo_map;
504    default:
505       unreachable("Descriptor type unknown or not having a descriptor map");
506    }
507 }
508 
509 /* Gathers info from the intrinsic (set and binding) and then lowers it so it
510  * can be used by the v3d_compiler */
511 static void
512 lower_vulkan_resource_index(nir_builder *b,
513                             nir_intrinsic_instr *instr,
514                             struct lower_pipeline_layout_state *state)
515 {
516    assert(instr->intrinsic == nir_intrinsic_vulkan_resource_index);
517 
518    nir_const_value *const_val = nir_src_as_const_value(instr->src[0]);
519 
520    unsigned set = nir_intrinsic_desc_set(instr);
521    unsigned binding = nir_intrinsic_binding(instr);
522    struct v3dv_descriptor_set_layout *set_layout = state->layout->set[set].layout;
523    struct v3dv_descriptor_set_binding_layout *binding_layout =
524       &set_layout->binding[binding];
525    unsigned index = 0;
526 
527    switch (binding_layout->type) {
528    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
529    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
530    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
531    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
532    case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: {
533       struct v3dv_descriptor_map *descriptor_map =
534          pipeline_get_descriptor_map(state->pipeline, binding_layout->type,
535                                      b->shader->info.stage, false);
536 
537       if (!const_val)
538          unreachable("non-constant vulkan_resource_index array index");
539 
540       /* At compile-time we will need to know if we are processing a UBO load
541        * for an inline or a regular UBO so we can handle inline loads like
542        * push constants. At the NIR level, however, the inline
543        * information is gone, so we rely on the index to make this distinction.
544        * Particularly, we reserve indices 1..MAX_INLINE_UNIFORM_BUFFERS for
545        * inline buffers. This means that at the descriptor map level
546        * we store inline buffers at slots 0..MAX_INLINE_UNIFORM_BUFFERS - 1,
547        * and regular UBOs at indices starting from MAX_INLINE_UNIFORM_BUFFERS.
548        */
549       uint32_t start_index = 0;
550       if (binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
551           binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) {
552          start_index += MAX_INLINE_UNIFORM_BUFFERS;
553       }
554 
555       index = descriptor_map_add(descriptor_map, set, binding,
556                                  const_val->u32,
557                                  binding_layout->array_size,
558                                  start_index,
559                                  32 /* return_size: doesn't really apply for this case */,
560                                  0);
561       break;
562    }
563 
564    default:
565       unreachable("unsupported descriptor type for vulkan_resource_index");
566       break;
567    }
568 
569    /* Since we use the deref pass, both vulkan_resource_index and
570     * vulkan_load_descriptor return a vec2 providing an index and
571     * offset. Our backend compiler only cares about the index part.
572     */
573    nir_def_replace(&instr->def, nir_imm_ivec2(b, index, 0));
574 }
575 
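/* Extracts the plane index from a nir_tex_src_plane source (used for
 * multi-planar formats) and removes it from the instruction; returns plane 0
 * when no such source is present.
 */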
576 static uint8_t
577 tex_instr_get_and_remove_plane_src(nir_tex_instr *tex)
578 {
579    int plane_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_plane);
580    if (plane_src_idx < 0)
581        return 0;
582 
583    uint8_t plane = nir_src_as_uint(tex->src[plane_src_idx].src);
584    nir_tex_instr_remove_src(tex, plane_src_idx);
585    return plane;
586 }
587 
588 /* Returns the return_size, so it can be used for the case where there is
589  * no sampler object
590  */
591 static uint8_t
592 lower_tex_src(nir_builder *b,
593               nir_tex_instr *instr,
594               unsigned src_idx,
595               struct lower_pipeline_layout_state *state)
596 {
597    nir_def *index = NULL;
598    unsigned base_index = 0;
599    unsigned array_elements = 1;
600    nir_tex_src *src = &instr->src[src_idx];
601    bool is_sampler = src->src_type == nir_tex_src_sampler_deref;
602 
603    uint8_t plane = tex_instr_get_and_remove_plane_src(instr);
604 
605    /* We compute first the offsets */
606    nir_deref_instr *deref = nir_instr_as_deref(src->src.ssa->parent_instr);
607    while (deref->deref_type != nir_deref_type_var) {
608       nir_deref_instr *parent =
609          nir_instr_as_deref(deref->parent.ssa->parent_instr);
610 
611       assert(deref->deref_type == nir_deref_type_array);
612 
613       if (nir_src_is_const(deref->arr.index) && index == NULL) {
614          /* We're still building a direct index */
615          base_index += nir_src_as_uint(deref->arr.index) * array_elements;
616       } else {
617          if (index == NULL) {
618             /* We used to be direct but not anymore */
619             index = nir_imm_int(b, base_index);
620             base_index = 0;
621          }
622 
623          index = nir_iadd(b, index,
624                           nir_imul_imm(b, deref->arr.index.ssa,
625                                        array_elements));
626       }
627 
628       array_elements *= glsl_get_length(parent->type);
629 
630       deref = parent;
631    }
632 
633    if (index)
634       index = nir_umin(b, index, nir_imm_int(b, array_elements - 1));
635 
636    /* We have the offsets, we apply them, rewriting the source or removing
637     * instr if needed
638     */
639    if (index) {
640       nir_src_rewrite(&src->src, index);
641 
642       src->src_type = is_sampler ?
643          nir_tex_src_sampler_offset :
644          nir_tex_src_texture_offset;
645    } else {
646       nir_tex_instr_remove_src(instr, src_idx);
647    }
648 
649    uint32_t set = deref->var->data.descriptor_set;
650    uint32_t binding = deref->var->data.binding;
651    /* FIXME: this is a really simplified check for the precision to be used
652     * for the sampling. Right now we are only checking for the variables used
653     * on the operation itself, but there are other cases that we could use to
654     * infer the precision requirement.
655     */
656    bool relaxed_precision = deref->var->data.precision == GLSL_PRECISION_MEDIUM ||
657                             deref->var->data.precision == GLSL_PRECISION_LOW;
658    struct v3dv_descriptor_set_layout *set_layout = state->layout->set[set].layout;
659    struct v3dv_descriptor_set_binding_layout *binding_layout =
660       &set_layout->binding[binding];
661 
662    /* For input attachments, the shader includes the attachment_idx. As we are
663     * treating them as a texture, we only want the base_index
664     */
665    uint32_t array_index = binding_layout->type != VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT ?
666       deref->var->data.index + base_index :
667       base_index;
668 
669    uint8_t return_size;
670    if (V3D_DBG(TMU_16BIT))
671       return_size = 16;
672    else  if (V3D_DBG(TMU_32BIT))
673       return_size = 32;
674    else
675       return_size = relaxed_precision ? 16 : 32;
676 
677    struct v3dv_descriptor_map *map =
678       pipeline_get_descriptor_map(state->pipeline, binding_layout->type,
679                                   b->shader->info.stage, is_sampler);
680    int desc_index =
681       descriptor_map_add(map,
682                          deref->var->data.descriptor_set,
683                          deref->var->data.binding,
684                          array_index,
685                          binding_layout->array_size,
686                          0,
687                          return_size,
688                          plane);
689 
690    if (is_sampler)
691       instr->sampler_index = desc_index;
692    else
693       instr->texture_index = desc_index;
694 
695    return return_size;
696 }
697 
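/* Lowers both the texture and sampler deref sources of a tex instruction to
 * descriptor map indices. Instructions without a sampler source (e.g. txf)
 * request a default sampler state through backend_flags, selected by the
 * return size computed from the texture source.
 */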
698 static bool
699 lower_sampler(nir_builder *b,
700               nir_tex_instr *instr,
701               struct lower_pipeline_layout_state *state)
702 {
703    uint8_t return_size = 0;
704 
705    int texture_idx =
706       nir_tex_instr_src_index(instr, nir_tex_src_texture_deref);
707 
708    if (texture_idx >= 0)
709       return_size = lower_tex_src(b, instr, texture_idx, state);
710 
711    int sampler_idx =
712       nir_tex_instr_src_index(instr, nir_tex_src_sampler_deref);
713 
714    if (sampler_idx >= 0) {
715       assert(nir_tex_instr_need_sampler(instr));
716       lower_tex_src(b, instr, sampler_idx, state);
717    }
718 
719    if (texture_idx < 0 && sampler_idx < 0)
720       return false;
721 
722    /* If the instruction doesn't have a sampler (i.e. txf) we use backend_flags
723     * to bind a default sampler state to configure precision.
724     */
725    if (sampler_idx < 0) {
726       state->needs_default_sampler_state = true;
727       instr->backend_flags = return_size == 16 ?
728          V3DV_NO_SAMPLER_16BIT_IDX : V3DV_NO_SAMPLER_32BIT_IDX;
729    }
730 
731    return true;
732 }
733 
734 /* FIXME: really similar to lower_tex_src, perhaps refactor? */
735 static void
736 lower_image_deref(nir_builder *b,
737                   nir_intrinsic_instr *instr,
738                   struct lower_pipeline_layout_state *state)
739 {
740    nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
741    nir_def *index = NULL;
742    unsigned array_elements = 1;
743    unsigned base_index = 0;
744 
745    while (deref->deref_type != nir_deref_type_var) {
746       nir_deref_instr *parent =
747          nir_instr_as_deref(deref->parent.ssa->parent_instr);
748 
749       assert(deref->deref_type == nir_deref_type_array);
750 
751       if (nir_src_is_const(deref->arr.index) && index == NULL) {
752          /* We're still building a direct index */
753          base_index += nir_src_as_uint(deref->arr.index) * array_elements;
754       } else {
755          if (index == NULL) {
756             /* We used to be direct but not anymore */
757             index = nir_imm_int(b, base_index);
758             base_index = 0;
759          }
760 
761          index = nir_iadd(b, index,
762                           nir_imul_imm(b, deref->arr.index.ssa,
763                                        array_elements));
764       }
765 
766       array_elements *= glsl_get_length(parent->type);
767 
768       deref = parent;
769    }
770 
771    if (index)
772       index = nir_umin(b, index, nir_imm_int(b, array_elements - 1));
773 
774    uint32_t set = deref->var->data.descriptor_set;
775    uint32_t binding = deref->var->data.binding;
776    struct v3dv_descriptor_set_layout *set_layout = state->layout->set[set].layout;
777    struct v3dv_descriptor_set_binding_layout *binding_layout =
778       &set_layout->binding[binding];
779 
780    uint32_t array_index = deref->var->data.index + base_index;
781 
782    assert(binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE ||
783           binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);
784 
785    struct v3dv_descriptor_map *map =
786       pipeline_get_descriptor_map(state->pipeline, binding_layout->type,
787                                   b->shader->info.stage, false);
788 
789    int desc_index =
790       descriptor_map_add(map,
791                          deref->var->data.descriptor_set,
792                          deref->var->data.binding,
793                          array_index,
794                          binding_layout->array_size,
795                          0,
796                          32 /* return_size: doesn't apply for textures */,
797                          0);
798 
799    /* Note: we don't need to do anything here in relation to the precision and
800     * the output size because for images we can infer that info from the image
801     * intrinsic, that includes the image format (see
802     * NIR_INTRINSIC_FORMAT). That is done by the v3d compiler.
803     */
804 
805    index = nir_imm_int(b, desc_index);
806 
807    nir_rewrite_image_intrinsic(instr, index, false);
808 }
809 
810 static bool
811 lower_intrinsic(nir_builder *b,
812                 nir_intrinsic_instr *instr,
813                 struct lower_pipeline_layout_state *state)
814 {
815    switch (instr->intrinsic) {
816    case nir_intrinsic_load_push_constant:
817       lower_load_push_constant(b, instr, state);
818       return true;
819 
820    case nir_intrinsic_vulkan_resource_index:
821       lower_vulkan_resource_index(b, instr, state);
822       return true;
823 
824    case nir_intrinsic_load_vulkan_descriptor: {
825       /* Loading the descriptor happens as part of load/store instructions,
826        * so for us this is a no-op.
827        */
828       nir_def_replace(&instr->def, instr->src[0].ssa);
829       return true;
830    }
831 
832    case nir_intrinsic_image_deref_load:
833    case nir_intrinsic_image_deref_store:
834    case nir_intrinsic_image_deref_atomic:
835    case nir_intrinsic_image_deref_atomic_swap:
836    case nir_intrinsic_image_deref_size:
837    case nir_intrinsic_image_deref_samples:
838       lower_image_deref(b, instr, state);
839       return true;
840 
841    default:
842       return false;
843    }
844 }
845 
846 static bool
847 lower_pipeline_layout_cb(nir_builder *b,
848                          nir_instr *instr,
849                          void *_state)
850 {
851    bool progress = false;
852    struct lower_pipeline_layout_state *state = _state;
853 
854    b->cursor = nir_before_instr(instr);
855    switch (instr->type) {
856    case nir_instr_type_tex:
857       progress |= lower_sampler(b, nir_instr_as_tex(instr), state);
858       break;
859    case nir_instr_type_intrinsic:
860       progress |= lower_intrinsic(b, nir_instr_as_intrinsic(instr), state);
861       break;
862    default:
863       break;
864    }
865 
866    return progress;
867 }
868 
869 static bool
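/* Walks every instruction rewriting descriptor-related texture sources and
 * intrinsics to the flat indices expected by the v3d compiler, and reports
 * whether any of them required a default sampler state.
 */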
870 lower_pipeline_layout_info(nir_shader *shader,
871                            struct v3dv_pipeline *pipeline,
872                            const struct v3dv_pipeline_layout *layout,
873                            bool *needs_default_sampler_state)
874 {
875    bool progress = false;
876 
877    struct lower_pipeline_layout_state state = {
878       .pipeline = pipeline,
879       .layout = layout,
880       .needs_default_sampler_state = false,
881    };
882 
883    progress = nir_shader_instructions_pass(shader, lower_pipeline_layout_cb,
884                                            nir_metadata_control_flow,
885                                            &state);
886 
887    *needs_default_sampler_state = state.needs_default_sampler_state;
888 
889    return progress;
890 }
891 
892 /* This flips gl_PointCoord.y to match Vulkan requirements */
893 static bool
894 lower_point_coord_cb(nir_builder *b, nir_intrinsic_instr *intr, void *_state)
895 {
896    if (intr->intrinsic != nir_intrinsic_load_input)
897       return false;
898 
899    if (nir_intrinsic_io_semantics(intr).location != VARYING_SLOT_PNTC)
900       return false;
901 
902    b->cursor = nir_after_instr(&intr->instr);
903    nir_def *result = &intr->def;
904    result =
905       nir_vector_insert_imm(b, result,
906                             nir_fsub_imm(b, 1.0, nir_channel(b, result, 1)), 1);
907    nir_def_rewrite_uses_after(&intr->def,
908                                   result, result->parent_instr);
909    return true;
910 }
911 
912 static bool
913 v3d_nir_lower_point_coord(nir_shader *s)
914 {
915    assert(s->info.stage == MESA_SHADER_FRAGMENT);
916    return nir_shader_intrinsics_pass(s, lower_point_coord_cb,
917                                        nir_metadata_control_flow, NULL);
918 }
919 
920 static void
921 lower_fs_io(nir_shader *nir)
922 {
923    /* Our backend doesn't handle array fragment shader outputs */
924    NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
925    NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_shader_out, NULL);
926 
927    nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
928                                MESA_SHADER_FRAGMENT);
929 
930    nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
931                                MESA_SHADER_FRAGMENT);
932 
933    NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
934             type_size_vec4, 0);
935 }
936 
937 static void
938 lower_gs_io(struct nir_shader *nir)
939 {
940    NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
941 
942    nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
943                                MESA_SHADER_GEOMETRY);
944 
945    nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
946                                MESA_SHADER_GEOMETRY);
947 }
948 
949 static void
950 lower_vs_io(struct nir_shader *nir)
951 {
952    NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
953 
954    nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
955                                MESA_SHADER_VERTEX);
956 
957    nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
958                                MESA_SHADER_VERTEX);
959 
960    /* FIXME: if we call nir_lower_io, we get a crash later. Likely because it
961     * overlaps with v3d_nir_lower_io. Need further research though.
962     */
963 }
964 
965 static void
966 shader_debug_output(const char *message, void *data)
967 {
968    /* FIXME: We probably don't want to debug anything extra here, and in fact
969     * the compiler doesn't use this callback much, only as an alternative
970     * way to dump the shaderdb stats, which you can already get using
971     * V3D_DEBUG=shaderdb. Perhaps it would make sense to revisit the v3d
972     * compiler to remove that callback.
973     */
974 }
975 
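/* Fills the stage-agnostic part of the compiler key: texture/sampler counts
 * and return sizes from the descriptor maps, the last-geometry-stage flag,
 * user clip plane enables and the per-stage robustness settings.
 */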
976 static void
977 pipeline_populate_v3d_key(struct v3d_key *key,
978                           const struct v3dv_pipeline_stage *p_stage,
979                           uint32_t ucp_enables)
980 {
981    assert(p_stage->pipeline->shared_data &&
982           p_stage->pipeline->shared_data->maps[p_stage->stage]);
983 
984    /* The following values are default values used at pipeline create. We use
985     * 32 bit as the default return size here.
986     */
987    struct v3dv_descriptor_map *sampler_map =
988       &p_stage->pipeline->shared_data->maps[p_stage->stage]->sampler_map;
989    struct v3dv_descriptor_map *texture_map =
990       &p_stage->pipeline->shared_data->maps[p_stage->stage]->texture_map;
991 
992    key->num_tex_used = texture_map->num_desc;
993    assert(key->num_tex_used <= V3D_MAX_TEXTURE_SAMPLERS);
994    for (uint32_t tex_idx = 0; tex_idx < texture_map->num_desc; tex_idx++) {
995       key->tex[tex_idx].swizzle[0] = PIPE_SWIZZLE_X;
996       key->tex[tex_idx].swizzle[1] = PIPE_SWIZZLE_Y;
997       key->tex[tex_idx].swizzle[2] = PIPE_SWIZZLE_Z;
998       key->tex[tex_idx].swizzle[3] = PIPE_SWIZZLE_W;
999    }
1000 
1001    key->num_samplers_used = sampler_map->num_desc;
1002    assert(key->num_samplers_used <= V3D_MAX_TEXTURE_SAMPLERS);
1003    for (uint32_t sampler_idx = 0; sampler_idx < sampler_map->num_desc;
1004         sampler_idx++) {
1005       key->sampler[sampler_idx].return_size =
1006          sampler_map->return_size[sampler_idx];
1007 
1008       key->sampler[sampler_idx].return_channels =
1009          key->sampler[sampler_idx].return_size == 32 ? 4 : 2;
1010    }
1011 
1012    switch (p_stage->stage) {
1013    case BROADCOM_SHADER_VERTEX:
1014    case BROADCOM_SHADER_VERTEX_BIN:
1015       key->is_last_geometry_stage =
1016          p_stage->pipeline->stages[BROADCOM_SHADER_GEOMETRY] == NULL;
1017       break;
1018    case BROADCOM_SHADER_GEOMETRY:
1019    case BROADCOM_SHADER_GEOMETRY_BIN:
1020       /* FIXME: while we don't implement tessellation shaders */
1021       key->is_last_geometry_stage = true;
1022       break;
1023    case BROADCOM_SHADER_FRAGMENT:
1024    case BROADCOM_SHADER_COMPUTE:
1025       key->is_last_geometry_stage = false;
1026       break;
1027    default:
1028       unreachable("unsupported shader stage");
1029    }
1030 
1031    /* Vulkan doesn't have fixed function state for user clip planes. Instead,
1032     * shaders can write to gl_ClipDistance[], in which case the SPIR-V compiler
1033     * takes care of adding a single compact array variable at
1034     * VARYING_SLOT_CLIP_DIST0, so we don't need any user clip plane lowering.
1035     *
1036     * The only lowering we are interested is specific to the fragment shader,
1037     * where we want to emit discards to honor writes to gl_ClipDistance[] in
1038     * previous stages. This is done via nir_lower_clip_fs() so we only set up
1039     * the ucp enable mask for that stage.
1040     */
1041    key->ucp_enables = ucp_enables;
1042 
1043    const VkPipelineRobustnessBufferBehaviorEXT robust_buffer_enabled =
1044       VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT;
1045 
1046    const VkPipelineRobustnessImageBehaviorEXT robust_image_enabled =
1047       VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_EXT;
1048 
1049    key->robust_uniform_access =
1050       p_stage->robustness.uniform_buffers == robust_buffer_enabled;
1051    key->robust_storage_access =
1052       p_stage->robustness.storage_buffers == robust_buffer_enabled;
1053    key->robust_image_access =
1054       p_stage->robustness.images == robust_image_enabled;
1055 }
1056 
1057 /* FIXME: anv maps to hw primitive type. Perhaps eventually we would do the
1058  * same. For now we use prim_mode, which is the one already used on v3d.
1059  */
1060 static const enum mesa_prim vk_to_mesa_prim[] = {
1061    [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = MESA_PRIM_POINTS,
1062    [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = MESA_PRIM_LINES,
1063    [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = MESA_PRIM_LINE_STRIP,
1064    [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = MESA_PRIM_TRIANGLES,
1065    [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = MESA_PRIM_TRIANGLE_STRIP,
1066    [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = MESA_PRIM_TRIANGLE_FAN,
1067    [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = MESA_PRIM_LINES_ADJACENCY,
1068    [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = MESA_PRIM_LINE_STRIP_ADJACENCY,
1069    [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = MESA_PRIM_TRIANGLES_ADJACENCY,
1070    [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = MESA_PRIM_TRIANGLE_STRIP_ADJACENCY,
1071 };
1072 
1073 uint32_t
1074 v3dv_pipeline_primitive(VkPrimitiveTopology vk_prim)
1075 {
1076    return v3d_hw_prim_type(vk_to_mesa_prim[vk_prim]);
1077 }
1078 
1079 static const enum pipe_logicop vk_to_pipe_logicop[] = {
1080    [VK_LOGIC_OP_CLEAR] = PIPE_LOGICOP_CLEAR,
1081    [VK_LOGIC_OP_AND] = PIPE_LOGICOP_AND,
1082    [VK_LOGIC_OP_AND_REVERSE] = PIPE_LOGICOP_AND_REVERSE,
1083    [VK_LOGIC_OP_COPY] = PIPE_LOGICOP_COPY,
1084    [VK_LOGIC_OP_AND_INVERTED] = PIPE_LOGICOP_AND_INVERTED,
1085    [VK_LOGIC_OP_NO_OP] = PIPE_LOGICOP_NOOP,
1086    [VK_LOGIC_OP_XOR] = PIPE_LOGICOP_XOR,
1087    [VK_LOGIC_OP_OR] = PIPE_LOGICOP_OR,
1088    [VK_LOGIC_OP_NOR] = PIPE_LOGICOP_NOR,
1089    [VK_LOGIC_OP_EQUIVALENT] = PIPE_LOGICOP_EQUIV,
1090    [VK_LOGIC_OP_INVERT] = PIPE_LOGICOP_INVERT,
1091    [VK_LOGIC_OP_OR_REVERSE] = PIPE_LOGICOP_OR_REVERSE,
1092    [VK_LOGIC_OP_COPY_INVERTED] = PIPE_LOGICOP_COPY_INVERTED,
1093    [VK_LOGIC_OP_OR_INVERTED] = PIPE_LOGICOP_OR_INVERTED,
1094    [VK_LOGIC_OP_NAND] = PIPE_LOGICOP_NAND,
1095    [VK_LOGIC_OP_SET] = PIPE_LOGICOP_SET,
1096 };
1097 
1098 static bool
1099 enable_line_smooth(struct v3dv_pipeline *pipeline,
1100                    const VkPipelineRasterizationStateCreateInfo *rs_info)
1101 {
1102    if (!pipeline->rasterization_enabled)
1103       return false;
1104 
1105    const VkPipelineRasterizationLineStateCreateInfoKHR *ls_info =
1106       vk_find_struct_const(rs_info->pNext,
1107                            PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_KHR);
1108 
1109    if (!ls_info)
1110       return false;
1111 
1112    /* Although topology is dynamic now, the topology class can't change
1113     * because we don't support dynamicPrimitiveTopologyUnrestricted, so we can
1114     * use the static topology from the pipeline for this.
1115     */
1116    switch(pipeline->topology) {
1117    case MESA_PRIM_LINES:
1118    case MESA_PRIM_LINE_LOOP:
1119    case MESA_PRIM_LINE_STRIP:
1120    case MESA_PRIM_LINES_ADJACENCY:
1121    case MESA_PRIM_LINE_STRIP_ADJACENCY:
1122       return ls_info->lineRasterizationMode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_KHR;
1123    default:
1124       return false;
1125    }
1126 }
1127 
1128 static void
1129 v3d_fs_key_set_color_attachment(struct v3d_fs_key *key,
1130                                 const struct v3dv_pipeline_stage *p_stage,
1131                                 uint32_t index,
1132                                 VkFormat fb_format)
1133 {
1134    key->cbufs |= 1 << index;
1135 
1136    enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format);
1137 
1138    /* If logic operations are enabled then we might emit color reads and we
1139     * need to know the color buffer format and swizzle for that
1140     */
1141    if (key->logicop_func != PIPE_LOGICOP_COPY) {
1142       /* Framebuffer formats should be single plane */
1143       assert(vk_format_get_plane_count(fb_format) == 1);
1144       key->color_fmt[index].format = fb_pipe_format;
1145       memcpy(key->color_fmt[index].swizzle,
1146              v3dv_get_format_swizzle(p_stage->pipeline->device, fb_format, 0),
1147              sizeof(key->color_fmt[index].swizzle));
1148    }
1149 
1150    const struct util_format_description *desc =
1151       vk_format_description(fb_format);
1152 
1153    if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
1154        desc->channel[0].size == 32) {
1155       key->f32_color_rb |= 1 << index;
1156    }
1157 
1158    if (p_stage->nir->info.fs.untyped_color_outputs) {
1159       if (util_format_is_pure_uint(fb_pipe_format))
1160          key->uint_color_rb |= 1 << index;
1161       else if (util_format_is_pure_sint(fb_pipe_format))
1162          key->int_color_rb |= 1 << index;
1163    }
1164 }
1165 
1166 static void
1167 pipeline_populate_v3d_fs_key(struct v3d_fs_key *key,
1168                              const VkGraphicsPipelineCreateInfo *pCreateInfo,
1169                              const struct vk_render_pass_state *rendering_info,
1170                              const struct v3dv_pipeline_stage *p_stage,
1171                              bool has_geometry_shader,
1172                              uint32_t ucp_enables)
1173 {
1174    assert(p_stage->stage == BROADCOM_SHADER_FRAGMENT);
1175 
1176    memset(key, 0, sizeof(*key));
1177 
1178    struct v3dv_device *device = p_stage->pipeline->device;
1179    assert(device);
1180 
1181    pipeline_populate_v3d_key(&key->base, p_stage, ucp_enables);
1182 
1183    const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1184       pCreateInfo->pInputAssemblyState;
1185    uint8_t topology = vk_to_mesa_prim[ia_info->topology];
1186 
1187    key->is_points = (topology == MESA_PRIM_POINTS);
1188    key->is_lines = (topology >= MESA_PRIM_LINES &&
1189                     topology <= MESA_PRIM_LINE_STRIP);
1190 
1191    if (key->is_points) {
1192       /* This mask represents state for GL_ARB_point_sprite which is not
1193        * relevant to Vulkan.
1194        */
1195       key->point_sprite_mask = 0;
1196 
1197       /* Vulkan mandates upper left. */
1198       key->point_coord_upper_left = true;
1199    }
1200 
1201    key->has_gs = has_geometry_shader;
1202 
1203    const VkPipelineColorBlendStateCreateInfo *cb_info =
1204       p_stage->pipeline->rasterization_enabled ?
1205       pCreateInfo->pColorBlendState : NULL;
1206 
1207    key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ?
1208                        vk_to_pipe_logicop[cb_info->logicOp] :
1209                        PIPE_LOGICOP_COPY;
1210 
1211    /* Multisample rasterization state must be ignored if rasterization
1212     * is disabled.
1213     */
1214    const VkPipelineMultisampleStateCreateInfo *ms_info =
1215       p_stage->pipeline->rasterization_enabled ? pCreateInfo->pMultisampleState : NULL;
1216    if (ms_info) {
1217       assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT ||
1218              ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT);
1219       key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;
1220 
1221       if (key->msaa)
1222          key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable;
1223 
1224       key->sample_alpha_to_one = ms_info->alphaToOneEnable;
1225    }
1226 
1227    key->line_smoothing = enable_line_smooth(p_stage->pipeline,
1228                                             pCreateInfo->pRasterizationState);
1229 
1230    /* This is intended for V3D versions before 4.1, otherwise we just use the
1231     * tile buffer load/store swap R/B bit.
1232     */
1233    key->swap_color_rb = 0;
1234 
1235    for (uint32_t i = 0; i < rendering_info->color_attachment_count; i++) {
1236       if (rendering_info->color_attachment_formats[i] == VK_FORMAT_UNDEFINED)
1237          continue;
1238       v3d_fs_key_set_color_attachment(key, p_stage, i,
1239                                       rendering_info->color_attachment_formats[i]);
1240    }
1241 }
1242 
1243 static void
1244 setup_stage_outputs_from_next_stage_inputs(
1245    uint8_t next_stage_num_inputs,
1246    struct v3d_varying_slot *next_stage_input_slots,
1247    uint8_t *num_used_outputs,
1248    struct v3d_varying_slot *used_output_slots,
1249    uint32_t size_of_used_output_slots)
1250 {
1251    *num_used_outputs = next_stage_num_inputs;
1252    memcpy(used_output_slots, next_stage_input_slots, size_of_used_output_slots);
1253 }
1254 
1255 static void
1256 pipeline_populate_v3d_gs_key(struct v3d_gs_key *key,
1257                              const VkGraphicsPipelineCreateInfo *pCreateInfo,
1258                              const struct v3dv_pipeline_stage *p_stage)
1259 {
1260    assert(p_stage->stage == BROADCOM_SHADER_GEOMETRY ||
1261           p_stage->stage == BROADCOM_SHADER_GEOMETRY_BIN);
1262 
1263    struct v3dv_device *device = p_stage->pipeline->device;
1264    assert(device);
1265 
1266    memset(key, 0, sizeof(*key));
1267 
1268    pipeline_populate_v3d_key(&key->base, p_stage, 0);
1269 
1270    struct v3dv_pipeline *pipeline = p_stage->pipeline;
1271 
1272    key->per_vertex_point_size =
1273       p_stage->nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ);
1274 
1275    key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage);
1276 
1277    assert(key->base.is_last_geometry_stage);
1278    if (key->is_coord) {
1279       /* Output varyings in the last binning shader are only used for transform
1280        * feedback. Set to 0 as VK_EXT_transform_feedback is not supported.
1281        */
1282       key->num_used_outputs = 0;
1283    } else {
1284       struct v3dv_shader_variant *fs_variant =
1285          pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
1286 
1287       STATIC_ASSERT(sizeof(key->used_outputs) ==
1288                     sizeof(fs_variant->prog_data.fs->input_slots));
1289 
1290       setup_stage_outputs_from_next_stage_inputs(
1291          fs_variant->prog_data.fs->num_inputs,
1292          fs_variant->prog_data.fs->input_slots,
1293          &key->num_used_outputs,
1294          key->used_outputs,
1295          sizeof(key->used_outputs));
1296    }
1297 }
1298 
1299 static void
1300 pipeline_populate_v3d_vs_key(struct v3d_vs_key *key,
1301                              const VkGraphicsPipelineCreateInfo *pCreateInfo,
1302                              const struct v3dv_pipeline_stage *p_stage)
1303 {
1304    assert(p_stage->stage == BROADCOM_SHADER_VERTEX ||
1305           p_stage->stage == BROADCOM_SHADER_VERTEX_BIN);
1306 
1307    struct v3dv_device *device = p_stage->pipeline->device;
1308    assert(device);
1309 
1310    memset(key, 0, sizeof(*key));
1311    pipeline_populate_v3d_key(&key->base, p_stage, 0);
1312 
1313    struct v3dv_pipeline *pipeline = p_stage->pipeline;
1314 
1315    key->per_vertex_point_size =
1316       p_stage->nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ);
1317 
1318    key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage);
1319 
1320    if (key->is_coord) { /* Binning VS*/
1321       if (key->base.is_last_geometry_stage) {
1322          /* Output varyings in the last binning shader are only used for
1323           * transform feedback. Set to 0 as VK_EXT_transform_feedback is not
1324           * supported.
1325           */
1326          key->num_used_outputs = 0;
1327       } else {
1328          /* Linking against GS binning program */
1329          assert(pipeline->stages[BROADCOM_SHADER_GEOMETRY]);
1330          struct v3dv_shader_variant *gs_bin_variant =
1331             pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];
1332 
1333          STATIC_ASSERT(sizeof(key->used_outputs) ==
1334                        sizeof(gs_bin_variant->prog_data.gs->input_slots));
1335 
1336          setup_stage_outputs_from_next_stage_inputs(
1337             gs_bin_variant->prog_data.gs->num_inputs,
1338             gs_bin_variant->prog_data.gs->input_slots,
1339             &key->num_used_outputs,
1340             key->used_outputs,
1341             sizeof(key->used_outputs));
1342       }
1343    } else { /* Render VS */
1344       if (pipeline->stages[BROADCOM_SHADER_GEOMETRY]) {
1345          /* Linking against GS render program */
1346          struct v3dv_shader_variant *gs_variant =
1347             pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];
1348 
1349          STATIC_ASSERT(sizeof(key->used_outputs) ==
1350                        sizeof(gs_variant->prog_data.gs->input_slots));
1351 
1352          setup_stage_outputs_from_next_stage_inputs(
1353             gs_variant->prog_data.gs->num_inputs,
1354             gs_variant->prog_data.gs->input_slots,
1355             &key->num_used_outputs,
1356             key->used_outputs,
1357             sizeof(key->used_outputs));
1358       } else {
1359          /* Linking against FS program */
1360          struct v3dv_shader_variant *fs_variant =
1361             pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
1362 
1363          STATIC_ASSERT(sizeof(key->used_outputs) ==
1364                        sizeof(fs_variant->prog_data.fs->input_slots));
1365 
1366          setup_stage_outputs_from_next_stage_inputs(
1367             fs_variant->prog_data.fs->num_inputs,
1368             fs_variant->prog_data.fs->input_slots,
1369             &key->num_used_outputs,
1370             key->used_outputs,
1371             sizeof(key->used_outputs));
1372       }
1373    }
1374 
1375    const VkPipelineVertexInputStateCreateInfo *vi_info =
1376       pCreateInfo->pVertexInputState;
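   /* Illustrative example (hypothetical attribute): a VK_FORMAT_B8G8R8A8_UNORM
    * attribute at location 2 would set bit (VERT_ATTRIB_GENERIC0 + 2) in
    * va_swap_rb_mask below, telling the compiler to swap the red and blue
    * channels when that attribute is read.
    */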
1377    for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
1378       const VkVertexInputAttributeDescription *desc =
1379          &vi_info->pVertexAttributeDescriptions[i];
1380       assert(desc->location < MAX_VERTEX_ATTRIBS);
1381       if (desc->format == VK_FORMAT_B8G8R8A8_UNORM ||
1382           desc->format == VK_FORMAT_A2R10G10B10_UNORM_PACK32) {
1383          key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location);
1384       }
1385    }
1386 }
1387 
1388 /**
1389  * Creates the initial form of the pipeline stage for a binning shader by
1390  * cloning the render shader and flagging it as a coordinate shader.
1391  *
1392  * Returns NULL if it was not able to allocate the object, so it should be
1393  * handled as a VK_ERROR_OUT_OF_HOST_MEMORY error.
1394  */
1395 static struct v3dv_pipeline_stage *
1396 pipeline_stage_create_binning(const struct v3dv_pipeline_stage *src,
1397                               const VkAllocationCallbacks *pAllocator)
1398 {
1399    struct v3dv_device *device = src->pipeline->device;
1400 
1401    struct v3dv_pipeline_stage *p_stage =
1402       vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
1403                  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1404 
1405    if (p_stage == NULL)
1406       return NULL;
1407 
1408    assert(src->stage == BROADCOM_SHADER_VERTEX ||
1409           src->stage == BROADCOM_SHADER_GEOMETRY);
1410 
1411    enum broadcom_shader_stage bin_stage =
1412       src->stage == BROADCOM_SHADER_VERTEX ?
1413          BROADCOM_SHADER_VERTEX_BIN :
1414          BROADCOM_SHADER_GEOMETRY_BIN;
1415 
1416    p_stage->pipeline = src->pipeline;
1417    p_stage->stage = bin_stage;
1418    p_stage->entrypoint = src->entrypoint;
1419    p_stage->module = src->module;
1420    p_stage->module_info = src->module_info;
1421 
1422    /* For binning shaders we will clone the NIR code from the corresponding
1423     * render shader later, when we call pipeline_compile_xxx_shader. This way
1424     * we only have to run the relevant NIR lowerings once, on the render shaders.
1425     */
1426    p_stage->nir = NULL;
1427    p_stage->program_id = src->program_id;
1428    p_stage->spec_info = src->spec_info;
1429    p_stage->feedback = (VkPipelineCreationFeedback) { 0 };
1430    p_stage->robustness = src->robustness;
1431    memcpy(p_stage->shader_sha1, src->shader_sha1, 20);
1432 
1433    return p_stage;
1434 }
1435 
1436 /*
1437  * Based on some creation flags we assume that the QPU instructions will be
1438  * needed later to gather further info. In that case we keep qpu_insts
1439  * around instead of mapping/unmapping the BO later.
1440  */
1441 static bool
1442 pipeline_keep_qpu(struct v3dv_pipeline *pipeline)
1443 {
1444    return pipeline->flags &
1445       (VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR |
1446        VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR);
1447 }
1448 
1449 /**
1450  * Returns false if it was not able to allocate or map the assembly bo memory.
1451  */
1452 static bool
1453 upload_assembly(struct v3dv_pipeline *pipeline)
1454 {
1455    uint32_t total_size = 0;
1456    for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
1457       struct v3dv_shader_variant *variant =
1458          pipeline->shared_data->variants[stage];
1459 
1460       if (variant != NULL)
1461          total_size += variant->qpu_insts_size;
1462    }
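   /* Illustrative example (hypothetical sizes): three variants of 512, 384
    * and 768 bytes give total_size = 1664, and the copy loop below packs them
    * into the shared BO at offsets 0, 512 and 896 respectively.
    */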
1463 
1464    struct v3dv_bo *bo = v3dv_bo_alloc(pipeline->device, total_size,
1465                                       "pipeline shader assembly", true);
1466    if (!bo) {
1467       fprintf(stderr, "failed to allocate memory for shader\n");
1468       return false;
1469    }
1470 
1471    bool ok = v3dv_bo_map(pipeline->device, bo, total_size);
1472    if (!ok) {
1473       fprintf(stderr, "failed to map source shader buffer\n");
1474       return false;
1475    }
1476 
1477    uint32_t offset = 0;
1478    for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
1479       struct v3dv_shader_variant *variant =
1480          pipeline->shared_data->variants[stage];
1481 
1482       if (variant != NULL) {
1483          variant->assembly_offset = offset;
1484 
1485          memcpy(bo->map + offset, variant->qpu_insts, variant->qpu_insts_size);
1486          offset += variant->qpu_insts_size;
1487 
1488          if (!pipeline_keep_qpu(pipeline)) {
1489             free(variant->qpu_insts);
1490             variant->qpu_insts = NULL;
1491          }
1492       }
1493    }
1494    assert(total_size == offset);
1495 
1496    pipeline->shared_data->assembly_bo = bo;
1497 
1498    return true;
1499 }
1500 
1501 static void
1502 pipeline_hash_graphics(const struct v3dv_pipeline *pipeline,
1503                        struct v3dv_pipeline_key *key,
1504                        unsigned char *sha1_out)
1505 {
1506    struct mesa_sha1 ctx;
1507    _mesa_sha1_init(&ctx);
1508 
1509    if (pipeline->layout) {
1510       _mesa_sha1_update(&ctx, &pipeline->layout->sha1,
1511                         sizeof(pipeline->layout->sha1));
1512    }
1513 
1514    /* We need to include all shader stages in the sha1 key as linking may
1515     * modify the shader code in any stage. An alternative would be to use the
1516     * serialized NIR, but that seems like overkill.
1517     */
1518    for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
1519       if (broadcom_shader_stage_is_binning(stage))
1520          continue;
1521 
1522       struct v3dv_pipeline_stage *p_stage = pipeline->stages[stage];
1523       if (p_stage == NULL)
1524          continue;
1525 
1526       assert(stage != BROADCOM_SHADER_COMPUTE);
1527 
1528       _mesa_sha1_update(&ctx, p_stage->shader_sha1, sizeof(p_stage->shader_sha1));
1529    }
1530 
1531    _mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key));
1532 
1533    _mesa_sha1_final(&ctx, sha1_out);
1534 }
1535 
1536 static void
1537 pipeline_hash_compute(const struct v3dv_pipeline *pipeline,
1538                       struct v3dv_pipeline_key *key,
1539                       unsigned char *sha1_out)
1540 {
1541    struct mesa_sha1 ctx;
1542    _mesa_sha1_init(&ctx);
1543 
1544    if (pipeline->layout) {
1545       _mesa_sha1_update(&ctx, &pipeline->layout->sha1,
1546                         sizeof(pipeline->layout->sha1));
1547    }
1548 
1549    struct v3dv_pipeline_stage *p_stage =
1550       pipeline->stages[BROADCOM_SHADER_COMPUTE];
1551 
1552    _mesa_sha1_update(&ctx, p_stage->shader_sha1, sizeof(p_stage->shader_sha1));
1553 
1554    _mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key));
1555 
1556    _mesa_sha1_final(&ctx, sha1_out);
1557 }
1558 
1559 /* Ensures that the pipeline has enough spill size for any of its
1560  * variants.
1561  */
1562 static void
1563 pipeline_check_spill_size(struct v3dv_pipeline *pipeline)
1564 {
1565    uint32_t max_spill_size = 0;
1566 
1567    for(uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
1568       struct v3dv_shader_variant *variant =
1569          pipeline->shared_data->variants[stage];
1570 
1571       if (variant != NULL) {
1572          max_spill_size = MAX2(variant->prog_data.base->spill_size,
1573                                max_spill_size);
1574       }
1575    }
1576 
1577    if (max_spill_size > 0) {
1578       struct v3dv_device *device = pipeline->device;
1579 
1580       /* The TIDX register we use for choosing the area to access
1581        * for scratch space is: (core << 6) | (qpu << 2) | thread.
1582        * Even at minimum threadcount in a particular shader, that
1583        * means we still multiply the number of QPUs by 4.
1584        */
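      /* Illustrative example (hypothetical numbers): with qpu_count = 12 and
       * max_spill_size = 512 bytes, total_spill_size below is
       * 4 * 12 * 512 = 24576 bytes, i.e. one max_spill_size slice for each of
       * the up to 4 threads on every QPU.
       */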
1585       const uint32_t total_spill_size =
1586          4 * device->devinfo.qpu_count * max_spill_size;
1587       if (pipeline->spill.bo) {
1588          assert(pipeline->spill.size_per_thread > 0);
1589          v3dv_bo_free(device, pipeline->spill.bo);
1590       }
1591       pipeline->spill.bo =
1592          v3dv_bo_alloc(device, total_spill_size, "spill", true);
1593       pipeline->spill.size_per_thread = max_spill_size;
1594    }
1595 }
1596 
1597 /**
1598  * Creates a new shader variant. Note that prog_data is not const, as it is
1599  * assumed that the caller provides a pointer that the shader_variant will
1600  * own.
1601  *
1602  * Creation doesn't include allocating a BO to store the contents of
1603  * qpu_insts, as we will try to share the same BO among several shader
1604  * variants. Also note that qpu_insts being NULL is valid, for example when
1605  * we are creating the shader_variants from the cache, so we can just upload
1606  * the assembly of all the shader stages at once.
1607  */
1608 struct v3dv_shader_variant *
1609 v3dv_shader_variant_create(struct v3dv_device *device,
1610                            enum broadcom_shader_stage stage,
1611                            struct v3d_prog_data *prog_data,
1612                            uint32_t prog_data_size,
1613                            uint32_t assembly_offset,
1614                            uint64_t *qpu_insts,
1615                            uint32_t qpu_insts_size,
1616                            VkResult *out_vk_result)
1617 {
1618    struct v3dv_shader_variant *variant =
1619       vk_zalloc(&device->vk.alloc, sizeof(*variant), 8,
1620                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1621 
1622    if (variant == NULL) {
1623       *out_vk_result = VK_ERROR_OUT_OF_HOST_MEMORY;
1624       return NULL;
1625    }
1626 
1627    variant->stage = stage;
1628    variant->prog_data_size = prog_data_size;
1629    variant->prog_data.base = prog_data;
1630 
1631    variant->assembly_offset = assembly_offset;
1632    variant->qpu_insts_size = qpu_insts_size;
1633    variant->qpu_insts = qpu_insts;
1634 
1635    *out_vk_result = VK_SUCCESS;
1636 
1637    return variant;
1638 }
1639 
1640 /* For a given key, returns the compiled version of the shader. Returns a
1641  * new reference to the shader_variant to the caller, or NULL.
1642  *
1643  * If the method returns NULL it means that something went wrong:
1644  *   * Not enough memory: this is one of the possible outcomes defined by
1645  *     vkCreateXXXPipelines. out_vk_result will return the proper OOM error.
1646  *   * Compilation error: hypothetically this shouldn't happen, as the spec
1647  *     states that vkShaderModule needs to be created with valid SPIR-V, so
1648  *     any compilation failure is a driver bug. In practice, something as
1649  *     common as failing register allocation can lead to a compilation
1650  *     failure. In that case the only option (for any driver) is
1651  *     VK_ERROR_UNKNOWN, even if we know that the problem was a compiler
1652  *     error.
1653  */
1654 static struct v3dv_shader_variant *
1655 pipeline_compile_shader_variant(struct v3dv_pipeline_stage *p_stage,
1656                                 struct v3d_key *key,
1657                                 size_t key_size,
1658                                 const VkAllocationCallbacks *pAllocator,
1659                                 VkResult *out_vk_result)
1660 {
1661    int64_t stage_start = os_time_get_nano();
1662 
1663    struct v3dv_pipeline *pipeline = p_stage->pipeline;
1664    struct v3dv_physical_device *physical_device = pipeline->device->pdevice;
1665    const struct v3d_compiler *compiler = physical_device->compiler;
1666    gl_shader_stage gl_stage = broadcom_shader_stage_to_gl(p_stage->stage);
1667 
1668    if (V3D_DBG(NIR) || v3d_debug_flag_for_shader_stage(gl_stage)) {
1669       fprintf(stderr, "Just before v3d_compile: %s prog %d NIR:\n",
1670               broadcom_shader_stage_name(p_stage->stage),
1671               p_stage->program_id);
1672       nir_print_shader(p_stage->nir, stderr);
1673       fprintf(stderr, "\n");
1674    }
1675 
1676    uint64_t *qpu_insts;
1677    uint32_t qpu_insts_size;
1678    struct v3d_prog_data *prog_data;
1679    uint32_t prog_data_size = v3d_prog_data_size(gl_stage);
1680 
1681    qpu_insts = v3d_compile(compiler,
1682                            key, &prog_data,
1683                            p_stage->nir,
1684                            shader_debug_output, NULL,
1685                            p_stage->program_id, 0,
1686                            &qpu_insts_size);
1687 
1688    struct v3dv_shader_variant *variant = NULL;
1689 
1690    if (!qpu_insts) {
1691       fprintf(stderr, "Failed to compile %s prog %d NIR to VIR\n",
1692               broadcom_shader_stage_name(p_stage->stage),
1693               p_stage->program_id);
1694       *out_vk_result = VK_ERROR_UNKNOWN;
1695    } else {
1696       variant =
1697          v3dv_shader_variant_create(pipeline->device, p_stage->stage,
1698                                     prog_data, prog_data_size,
1699                                     0, /* assembly_offset, no final value yet */
1700                                     qpu_insts, qpu_insts_size,
1701                                     out_vk_result);
1702    }
1703    /* At this point we no longer need the NIR shader, but we free all the
1704     * temporary p_stage structs used during pipeline creation when we finish
1705     * it, so let's not worry about freeing the NIR here.
1706     */
1707 
1708    p_stage->feedback.duration += os_time_get_nano() - stage_start;
1709 
1710    return variant;
1711 }
1712 
1713 static void
1714 link_shaders(nir_shader *producer, nir_shader *consumer)
1715 {
1716    assert(producer);
1717    assert(consumer);
1718 
1719    if (producer->options->lower_to_scalar) {
1720       NIR_PASS(_, producer, nir_lower_io_to_scalar_early, nir_var_shader_out);
1721       NIR_PASS(_, consumer, nir_lower_io_to_scalar_early, nir_var_shader_in);
1722    }
1723 
1724    nir_lower_io_arrays_to_elements(producer, consumer);
1725 
1726    v3d_optimize_nir(NULL, producer);
1727    v3d_optimize_nir(NULL, consumer);
1728 
1729    if (nir_link_opt_varyings(producer, consumer))
1730       v3d_optimize_nir(NULL, consumer);
1731 
1732    NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
1733    NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
1734 
1735    if (nir_remove_unused_varyings(producer, consumer)) {
1736       NIR_PASS(_, producer, nir_lower_global_vars_to_local);
1737       NIR_PASS(_, consumer, nir_lower_global_vars_to_local);
1738 
1739       v3d_optimize_nir(NULL, producer);
1740       v3d_optimize_nir(NULL, consumer);
1741 
1742       /* Optimizations can cause varyings to become unused.
1743        * nir_compact_varyings() depends on all dead varyings being removed so
1744        * we need to call nir_remove_dead_variables() again here.
1745        */
1746       NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
1747       NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
1748    }
1749 }
1750 
1751 static void
1752 pipeline_lower_nir(struct v3dv_pipeline *pipeline,
1753                    struct v3dv_pipeline_stage *p_stage,
1754                    struct v3dv_pipeline_layout *layout)
1755 {
1756    int64_t stage_start = os_time_get_nano();
1757 
1758    assert(pipeline->shared_data &&
1759           pipeline->shared_data->maps[p_stage->stage]);
1760 
1761    NIR_PASS_V(p_stage->nir, nir_vk_lower_ycbcr_tex,
1762               lookup_ycbcr_conversion, layout);
1763 
1764    nir_shader_gather_info(p_stage->nir, nir_shader_get_entrypoint(p_stage->nir));
1765 
1766    /* We add this because we need a valid sampler for nir_lower_tex to do
1767     * unpacking of the texture operation result, even for the case where there
1768     * is no sampler state.
1769     *
1770     * We add two of those, one for the case where we need a 16-bit
1771     * return_size, and another for the case where we need a 32-bit one.
1772     */
1773    struct v3dv_descriptor_maps *maps =
1774       pipeline->shared_data->maps[p_stage->stage];
1775 
1776    UNUSED unsigned index;
1777    index = descriptor_map_add(&maps->sampler_map, -1, -1, -1, 0, 0, 16, 0);
1778    assert(index == V3DV_NO_SAMPLER_16BIT_IDX);
1779 
1780    index = descriptor_map_add(&maps->sampler_map, -2, -2, -2, 0, 0, 32, 0);
1781    assert(index == V3DV_NO_SAMPLER_32BIT_IDX);
1782 
1783    /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
1784    bool needs_default_sampler_state = false;
1785    NIR_PASS(_, p_stage->nir, lower_pipeline_layout_info, pipeline, layout,
1786             &needs_default_sampler_state);
1787 
1788    /* If in the end we didn't need to use the default sampler states and the
1789     * shader doesn't need any other samplers, get rid of them so we can
1790     * recognize that this program doesn't use any samplers at all.
1791     */
1792    if (!needs_default_sampler_state && maps->sampler_map.num_desc == 2)
1793       maps->sampler_map.num_desc = 0;
1794 
1795    p_stage->feedback.duration += os_time_get_nano() - stage_start;
1796 }
1797 
1798 /**
1799  * The SPIR-V compiler will insert a sized compact array for
1800  * VARYING_SLOT_CLIP_DIST0 if the vertex shader writes to gl_ClipDistance[],
1801  * where the size of the array determines the number of active clip planes.
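 *
 * For example (illustrative), a vertex shader writing float gl_ClipDistance[3]
 * produces a compact array of length 3, so the mask computed below is
 * (1 << 3) - 1 = 0x7, enabling user clip planes 0, 1 and 2.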
1802  */
1803 static uint32_t
1804 get_ucp_enable_mask(struct v3dv_pipeline_stage *p_stage)
1805 {
1806    assert(p_stage->stage == BROADCOM_SHADER_VERTEX);
1807    const nir_shader *shader = p_stage->nir;
1808    assert(shader);
1809 
1810    nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
1811       if (var->data.location == VARYING_SLOT_CLIP_DIST0) {
1812          assert(var->data.compact);
1813          return (1 << glsl_get_length(var->type)) - 1;
1814       }
1815    }
1816    return 0;
1817 }
1818 
1819 static nir_shader *
1820 pipeline_stage_get_nir(struct v3dv_pipeline_stage *p_stage,
1821                        struct v3dv_pipeline *pipeline,
1822                        struct v3dv_pipeline_cache *cache)
1823 {
1824    int64_t stage_start = os_time_get_nano();
1825 
1826    nir_shader *nir = NULL;
1827    const nir_shader_compiler_options *nir_options =
1828       v3dv_pipeline_get_nir_options(&pipeline->device->devinfo);
1829 
1830    nir = v3dv_pipeline_cache_search_for_nir(pipeline, cache,
1831                                             nir_options,
1832                                             p_stage->shader_sha1);
1833 
1834    if (nir) {
1835       assert(nir->info.stage == broadcom_shader_stage_to_gl(p_stage->stage));
1836 
1837       /* A NIR cache hit doesn't avoid the large majority of pipeline stage
1838        * creation work, so the cache hit is not recorded in the pipeline
1839        * feedback flags.
1840        */
1841 
1842       p_stage->feedback.duration += os_time_get_nano() - stage_start;
1843 
1844       return nir;
1845    }
1846 
1847    nir = shader_module_compile_to_nir(pipeline->device, p_stage);
1848 
1849    if (nir) {
1850       struct v3dv_pipeline_cache *default_cache =
1851          &pipeline->device->default_pipeline_cache;
1852 
1853       v3dv_pipeline_cache_upload_nir(pipeline, cache, nir,
1854                                      p_stage->shader_sha1);
1855 
1856       /* Ensure that the variant is in the default cache, as the cmd_buffer
1857        * could need to change the current variant.
1858        */
1859       if (default_cache != cache) {
1860          v3dv_pipeline_cache_upload_nir(pipeline, default_cache, nir,
1861                                         p_stage->shader_sha1);
1862       }
1863 
1864       p_stage->feedback.duration += os_time_get_nano() - stage_start;
1865 
1866       return nir;
1867    }
1868 
1869    /* FIXME: this shouldn't happen, raise error? */
1870    return NULL;
1871 }
1872 
1873 static VkResult
1874 pipeline_compile_vertex_shader(struct v3dv_pipeline *pipeline,
1875                                const VkAllocationCallbacks *pAllocator,
1876                                const VkGraphicsPipelineCreateInfo *pCreateInfo)
1877 {
1878    struct v3dv_pipeline_stage *p_stage_vs =
1879       pipeline->stages[BROADCOM_SHADER_VERTEX];
1880    struct v3dv_pipeline_stage *p_stage_vs_bin =
1881       pipeline->stages[BROADCOM_SHADER_VERTEX_BIN];
1882 
1883    assert(p_stage_vs_bin != NULL);
1884    if (p_stage_vs_bin->nir == NULL) {
1885       assert(p_stage_vs->nir);
1886       p_stage_vs_bin->nir = nir_shader_clone(NULL, p_stage_vs->nir);
1887    }
1888 
1889    VkResult vk_result;
1890    struct v3d_vs_key key;
1891    pipeline_populate_v3d_vs_key(&key, pCreateInfo, p_stage_vs);
1892    pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] =
1893       pipeline_compile_shader_variant(p_stage_vs, &key.base, sizeof(key),
1894                                       pAllocator, &vk_result);
1895    if (vk_result != VK_SUCCESS)
1896       return vk_result;
1897 
1898    pipeline_populate_v3d_vs_key(&key, pCreateInfo, p_stage_vs_bin);
1899    pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN] =
1900       pipeline_compile_shader_variant(p_stage_vs_bin, &key.base, sizeof(key),
1901                                       pAllocator, &vk_result);
1902 
1903    return vk_result;
1904 }
1905 
1906 static VkResult
1907 pipeline_compile_geometry_shader(struct v3dv_pipeline *pipeline,
1908                                  const VkAllocationCallbacks *pAllocator,
1909                                  const VkGraphicsPipelineCreateInfo *pCreateInfo)
1910 {
1911    struct v3dv_pipeline_stage *p_stage_gs =
1912       pipeline->stages[BROADCOM_SHADER_GEOMETRY];
1913    struct v3dv_pipeline_stage *p_stage_gs_bin =
1914       pipeline->stages[BROADCOM_SHADER_GEOMETRY_BIN];
1915 
1916    assert(p_stage_gs);
1917    assert(p_stage_gs_bin != NULL);
1918    if (p_stage_gs_bin->nir == NULL) {
1919       assert(p_stage_gs->nir);
1920       p_stage_gs_bin->nir = nir_shader_clone(NULL, p_stage_gs->nir);
1921    }
1922 
1923    VkResult vk_result;
1924    struct v3d_gs_key key;
1925    pipeline_populate_v3d_gs_key(&key, pCreateInfo, p_stage_gs);
1926    pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] =
1927       pipeline_compile_shader_variant(p_stage_gs, &key.base, sizeof(key),
1928                                       pAllocator, &vk_result);
1929    if (vk_result != VK_SUCCESS)
1930       return vk_result;
1931 
1932    pipeline_populate_v3d_gs_key(&key, pCreateInfo, p_stage_gs_bin);
1933    pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN] =
1934       pipeline_compile_shader_variant(p_stage_gs_bin, &key.base, sizeof(key),
1935                                       pAllocator, &vk_result);
1936 
1937    return vk_result;
1938 }
1939 
1940 static VkResult
1941 pipeline_compile_fragment_shader(struct v3dv_pipeline *pipeline,
1942                                  const VkAllocationCallbacks *pAllocator,
1943                                  const VkGraphicsPipelineCreateInfo *pCreateInfo)
1944 {
1945    struct v3dv_pipeline_stage *p_stage_vs =
1946       pipeline->stages[BROADCOM_SHADER_VERTEX];
1947    struct v3dv_pipeline_stage *p_stage_fs =
1948       pipeline->stages[BROADCOM_SHADER_FRAGMENT];
1949    struct v3dv_pipeline_stage *p_stage_gs =
1950       pipeline->stages[BROADCOM_SHADER_GEOMETRY];
1951 
1952    struct v3d_fs_key key;
1953    pipeline_populate_v3d_fs_key(&key, pCreateInfo, &pipeline->rendering_info,
1954                                 p_stage_fs, p_stage_gs != NULL,
1955                                 get_ucp_enable_mask(p_stage_vs));
1956 
1957    if (key.is_points) {
1958       assert(key.point_coord_upper_left);
1959       NIR_PASS(_, p_stage_fs->nir, v3d_nir_lower_point_coord);
1960    }
1961 
1962    VkResult vk_result;
1963    pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT] =
1964       pipeline_compile_shader_variant(p_stage_fs, &key.base, sizeof(key),
1965                                       pAllocator, &vk_result);
1966 
1967    return vk_result;
1968 }
1969 
1970 static void
1971 pipeline_populate_graphics_key(struct v3dv_pipeline *pipeline,
1972                                struct v3dv_pipeline_key *key,
1973                                const VkGraphicsPipelineCreateInfo *pCreateInfo)
1974 {
1975    struct v3dv_device *device = pipeline->device;
1976    assert(device);
1977 
1978    memset(key, 0, sizeof(*key));
1979 
1980    key->line_smooth = pipeline->line_smooth;
1981 
1982    const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1983       pCreateInfo->pInputAssemblyState;
1984    key->topology = vk_to_mesa_prim[ia_info->topology];
1985 
1986    const VkPipelineColorBlendStateCreateInfo *cb_info =
1987       pipeline->rasterization_enabled ? pCreateInfo->pColorBlendState : NULL;
1988 
1989    key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ?
1990       vk_to_pipe_logicop[cb_info->logicOp] :
1991       PIPE_LOGICOP_COPY;
1992 
1993    /* Multisample rasterization state must be ignored if rasterization
1994     * is disabled.
1995     */
1996    const VkPipelineMultisampleStateCreateInfo *ms_info =
1997       pipeline->rasterization_enabled ? pCreateInfo->pMultisampleState : NULL;
1998    if (ms_info) {
1999       assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT ||
2000              ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT);
2001       key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;
2002 
2003       if (key->msaa)
2004          key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable;
2005 
2006       key->sample_alpha_to_one = ms_info->alphaToOneEnable;
2007    }
2008 
2009    struct vk_render_pass_state *ri = &pipeline->rendering_info;
2010    for (uint32_t i = 0; i < ri->color_attachment_count; i++) {
2011       if (ri->color_attachment_formats[i] == VK_FORMAT_UNDEFINED)
2012          continue;
2013 
2014       key->cbufs |= 1 << i;
2015 
2016       VkFormat fb_format = ri->color_attachment_formats[i];
2017       enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format);
2018 
2019       /* If logic operations are enabled then we might emit color reads and we
2020        * need to know the color buffer format and swizzle for that
2021        */
2022       if (key->logicop_func != PIPE_LOGICOP_COPY) {
2023          /* Framebuffer formats should be single plane */
2024          assert(vk_format_get_plane_count(fb_format) == 1);
2025          key->color_fmt[i].format = fb_pipe_format;
2026          memcpy(key->color_fmt[i].swizzle,
2027                 v3dv_get_format_swizzle(pipeline->device, fb_format, 0),
2028                 sizeof(key->color_fmt[i].swizzle));
2029       }
2030 
2031       const struct util_format_description *desc =
2032          vk_format_description(fb_format);
2033 
2034       if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
2035           desc->channel[0].size == 32) {
2036          key->f32_color_rb |= 1 << i;
2037       }
2038    }
2039 
2040    const VkPipelineVertexInputStateCreateInfo *vi_info =
2041       pCreateInfo->pVertexInputState;
2042    for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
2043       const VkVertexInputAttributeDescription *desc =
2044          &vi_info->pVertexAttributeDescriptions[i];
2045       assert(desc->location < MAX_VERTEX_ATTRIBS);
2046       if (desc->format == VK_FORMAT_B8G8R8A8_UNORM ||
2047           desc->format == VK_FORMAT_A2R10G10B10_UNORM_PACK32) {
2048          key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location);
2049       }
2050    }
2051 
2052    key->has_multiview = ri->view_mask != 0;
2053 }
2054 
2055 static void
2056 pipeline_populate_compute_key(struct v3dv_pipeline *pipeline,
2057                               struct v3dv_pipeline_key *key,
2058                               const VkComputePipelineCreateInfo *pCreateInfo)
2059 {
2060    struct v3dv_device *device = pipeline->device;
2061    assert(device);
2062 
2063    /* We use the same pipeline key for graphics and compute, but we don't need
2064     * to add a field to flag compute keys because this key is not used alone
2065     * to search the cache; we also use, for example, the SPIR-V or the
2066     * serialized NIR, which already identifies compute shaders.
2067     */
2068    memset(key, 0, sizeof(*key));
2069 }
2070 
2071 static struct v3dv_pipeline_shared_data *
2072 v3dv_pipeline_shared_data_new_empty(const unsigned char sha1_key[20],
2073                                     struct v3dv_pipeline *pipeline,
2074                                     bool is_graphics_pipeline)
2075 {
2076    /* We create new_entry using the device alloc. Right now shared_data is
2077     * referenced and unreferenced by both the pipeline and the pipeline cache,
2078     * so we can't ensure that the cache or pipeline alloc will be available on
2079     * the last unref.
2080     */
2081    struct v3dv_pipeline_shared_data *new_entry =
2082       vk_zalloc2(&pipeline->device->vk.alloc, NULL,
2083                  sizeof(struct v3dv_pipeline_shared_data), 8,
2084                  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2085 
2086    if (new_entry == NULL)
2087       return NULL;
2088 
2089    for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
2090       /* We don't need specific descriptor maps for binning stages; we use the
2091        * map for the corresponding render stage.
2092        */
2093       if (broadcom_shader_stage_is_binning(stage))
2094          continue;
2095 
2096       if ((is_graphics_pipeline && stage == BROADCOM_SHADER_COMPUTE) ||
2097           (!is_graphics_pipeline && stage != BROADCOM_SHADER_COMPUTE)) {
2098          continue;
2099       }
2100 
2101       if (stage == BROADCOM_SHADER_GEOMETRY &&
2102           !pipeline->stages[BROADCOM_SHADER_GEOMETRY]) {
2103          /* We always inject a custom GS if we have multiview */
2104          if (!pipeline->rendering_info.view_mask)
2105             continue;
2106       }
2107 
2108       struct v3dv_descriptor_maps *new_maps =
2109          vk_zalloc2(&pipeline->device->vk.alloc, NULL,
2110                     sizeof(struct v3dv_descriptor_maps), 8,
2111                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2112 
2113       if (new_maps == NULL)
2114          goto fail;
2115 
2116       new_entry->maps[stage] = new_maps;
2117    }
2118 
2119    new_entry->maps[BROADCOM_SHADER_VERTEX_BIN] =
2120       new_entry->maps[BROADCOM_SHADER_VERTEX];
2121 
2122    new_entry->maps[BROADCOM_SHADER_GEOMETRY_BIN] =
2123       new_entry->maps[BROADCOM_SHADER_GEOMETRY];
2124 
2125    new_entry->ref_cnt = 1;
2126    memcpy(new_entry->sha1_key, sha1_key, 20);
2127 
2128    return new_entry;
2129 
2130 fail:
2131    if (new_entry != NULL) {
2132       for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
2133          if (new_entry->maps[stage] != NULL)
2134             vk_free(&pipeline->device->vk.alloc, new_entry->maps[stage]);
2135       }
2136    }
2137 
2138    vk_free(&pipeline->device->vk.alloc, new_entry);
2139 
2140    return NULL;
2141 }
2142 
2143 static void
2144 write_creation_feedback(struct v3dv_pipeline *pipeline,
2145                         const void *next,
2146                         const VkPipelineCreationFeedback *pipeline_feedback,
2147                         uint32_t stage_count,
2148                         const VkPipelineShaderStageCreateInfo *stages)
2149 {
2150    const VkPipelineCreationFeedbackCreateInfo *create_feedback =
2151       vk_find_struct_const(next, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
2152 
2153    if (create_feedback) {
2154       typed_memcpy(create_feedback->pPipelineCreationFeedback,
2155                    pipeline_feedback,
2156                    1);
2157 
2158       const uint32_t feedback_stage_count =
2159          create_feedback->pipelineStageCreationFeedbackCount;
2160       assert(feedback_stage_count <= stage_count);
2161 
2162       for (uint32_t i = 0; i < feedback_stage_count; i++) {
2163          gl_shader_stage s = vk_to_mesa_shader_stage(stages[i].stage);
2164          enum broadcom_shader_stage bs = gl_shader_stage_to_broadcom(s);
2165 
2166          create_feedback->pPipelineStageCreationFeedbacks[i] =
2167             pipeline->stages[bs]->feedback;
2168 
2169          if (broadcom_shader_stage_is_render_with_binning(bs)) {
2170             enum broadcom_shader_stage bs_bin =
2171                broadcom_binning_shader_stage_for_render_stage(bs);
2172             create_feedback->pPipelineStageCreationFeedbacks[i].duration +=
2173                pipeline->stages[bs_bin]->feedback.duration;
2174          }
2175       }
2176    }
2177 }
2178 
2179 /* Note that although PrimitiveTopology is now dynamic, it is still safe to
2180  * compute the gs_input/output_primitive from the topology saved in the
2181  * pipeline, as the topology class will not change, because we don't support
2182  * dynamicPrimitiveTopologyUnrestricted.
2183  */
2184 static enum mesa_prim
2185 multiview_gs_input_primitive_from_pipeline(struct v3dv_pipeline *pipeline)
2186 {
2187    switch (pipeline->topology) {
2188    case MESA_PRIM_POINTS:
2189       return MESA_PRIM_POINTS;
2190    case MESA_PRIM_LINES:
2191    case MESA_PRIM_LINE_STRIP:
2192       return MESA_PRIM_LINES;
2193    case MESA_PRIM_TRIANGLES:
2194    case MESA_PRIM_TRIANGLE_STRIP:
2195    case MESA_PRIM_TRIANGLE_FAN:
2196       return MESA_PRIM_TRIANGLES;
2197    default:
2198       /* Since we don't allow GS with multiview, we can only see non-adjacency
2199        * primitives.
2200        */
2201       unreachable("Unexpected pipeline primitive type");
2202    }
2203 }
2204 
2205 static enum mesa_prim
2206 multiview_gs_output_primitive_from_pipeline(struct v3dv_pipeline *pipeline)
2207 {
2208    switch (pipeline->topology) {
2209    case MESA_PRIM_POINTS:
2210       return MESA_PRIM_POINTS;
2211    case MESA_PRIM_LINES:
2212    case MESA_PRIM_LINE_STRIP:
2213       return MESA_PRIM_LINE_STRIP;
2214    case MESA_PRIM_TRIANGLES:
2215    case MESA_PRIM_TRIANGLE_STRIP:
2216    case MESA_PRIM_TRIANGLE_FAN:
2217       return MESA_PRIM_TRIANGLE_STRIP;
2218    default:
2219       /* Since we don't allow GS with multiview, we can only see non-adjacency
2220        * primitives.
2221        */
2222       unreachable("Unexpected pipeline primitive type");
2223    }
2224 }
2225 
2226 static bool
2227 pipeline_add_multiview_gs(struct v3dv_pipeline *pipeline,
2228                           struct v3dv_pipeline_cache *cache,
2229                           const VkAllocationCallbacks *pAllocator)
2230 {
2231    /* Create the passthrough GS from the VS output interface */
2232    struct v3dv_pipeline_stage *p_stage_vs = pipeline->stages[BROADCOM_SHADER_VERTEX];
2233    p_stage_vs->nir = pipeline_stage_get_nir(p_stage_vs, pipeline, cache);
2234    nir_shader *vs_nir = p_stage_vs->nir;
2235 
2236    const nir_shader_compiler_options *options =
2237       v3dv_pipeline_get_nir_options(&pipeline->device->devinfo);
2238    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
2239                                                   "multiview broadcast gs");
2240    nir_shader *nir = b.shader;
2241    nir->info.inputs_read = vs_nir->info.outputs_written;
2242    nir->info.outputs_written = vs_nir->info.outputs_written |
2243                                (1ull << VARYING_SLOT_LAYER);
2244 
2245    uint32_t vertex_count = mesa_vertices_per_prim(pipeline->topology);
2246    nir->info.gs.input_primitive =
2247       multiview_gs_input_primitive_from_pipeline(pipeline);
2248    nir->info.gs.output_primitive =
2249       multiview_gs_output_primitive_from_pipeline(pipeline);
2250    nir->info.gs.vertices_in = vertex_count;
2251    nir->info.gs.vertices_out = nir->info.gs.vertices_in;
2252    nir->info.gs.invocations = 1;
2253    nir->info.gs.active_stream_mask = 0x1;
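   /* What follows builds, in NIR, a passthrough GS that is conceptually
    * equivalent to this sketch (illustrative, not actual driver code): for
    * each of the vertex_count input vertices, copy every VS output varying
    * through unchanged, store the current view index in gl_Layer, and emit
    * the vertex; then end the primitive. Each emitted primitive thus lands on
    * the layer selected by the view index.
    */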
2254 
2255    /* Make a list of GS input/output variables from the VS outputs */
2256    nir_variable *in_vars[100];
2257    nir_variable *out_vars[100];
2258    uint32_t var_count = 0;
2259    nir_foreach_shader_out_variable(out_vs_var, vs_nir) {
2260       char name[8];
2261       snprintf(name, ARRAY_SIZE(name), "in_%d", var_count);
2262 
2263       in_vars[var_count] =
2264          nir_variable_create(nir, nir_var_shader_in,
2265                              glsl_array_type(out_vs_var->type, vertex_count, 0),
2266                              name);
2267       in_vars[var_count]->data.location = out_vs_var->data.location;
2268       in_vars[var_count]->data.location_frac = out_vs_var->data.location_frac;
2269       in_vars[var_count]->data.interpolation = out_vs_var->data.interpolation;
2270 
2271       snprintf(name, ARRAY_SIZE(name), "out_%d", var_count);
2272       out_vars[var_count] =
2273          nir_variable_create(nir, nir_var_shader_out, out_vs_var->type, name);
2274       out_vars[var_count]->data.location = out_vs_var->data.location;
2275       out_vars[var_count]->data.interpolation = out_vs_var->data.interpolation;
2276 
2277       var_count++;
2278    }
2279 
2280    /* Add the gl_Layer output variable */
2281    nir_variable *out_layer =
2282       nir_variable_create(nir, nir_var_shader_out, glsl_int_type(),
2283                           "out_Layer");
2284    out_layer->data.location = VARYING_SLOT_LAYER;
2285 
2286    /* Get the view index value that we will write to gl_Layer */
2287    nir_def *layer =
2288       nir_load_system_value(&b, nir_intrinsic_load_view_index, 0, 1, 32);
2289 
2290    /* Emit all output vertices */
2291    for (uint32_t vi = 0; vi < vertex_count; vi++) {
2292       /* Emit all output varyings */
2293       for (uint32_t i = 0; i < var_count; i++) {
2294          nir_deref_instr *in_value =
2295             nir_build_deref_array_imm(&b, nir_build_deref_var(&b, in_vars[i]), vi);
2296          nir_copy_deref(&b, nir_build_deref_var(&b, out_vars[i]), in_value);
2297       }
2298 
2299       /* Emit gl_Layer write */
2300       nir_store_var(&b, out_layer, layer, 0x1);
2301 
2302       nir_emit_vertex(&b, 0);
2303    }
2304    nir_end_primitive(&b, 0);
2305 
2306    /* Make sure we run our pre-process NIR passes so we produce NIR compatible
2307     * with what we expect from SPIR-V modules.
2308     */
2309    preprocess_nir(nir);
2310 
2311    /* Attach the geometry shader to the pipeline */
2312    struct v3dv_device *device = pipeline->device;
2313    struct v3dv_physical_device *physical_device = device->pdevice;
2314 
2315    struct v3dv_pipeline_stage *p_stage =
2316       vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
2317                  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2318 
2319    if (p_stage == NULL) {
2320       ralloc_free(nir);
2321       return false;
2322    }
2323 
2324    p_stage->pipeline = pipeline;
2325    p_stage->stage = BROADCOM_SHADER_GEOMETRY;
2326    p_stage->entrypoint = "main";
2327    p_stage->module = NULL;
2328    p_stage->module_info = NULL;
2329    p_stage->nir = nir;
2330    pipeline_compute_sha1_from_nir(p_stage);
2331    p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id);
2332    p_stage->robustness = pipeline->stages[BROADCOM_SHADER_VERTEX]->robustness;
2333 
2334    pipeline->has_gs = true;
2335    pipeline->stages[BROADCOM_SHADER_GEOMETRY] = p_stage;
2336    pipeline->active_stages |= MESA_SHADER_GEOMETRY;
2337 
2338    pipeline->stages[BROADCOM_SHADER_GEOMETRY_BIN] =
2339       pipeline_stage_create_binning(p_stage, pAllocator);
2340    if (pipeline->stages[BROADCOM_SHADER_GEOMETRY_BIN] == NULL)
2341       return false;
2342 
2343    return true;
2344 }
2345 
2346 static void
2347 pipeline_check_buffer_device_address(struct v3dv_pipeline *pipeline)
2348 {
2349    for (int i = BROADCOM_SHADER_VERTEX; i < BROADCOM_SHADER_STAGES; i++) {
2350       struct v3dv_shader_variant *variant = pipeline->shared_data->variants[i];
2351       if (variant && variant->prog_data.base->has_global_address) {
2352          pipeline->uses_buffer_device_address = true;
2353          return;
2354       }
2355    }
2356 
2357    pipeline->uses_buffer_device_address = false;
2358 }
2359 
2360 /*
2361  * Compiles a pipeline. Note that it also allocates internal objects, but if
2362  * some allocations succeed while others fail, the method does not free the
2363  * successful ones.
2364  *
2365  * This is done to simplify the code: in that case we just call the pipeline
2366  * destroy method, which handles freeing the internal objects that were
2367  * allocated. We just need to be careful to set the objects that were not
2368  * allocated to NULL.
2369  */
2370 static VkResult
2371 pipeline_compile_graphics(struct v3dv_pipeline *pipeline,
2372                           struct v3dv_pipeline_cache *cache,
2373                           const VkGraphicsPipelineCreateInfo *pCreateInfo,
2374                           const VkAllocationCallbacks *pAllocator)
2375 {
2376    VkPipelineCreationFeedback pipeline_feedback = {
2377       .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
2378    };
2379    int64_t pipeline_start = os_time_get_nano();
2380 
2381    struct v3dv_device *device = pipeline->device;
2382    struct v3dv_physical_device *physical_device = device->pdevice;
2383 
2384    /* First pass to get some common info from the shader, and create the
2385     * individual pipeline_stage objects
2386     */
2387    for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
2388       const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];
2389       gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
2390 
2391       struct v3dv_pipeline_stage *p_stage =
2392          vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
2393                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2394 
2395       if (p_stage == NULL)
2396          return VK_ERROR_OUT_OF_HOST_MEMORY;
2397 
2398       p_stage->program_id =
2399          p_atomic_inc_return(&physical_device->next_program_id);
2400 
2401       enum broadcom_shader_stage broadcom_stage =
2402          gl_shader_stage_to_broadcom(stage);
2403 
2404       p_stage->pipeline = pipeline;
2405       p_stage->stage = broadcom_stage;
2406       p_stage->entrypoint = sinfo->pName;
2407       p_stage->module = vk_shader_module_from_handle(sinfo->module);
2408       p_stage->spec_info = sinfo->pSpecializationInfo;
2409       if (!p_stage->module) {
2410          p_stage->module_info =
2411             vk_find_struct_const(sinfo->pNext, SHADER_MODULE_CREATE_INFO);
2412       }
2413 
2414       vk_pipeline_robustness_state_fill(&device->vk, &p_stage->robustness,
2415                                         pCreateInfo->pNext, sinfo->pNext);
2416 
2417       vk_pipeline_hash_shader_stage(pipeline->flags,
2418                                     &pCreateInfo->pStages[i],
2419                                     &p_stage->robustness,
2420                                     p_stage->shader_sha1);
2421 
2422       pipeline->active_stages |= sinfo->stage;
2423 
2424       /* We will try to get the compiled shader variant directly, so let's not
2425        * worry about getting the NIR shader for now.
2426        */
2427       p_stage->nir = NULL;
2428       pipeline->stages[broadcom_stage] = p_stage;
2429       if (broadcom_stage == BROADCOM_SHADER_GEOMETRY)
2430          pipeline->has_gs = true;
2431 
2432       if (broadcom_shader_stage_is_render_with_binning(broadcom_stage)) {
2433          enum broadcom_shader_stage broadcom_stage_bin =
2434             broadcom_binning_shader_stage_for_render_stage(broadcom_stage);
2435 
2436          pipeline->stages[broadcom_stage_bin] =
2437             pipeline_stage_create_binning(p_stage, pAllocator);
2438 
2439          if (pipeline->stages[broadcom_stage_bin] == NULL)
2440             return VK_ERROR_OUT_OF_HOST_MEMORY;
2441       }
2442    }
2443 
2444    /* Add a no-op fragment shader if needed */
2445    if (!pipeline->stages[BROADCOM_SHADER_FRAGMENT]) {
2446       const nir_shader_compiler_options *compiler_options =
2447          v3dv_pipeline_get_nir_options(&pipeline->device->devinfo);
2448       nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
2449                                                      compiler_options,
2450                                                      "noop_fs");
2451 
2452       struct v3dv_pipeline_stage *p_stage =
2453          vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
2454                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2455 
2456       if (p_stage == NULL)
2457          return VK_ERROR_OUT_OF_HOST_MEMORY;
2458 
2459       p_stage->pipeline = pipeline;
2460       p_stage->stage = BROADCOM_SHADER_FRAGMENT;
2461       p_stage->entrypoint = "main";
2462       p_stage->module = NULL;
2463       p_stage->module_info = NULL;
2464       p_stage->nir = b.shader;
2465       vk_pipeline_robustness_state_fill(&device->vk, &p_stage->robustness,
2466                                         NULL, NULL);
2467       pipeline_compute_sha1_from_nir(p_stage);
2468       p_stage->program_id =
2469          p_atomic_inc_return(&physical_device->next_program_id);
2470 
2471       pipeline->stages[BROADCOM_SHADER_FRAGMENT] = p_stage;
2472       pipeline->active_stages |= MESA_SHADER_FRAGMENT;
2473    }
2474 
2475    /* If multiview is enabled, we inject a custom passthrough geometry shader
2476     * to broadcast draw calls to the appropriate views.
2477     */
2478    const uint32_t view_mask = pipeline->rendering_info.view_mask;
2479    assert(!view_mask ||
2480           (!pipeline->has_gs && !pipeline->stages[BROADCOM_SHADER_GEOMETRY]));
2481    if (view_mask) {
2482       if (!pipeline_add_multiview_gs(pipeline, cache, pAllocator))
2483          return VK_ERROR_OUT_OF_HOST_MEMORY;
2484    }
2485 
2486    /* First we try to get the variants from the pipeline cache (unless we are
2487     * required to capture internal representations, since in that case we need
2488     * to compile).
2489     */
2490    bool needs_executable_info =
2491       pipeline->flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
2492    if (!needs_executable_info) {
2493       struct v3dv_pipeline_key pipeline_key;
2494       pipeline_populate_graphics_key(pipeline, &pipeline_key, pCreateInfo);
2495       pipeline_hash_graphics(pipeline, &pipeline_key, pipeline->sha1);
2496 
2497       bool cache_hit = false;
2498 
2499       pipeline->shared_data =
2500          v3dv_pipeline_cache_search_for_pipeline(cache,
2501                                                  pipeline->sha1,
2502                                                  &cache_hit);
2503 
2504       if (pipeline->shared_data != NULL) {
2505          /* A correct pipeline must have at least a VS and FS */
2506          assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]);
2507          assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]);
2508          assert(pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]);
2509          assert(!pipeline->stages[BROADCOM_SHADER_GEOMETRY] ||
2510                 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]);
2511          assert(!pipeline->stages[BROADCOM_SHADER_GEOMETRY] ||
2512                 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]);
2513 
2514          if (cache_hit && cache != &pipeline->device->default_pipeline_cache)
2515             pipeline_feedback.flags |=
2516                VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
2517 
2518          goto success;
2519       }
2520    }
2521 
2522    if (pipeline->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)
2523       return VK_PIPELINE_COMPILE_REQUIRED;
2524 
2525    /* Otherwise we try to get the NIR shaders (either from the original SPIR-V
2526     * shader or the pipeline cache) and compile.
2527     */
2528    pipeline->shared_data =
2529       v3dv_pipeline_shared_data_new_empty(pipeline->sha1, pipeline, true);
2530    if (!pipeline->shared_data)
2531       return VK_ERROR_OUT_OF_HOST_MEMORY;
2532 
2533    struct v3dv_pipeline_stage *p_stage_vs = pipeline->stages[BROADCOM_SHADER_VERTEX];
2534    struct v3dv_pipeline_stage *p_stage_fs = pipeline->stages[BROADCOM_SHADER_FRAGMENT];
2535    struct v3dv_pipeline_stage *p_stage_gs = pipeline->stages[BROADCOM_SHADER_GEOMETRY];
2536 
2537    p_stage_vs->feedback.flags |=
2538       VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
2539    if (p_stage_gs)
2540       p_stage_gs->feedback.flags |=
2541          VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
2542    p_stage_fs->feedback.flags |=
2543       VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
2544 
2545    if (!p_stage_vs->nir)
2546       p_stage_vs->nir = pipeline_stage_get_nir(p_stage_vs, pipeline, cache);
2547    if (p_stage_gs && !p_stage_gs->nir)
2548       p_stage_gs->nir = pipeline_stage_get_nir(p_stage_gs, pipeline, cache);
2549    if (!p_stage_fs->nir)
2550       p_stage_fs->nir = pipeline_stage_get_nir(p_stage_fs, pipeline, cache);
2551 
2552    /* Linking + pipeline lowerings */
2553    if (p_stage_gs) {
2554       link_shaders(p_stage_gs->nir, p_stage_fs->nir);
2555       link_shaders(p_stage_vs->nir, p_stage_gs->nir);
2556    } else {
2557       link_shaders(p_stage_vs->nir, p_stage_fs->nir);
2558    }
2559 
2560    pipeline_lower_nir(pipeline, p_stage_fs, pipeline->layout);
2561    lower_fs_io(p_stage_fs->nir);
2562 
2563    if (p_stage_gs) {
2564       pipeline_lower_nir(pipeline, p_stage_gs, pipeline->layout);
2565       lower_gs_io(p_stage_gs->nir);
2566    }
2567 
2568    pipeline_lower_nir(pipeline, p_stage_vs, pipeline->layout);
2569    lower_vs_io(p_stage_vs->nir);
2570 
2571    /* Compiling to vir */
2572    VkResult vk_result;
2573 
2574    /* We should have got all the variants or no variants from the cache */
2575    assert(!pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]);
2576    vk_result = pipeline_compile_fragment_shader(pipeline, pAllocator,
2577                                                 pCreateInfo);
2578    if (vk_result != VK_SUCCESS)
2579       return vk_result;
2580 
2581    assert(!pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] &&
2582           !pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]);
2583 
2584    if (p_stage_gs) {
2585       vk_result =
2586          pipeline_compile_geometry_shader(pipeline, pAllocator, pCreateInfo);
2587       if (vk_result != VK_SUCCESS)
2588          return vk_result;
2589    }
2590 
2591    assert(!pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] &&
2592           !pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]);
2593 
2594    vk_result = pipeline_compile_vertex_shader(pipeline, pAllocator, pCreateInfo);
2595    if (vk_result != VK_SUCCESS)
2596       return vk_result;
2597 
2598    if (!upload_assembly(pipeline))
2599       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2600 
2601    v3dv_pipeline_cache_upload_pipeline(pipeline, cache);
2602 
2603  success:
2604 
2605    pipeline_check_buffer_device_address(pipeline);
2606 
2607    pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
2608    write_creation_feedback(pipeline,
2609                            pCreateInfo->pNext,
2610                            &pipeline_feedback,
2611                            pCreateInfo->stageCount,
2612                            pCreateInfo->pStages);
2613 
2614    /* Since we have the variants in the pipeline shared data we can now free
2615     * the pipeline stages.
2616     */
2617    if (!needs_executable_info)
2618       pipeline_free_stages(device, pipeline, pAllocator);
2619 
2620    pipeline_check_spill_size(pipeline);
2621 
2622    return compute_vpm_config(pipeline);
2623 }
2624 
2625 static VkResult
2626 compute_vpm_config(struct v3dv_pipeline *pipeline)
2627 {
2628    struct v3dv_shader_variant *vs_variant =
2629       pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX];
2630    struct v3dv_shader_variant *vs_bin_variant =
2631       pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN];
2632    struct v3d_vs_prog_data *vs = vs_variant->prog_data.vs;
2633    struct v3d_vs_prog_data *vs_bin = vs_bin_variant->prog_data.vs;
2634 
2635    struct v3d_gs_prog_data *gs = NULL;
2636    struct v3d_gs_prog_data *gs_bin = NULL;
2637    if (pipeline->has_gs) {
2638       struct v3dv_shader_variant *gs_variant =
2639          pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];
2640       struct v3dv_shader_variant *gs_bin_variant =
2641          pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];
2642       gs = gs_variant->prog_data.gs;
2643       gs_bin = gs_bin_variant->prog_data.gs;
2644    }
2645 
2646    if (!v3d_compute_vpm_config(&pipeline->device->devinfo,
2647                                vs_bin, vs, gs_bin, gs,
2648                                &pipeline->vpm_cfg_bin,
2649                                &pipeline->vpm_cfg)) {
2650       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2651    }
2652 
2653    return VK_SUCCESS;
2654 }
2655 
2656 static bool
2657 stencil_op_is_no_op(struct vk_stencil_test_face_state *stencil)
2658 {
2659    return stencil->op.depth_fail == VK_STENCIL_OP_KEEP &&
2660           stencil->op.compare == VK_COMPARE_OP_ALWAYS;
2661 }
2662 
2663 /* Computes the ez_state based on a given vk_dynamic_graphics_state.  Note
2664  * that the parameter dyn doesn't need to be pipeline->dynamic_graphics_state,
2665  * as this method can be used by the cmd_buffer too.
2666  */
2667 void
v3dv_compute_ez_state(struct vk_dynamic_graphics_state * dyn,struct v3dv_pipeline * pipeline,enum v3dv_ez_state * ez_state,bool * incompatible_ez_test)2668 v3dv_compute_ez_state(struct vk_dynamic_graphics_state *dyn,
2669                       struct v3dv_pipeline *pipeline,
2670                       enum v3dv_ez_state *ez_state,
2671                       bool *incompatible_ez_test)
2672 {
2673    if (!dyn->ds.depth.test_enable) {
2674       *ez_state = V3D_EZ_DISABLED;
2675       return;
2676    }
2677 
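   /* The depth compare op decides which early-Z direction can be used:
    * LESS/LESS_OR_EQUAL and GREATER/GREATER_OR_EQUAL each select a
    * direction, NEVER/EQUAL are compatible with either direction so it is
    * left undecided, and any other op cannot use early-Z at all.
    */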
2678    switch (dyn->ds.depth.compare_op) {
2679    case VK_COMPARE_OP_LESS:
2680    case VK_COMPARE_OP_LESS_OR_EQUAL:
2681       *ez_state = V3D_EZ_LT_LE;
2682       break;
2683    case VK_COMPARE_OP_GREATER:
2684    case VK_COMPARE_OP_GREATER_OR_EQUAL:
2685       *ez_state = V3D_EZ_GT_GE;
2686       break;
2687    case VK_COMPARE_OP_NEVER:
2688    case VK_COMPARE_OP_EQUAL:
2689       *ez_state = V3D_EZ_UNDECIDED;
2690       break;
2691    default:
2692       *ez_state = V3D_EZ_DISABLED;
2693       *incompatible_ez_test = true;
2694       break;
2695    }
2696 
2697    /* If stencil is enabled and is not a no-op, we need to disable EZ */
2698    if (dyn->ds.stencil.test_enable &&
2699        (!stencil_op_is_no_op(&dyn->ds.stencil.front) ||
2700         !stencil_op_is_no_op(&dyn->ds.stencil.back))) {
2701       *ez_state = V3D_EZ_DISABLED;
2702    }
2703 
2704    /* If the FS writes Z, then it may update against the chosen EZ direction */
2705    struct v3dv_shader_variant *fs_variant =
2706       pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
2707    if (fs_variant && fs_variant->prog_data.fs->writes_z &&
2708        !fs_variant->prog_data.fs->writes_z_from_fep) {
2709       *ez_state = V3D_EZ_DISABLED;
2710    }
2711 }
2712 
2713 
2714 static void
pipeline_set_sample_mask(struct v3dv_pipeline * pipeline,const VkPipelineMultisampleStateCreateInfo * ms_info)2715 pipeline_set_sample_mask(struct v3dv_pipeline *pipeline,
2716                          const VkPipelineMultisampleStateCreateInfo *ms_info)
2717 {
2718    pipeline->sample_mask = (1 << V3D_MAX_SAMPLES) - 1;
2719 
2720    /* Ignore pSampleMask if we are not enabling multisampling. The hardware
2721     * requires this to be 0xf or 0x0 if using a single sample.
2722     */
2723    if (ms_info && ms_info->pSampleMask &&
2724        ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT) {
2725       pipeline->sample_mask &= ms_info->pSampleMask[0];
2726    }
2727 }
2728 
2729 static void
pipeline_set_sample_rate_shading(struct v3dv_pipeline * pipeline,const VkPipelineMultisampleStateCreateInfo * ms_info)2730 pipeline_set_sample_rate_shading(struct v3dv_pipeline *pipeline,
2731                                  const VkPipelineMultisampleStateCreateInfo *ms_info)
2732 {
2733    pipeline->sample_rate_shading =
2734       ms_info && ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT &&
2735       ms_info->sampleShadingEnable;
2736 }
2737 
2738 static void
pipeline_setup_rendering_info(struct v3dv_device * device,struct v3dv_pipeline * pipeline,const VkGraphicsPipelineCreateInfo * pCreateInfo,const VkAllocationCallbacks * alloc)2739 pipeline_setup_rendering_info(struct v3dv_device *device,
2740                               struct v3dv_pipeline *pipeline,
2741                               const VkGraphicsPipelineCreateInfo *pCreateInfo,
2742                               const VkAllocationCallbacks *alloc)
2743 {
2744    struct vk_render_pass_state *rp = &pipeline->rendering_info;
2745 
2746    if (pipeline->pass) {
2747       assert(pipeline->subpass);
2748       struct v3dv_render_pass *pass = pipeline->pass;
2749       struct v3dv_subpass *subpass = pipeline->subpass;
2750       const uint32_t attachment_idx = subpass->ds_attachment.attachment;
2751 
2752       rp->view_mask = subpass->view_mask;
2753 
2754       rp->depth_attachment_format = VK_FORMAT_UNDEFINED;
2755       rp->stencil_attachment_format = VK_FORMAT_UNDEFINED;
2756       rp->attachments = MESA_VK_RP_ATTACHMENT_NONE;
2757       if (attachment_idx != VK_ATTACHMENT_UNUSED) {
2758          VkFormat ds_format = pass->attachments[attachment_idx].desc.format;
2759          if (vk_format_has_depth(ds_format)) {
2760             rp->depth_attachment_format = ds_format;
2761             rp->attachments |= MESA_VK_RP_ATTACHMENT_DEPTH_BIT;
2762          }
2763          if (vk_format_has_stencil(ds_format)) {
2764             rp->stencil_attachment_format = ds_format;
2765             rp->attachments |= MESA_VK_RP_ATTACHMENT_STENCIL_BIT;
2766          }
2767       }
2768 
2769       rp->color_attachment_count = subpass->color_count;
2770       for (uint32_t i = 0; i < subpass->color_count; i++) {
2771          const uint32_t attachment_idx = subpass->color_attachments[i].attachment;
2772          if (attachment_idx == VK_ATTACHMENT_UNUSED) {
2773             rp->color_attachment_formats[i] = VK_FORMAT_UNDEFINED;
2774             continue;
2775          }
2776          rp->color_attachment_formats[i] =
2777             pass->attachments[attachment_idx].desc.format;
2778          rp->attachments |= MESA_VK_RP_ATTACHMENT_COLOR_BIT(i);
2779       }
2780       return;
2781    }
2782 
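   /* With dynamic rendering there is no render pass object, so take the view
    * mask and attachment formats from VkPipelineRenderingCreateInfo in the
    * pNext chain, if present.
    */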
2783    const VkPipelineRenderingCreateInfo *ri =
2784       vk_find_struct_const(pCreateInfo->pNext,
2785                            PIPELINE_RENDERING_CREATE_INFO);
2786    if (ri) {
2787       rp->view_mask = ri->viewMask;
2788 
2789       rp->color_attachment_count = ri->colorAttachmentCount;
2790       for (int i = 0; i < ri->colorAttachmentCount; i++) {
2791          rp->color_attachment_formats[i] = ri->pColorAttachmentFormats[i];
2792          if (rp->color_attachment_formats[i] != VK_FORMAT_UNDEFINED) {
2793             rp->attachments |= MESA_VK_RP_ATTACHMENT_COLOR_BIT(i);
2794          }
2795       }
2796 
2797       rp->depth_attachment_format = ri->depthAttachmentFormat;
2798       if (ri->depthAttachmentFormat != VK_FORMAT_UNDEFINED)
2799          rp->attachments |= MESA_VK_RP_ATTACHMENT_DEPTH_BIT;
2800 
2801       rp->stencil_attachment_format = ri->stencilAttachmentFormat;
2802       if (ri->stencilAttachmentFormat != VK_FORMAT_UNDEFINED)
2803          rp->attachments |= MESA_VK_RP_ATTACHMENT_STENCIL_BIT;
2804 
2805       return;
2806    }
2807 
2808    /* From the Vulkan spec for VkPipelineRenderingCreateInfo:
2809     *
2810     *    "if this structure is not specified, and the pipeline does not include
2811     *     a VkRenderPass, viewMask and colorAttachmentCount are 0, and
2812     *     depthAttachmentFormat and stencilAttachmentFormat are
2813     *     VK_FORMAT_UNDEFINED."
2814     */
2815    pipeline->rendering_info = (struct vk_render_pass_state) {
2816       .view_mask = 0,
2817       .attachments = 0,
2818       .color_attachment_count = 0,
2819       .depth_attachment_format = VK_FORMAT_UNDEFINED,
2820       .stencil_attachment_format = VK_FORMAT_UNDEFINED,
2821    };
2822 }
2823 
2824 static VkResult
pipeline_init_dynamic_state(struct v3dv_device * device,struct v3dv_pipeline * pipeline,struct vk_graphics_pipeline_all_state * pipeline_all_state,struct vk_graphics_pipeline_state * pipeline_state,const VkGraphicsPipelineCreateInfo * pCreateInfo)2825 pipeline_init_dynamic_state(struct v3dv_device *device,
2826                             struct v3dv_pipeline *pipeline,
2827                             struct vk_graphics_pipeline_all_state *pipeline_all_state,
2828                             struct vk_graphics_pipeline_state *pipeline_state,
2829                             const VkGraphicsPipelineCreateInfo *pCreateInfo)
2830 {
2831    VkResult result = VK_SUCCESS;
2832    result = vk_graphics_pipeline_state_fill(&pipeline->device->vk, pipeline_state,
2833                                             pCreateInfo, &pipeline->rendering_info, 0,
2834                                             pipeline_all_state, NULL, 0, NULL);
2835    if (result != VK_SUCCESS)
2836       return result;
2837 
2838    vk_dynamic_graphics_state_fill(&pipeline->dynamic_graphics_state, pipeline_state);
2839 
2840    struct v3dv_dynamic_state *v3dv_dyn = &pipeline->dynamic;
2841    struct vk_dynamic_graphics_state *dyn = &pipeline->dynamic_graphics_state;
2842 
2843    if (BITSET_TEST(dyn->set, MESA_VK_DYNAMIC_VP_VIEWPORTS) ||
2844        BITSET_TEST(dyn->set, MESA_VK_DYNAMIC_VP_SCISSORS)) {
2845       /* FIXME: we don't support multiViewport yet, so viewports[0] is enough
2846        * for now, but this would need to change if we allowed multiple viewports.
2847        */
2848       v3dv_X(device, viewport_compute_xform)(&dyn->vp.viewports[0],
2849                                              v3dv_dyn->viewport.scale[0],
2850                                              v3dv_dyn->viewport.translate[0]);
2851 
2852    }
2853 
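   /* color_write_enable uses one nibble (a bit per RGBA channel) per render
    * target. Default to all channels enabled and, if color blend state is
    * provided, expand its per-attachment enable bits into nibbles.
    */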
2854    v3dv_dyn->color_write_enable =
2855       (1ull << (4 * V3D_MAX_RENDER_TARGETS(device->devinfo.ver))) - 1;
2856    if (pipeline_state->cb) {
2857       const uint8_t color_writes = pipeline_state->cb->color_write_enables;
2858       v3dv_dyn->color_write_enable = 0;
2859       for (uint32_t i = 0; i < pipeline_state->cb->attachment_count; i++) {
2860          v3dv_dyn->color_write_enable |=
2861             (color_writes & BITFIELD_BIT(i)) ? (0xfu << (i * 4)) : 0;
2862       }
2863    }
2864 
2865    return result;
2866 }
2867 
2868 static VkResult
pipeline_init(struct v3dv_pipeline * pipeline,struct v3dv_device * device,struct v3dv_pipeline_cache * cache,const VkGraphicsPipelineCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator)2869 pipeline_init(struct v3dv_pipeline *pipeline,
2870               struct v3dv_device *device,
2871               struct v3dv_pipeline_cache *cache,
2872               const VkGraphicsPipelineCreateInfo *pCreateInfo,
2873               const VkAllocationCallbacks *pAllocator)
2874 {
2875    VkResult result = VK_SUCCESS;
2876 
2877    pipeline->device = device;
2878 
2879    V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, pCreateInfo->layout);
2880    pipeline->layout = layout;
2881    v3dv_pipeline_layout_ref(pipeline->layout);
2882 
2883    V3DV_FROM_HANDLE(v3dv_render_pass, render_pass, pCreateInfo->renderPass);
2884    if (render_pass) {
2885       assert(pCreateInfo->subpass < render_pass->subpass_count);
2886       pipeline->pass = render_pass;
2887       pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass];
2888    }
2889 
2890    pipeline_setup_rendering_info(device, pipeline, pCreateInfo, pAllocator);
2891 
2892    const VkPipelineInputAssemblyStateCreateInfo *ia_info =
2893       pCreateInfo->pInputAssemblyState;
2894    pipeline->topology = vk_to_mesa_prim[ia_info->topology];
2895 
2896    struct vk_graphics_pipeline_all_state all;
2897    struct vk_graphics_pipeline_state pipeline_state = { };
2898    result = pipeline_init_dynamic_state(device, pipeline, &all, &pipeline_state,
2899                                         pCreateInfo);
2900 
2901    if (result != VK_SUCCESS) {
2902       /* The caller will destroy the pipeline and we didn't allocate any extra
2903        * data, so there is nothing else to clean up here.
2904        */
2905       return result;
2906    }
2907 
2908    /* If rasterization is disabled, we just disable it through the CFG_BITS
2909     * packet, so for building the pipeline we always assume it is enabled
2910     */
2911    const bool raster_enabled =
2912       (pipeline_state.rs && !pipeline_state.rs->rasterizer_discard_enable) ||
2913       BITSET_TEST(pipeline_state.dynamic, MESA_VK_DYNAMIC_RS_RASTERIZER_DISCARD_ENABLE);
2914 
2915    pipeline->rasterization_enabled = raster_enabled;
2916 
2917    const VkPipelineViewportStateCreateInfo *vp_info =
2918       raster_enabled ? pCreateInfo->pViewportState : NULL;
2919 
2920    const VkPipelineDepthStencilStateCreateInfo *ds_info =
2921       raster_enabled ? pCreateInfo->pDepthStencilState : NULL;
2922 
2923    const VkPipelineRasterizationStateCreateInfo *rs_info =
2924       raster_enabled ? pCreateInfo->pRasterizationState : NULL;
2925 
2926    const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info =
2927       raster_enabled ? vk_find_struct_const(
2928          rs_info->pNext,
2929          PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT) :
2930             NULL;
2931 
2932    const VkPipelineRasterizationLineStateCreateInfoEXT *ls_info =
2933       raster_enabled ? vk_find_struct_const(
2934          rs_info->pNext,
2935          PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT) :
2936             NULL;
2937 
2938    const VkPipelineColorBlendStateCreateInfo *cb_info =
2939       raster_enabled ? pCreateInfo->pColorBlendState : NULL;
2940 
2941    const VkPipelineMultisampleStateCreateInfo *ms_info =
2942       raster_enabled ? pCreateInfo->pMultisampleState : NULL;
2943 
2944    const VkPipelineViewportDepthClipControlCreateInfoEXT *depth_clip_control =
2945       vp_info ? vk_find_struct_const(vp_info->pNext,
2946                                      PIPELINE_VIEWPORT_DEPTH_CLIP_CONTROL_CREATE_INFO_EXT) :
2947                 NULL;
2948 
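   /* VK_EXT_depth_clip_control: negativeOneToOne selects an OpenGL-style
    * [-1, 1] clip-space depth range instead of the default [0, 1].
    */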
2949    if (depth_clip_control)
2950       pipeline->negative_one_to_one = depth_clip_control->negativeOneToOne;
2951 
2952    v3dv_X(device, pipeline_pack_state)(pipeline, cb_info, ds_info,
2953                                        rs_info, pv_info, ls_info,
2954                                        ms_info,
2955                                        &pipeline_state);
2956 
2957    pipeline_set_sample_mask(pipeline, ms_info);
2958    pipeline_set_sample_rate_shading(pipeline, ms_info);
2959    pipeline->line_smooth = enable_line_smooth(pipeline, rs_info);
2960 
2961    result = pipeline_compile_graphics(pipeline, cache, pCreateInfo, pAllocator);
2962 
2963    if (result != VK_SUCCESS) {
2964       /* The caller will destroy the pipeline and we didn't allocate any extra
2965        * data, so there is nothing else to clean up here.
2966        */
2967       return result;
2968    }
2969 
2970    const VkPipelineVertexInputStateCreateInfo *vi_info =
2971       pCreateInfo->pVertexInputState;
2972 
2973    const VkPipelineVertexInputDivisorStateCreateInfoEXT *vd_info =
2974       vk_find_struct_const(vi_info->pNext,
2975                            PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
2976 
2977    v3dv_X(device, pipeline_pack_compile_state)(pipeline, vi_info, vd_info);
2978 
2979    if (v3dv_X(device, pipeline_needs_default_attribute_values)(pipeline)) {
2980       pipeline->default_attribute_values =
2981          v3dv_X(pipeline->device, create_default_attribute_values)(pipeline->device, pipeline);
2982 
2983       if (!pipeline->default_attribute_values)
2984          return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2985    } else {
2986       pipeline->default_attribute_values = NULL;
2987    }
2988 
2989    /* This must be done after the pipeline has been compiled */
2990    v3dv_compute_ez_state(&pipeline->dynamic_graphics_state,
2991                          pipeline,
2992                          &pipeline->ez_state,
2993                          &pipeline->incompatible_ez_test);
2994 
2995    return result;
2996 }
2997 
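/* VK_KHR_maintenance5 introduces 64-bit pipeline create flags through
 * VkPipelineCreateFlags2CreateInfoKHR; when that struct is chained it takes
 * precedence over the legacy 32-bit flags field.
 */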
2998 static VkPipelineCreateFlagBits2KHR
pipeline_create_info_get_flags(VkPipelineCreateFlags flags,const void * pNext)2999 pipeline_create_info_get_flags(VkPipelineCreateFlags flags, const void *pNext)
3000 {
3001    const VkPipelineCreateFlags2CreateInfoKHR *flags2 =
3002       vk_find_struct_const(pNext, PIPELINE_CREATE_FLAGS_2_CREATE_INFO_KHR);
3003    if (flags2)
3004       return flags2->flags;
3005    else
3006       return flags;
3007 }
3008 
3009 static VkResult
graphics_pipeline_create(VkDevice _device,VkPipelineCache _cache,const VkGraphicsPipelineCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkPipeline * pPipeline,VkPipelineCreateFlagBits2KHR * flags)3010 graphics_pipeline_create(VkDevice _device,
3011                          VkPipelineCache _cache,
3012                          const VkGraphicsPipelineCreateInfo *pCreateInfo,
3013                          const VkAllocationCallbacks *pAllocator,
3014                          VkPipeline *pPipeline,
3015                          VkPipelineCreateFlagBits2KHR *flags)
3016 {
3017    V3DV_FROM_HANDLE(v3dv_device, device, _device);
3018    V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
3019 
3020    struct v3dv_pipeline *pipeline;
3021    VkResult result;
3022 
3023    *flags = pipeline_create_info_get_flags(pCreateInfo->flags,
3024                                            pCreateInfo->pNext);
3025 
3026    /* Use the default pipeline cache if none is specified */
3027    if (cache == NULL && device->instance->default_pipeline_cache_enabled)
3028       cache = &device->default_pipeline_cache;
3029 
3030    pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline),
3031                                VK_OBJECT_TYPE_PIPELINE);
3032 
3033    if (pipeline == NULL)
3034       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3035 
3036    pipeline->flags = *flags;
3037    result = pipeline_init(pipeline, device, cache, pCreateInfo, pAllocator);
3038 
3039    if (result != VK_SUCCESS) {
3040       v3dv_destroy_pipeline(pipeline, device, pAllocator);
3041       if (result == VK_PIPELINE_COMPILE_REQUIRED)
3042          *pPipeline = VK_NULL_HANDLE;
3043       return result;
3044    }
3045 
3046    *pPipeline = v3dv_pipeline_to_handle(pipeline);
3047 
3048    return VK_SUCCESS;
3049 }
3050 
3051 VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateGraphicsPipelines(VkDevice _device,VkPipelineCache pipelineCache,uint32_t count,const VkGraphicsPipelineCreateInfo * pCreateInfos,const VkAllocationCallbacks * pAllocator,VkPipeline * pPipelines)3052 v3dv_CreateGraphicsPipelines(VkDevice _device,
3053                              VkPipelineCache pipelineCache,
3054                              uint32_t count,
3055                              const VkGraphicsPipelineCreateInfo *pCreateInfos,
3056                              const VkAllocationCallbacks *pAllocator,
3057                              VkPipeline *pPipelines)
3058 {
3059    V3DV_FROM_HANDLE(v3dv_device, device, _device);
3060    VkResult result = VK_SUCCESS;
3061 
3062    if (V3D_DBG(SHADERS))
3063       mtx_lock(&device->pdevice->mutex);
3064 
3065    uint32_t i = 0;
3066    for (; i < count; i++) {
3067       VkResult local_result;
3068 
3069       VkPipelineCreateFlagBits2KHR flags;
3070       local_result = graphics_pipeline_create(_device,
3071                                               pipelineCache,
3072                                               &pCreateInfos[i],
3073                                               pAllocator,
3074                                               &pPipelines[i],
3075                                               &flags);
3076 
3077       if (local_result != VK_SUCCESS) {
3078          result = local_result;
3079          pPipelines[i] = VK_NULL_HANDLE;
3080          if (flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
3081             break;
3082       }
3083    }
3084 
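   /* Any entries we did not attempt (e.g. after an early return on failure)
    * must still be set to VK_NULL_HANDLE.
    */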
3085    for (; i < count; i++)
3086       pPipelines[i] = VK_NULL_HANDLE;
3087 
3088    if (V3D_DBG(SHADERS))
3089       mtx_unlock(&device->pdevice->mutex);
3090 
3091    return result;
3092 }
3093 
3094 static void
shared_type_info(const struct glsl_type * type,unsigned * size,unsigned * align)3095 shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
3096 {
3097    assert(glsl_type_is_vector_or_scalar(type));
3098 
3099    uint32_t comp_size = glsl_type_is_boolean(type)
3100       ? 4 : glsl_get_bit_size(type) / 8;
3101    unsigned length = glsl_get_vector_elements(type);
3102    *size = comp_size * length;
3103    *align = comp_size * (length == 3 ? 4 : length);
3104 }
3105 
3106 static void
lower_compute(struct nir_shader * nir)3107 lower_compute(struct nir_shader *nir)
3108 {
3109    if (!nir->info.shared_memory_explicit_layout) {
3110       NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
3111                nir_var_mem_shared, shared_type_info);
3112    }
3113 
3114    NIR_PASS(_, nir, nir_lower_explicit_io,
3115             nir_var_mem_shared, nir_address_format_32bit_offset);
3116 
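   /* We report has_base_workgroup_id so workgroup ids are computed relative
    * to a base workgroup id system value, which lets the dispatch base from
    * vkCmdDispatchBase be applied.
    */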
3117    struct nir_lower_compute_system_values_options sysval_options = {
3118       .has_base_workgroup_id = true,
3119    };
3120    NIR_PASS_V(nir, nir_lower_compute_system_values, &sysval_options);
3121 }
3122 
3123 static VkResult
pipeline_compile_compute(struct v3dv_pipeline * pipeline,struct v3dv_pipeline_cache * cache,const VkComputePipelineCreateInfo * info,const VkAllocationCallbacks * alloc)3124 pipeline_compile_compute(struct v3dv_pipeline *pipeline,
3125                          struct v3dv_pipeline_cache *cache,
3126                          const VkComputePipelineCreateInfo *info,
3127                          const VkAllocationCallbacks *alloc)
3128 {
3129    VkPipelineCreationFeedback pipeline_feedback = {
3130       .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
3131    };
3132    int64_t pipeline_start = os_time_get_nano();
3133 
3134    struct v3dv_device *device = pipeline->device;
3135    struct v3dv_physical_device *physical_device = device->pdevice;
3136 
3137    const VkPipelineShaderStageCreateInfo *sinfo = &info->stage;
3138    gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
3139 
3140    struct v3dv_pipeline_stage *p_stage =
3141       vk_zalloc2(&device->vk.alloc, alloc, sizeof(*p_stage), 8,
3142                  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3143    if (!p_stage)
3144       return VK_ERROR_OUT_OF_HOST_MEMORY;
3145 
3146    p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id);
3147    p_stage->pipeline = pipeline;
3148    p_stage->stage = gl_shader_stage_to_broadcom(stage);
3149    p_stage->entrypoint = sinfo->pName;
3150    p_stage->module = vk_shader_module_from_handle(sinfo->module);
3151    p_stage->spec_info = sinfo->pSpecializationInfo;
3152    p_stage->feedback = (VkPipelineCreationFeedback) { 0 };
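   /* If no shader module handle was provided, the SPIR-V may instead be
    * supplied through a VkShaderModuleCreateInfo chained into the stage's
    * pNext (e.g. with VK_KHR_maintenance5).
    */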
3153    if (!p_stage->module) {
3154       p_stage->module_info =
3155          vk_find_struct_const(sinfo->pNext, SHADER_MODULE_CREATE_INFO);
3156    }
3157 
3158    vk_pipeline_robustness_state_fill(&device->vk, &p_stage->robustness,
3159                                      info->pNext, sinfo->pNext);
3160 
3161    vk_pipeline_hash_shader_stage(pipeline->flags,
3162                                  &info->stage,
3163                                  &p_stage->robustness,
3164                                  p_stage->shader_sha1);
3165 
3166    p_stage->nir = NULL;
3167 
3168    pipeline->stages[BROADCOM_SHADER_COMPUTE] = p_stage;
3169    pipeline->active_stages |= sinfo->stage;
3170 
3171    /* First we try to get the variants from the pipeline cache (unless we are
3172     * required to capture internal representations, since in that case we need
3173     * to compile).
3174     */
3175    bool needs_executable_info =
3176       pipeline->flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
3177    if (!needs_executable_info) {
3178       struct v3dv_pipeline_key pipeline_key;
3179       pipeline_populate_compute_key(pipeline, &pipeline_key, info);
3180       pipeline_hash_compute(pipeline, &pipeline_key, pipeline->sha1);
3181 
3182       bool cache_hit = false;
3183       pipeline->shared_data =
3184          v3dv_pipeline_cache_search_for_pipeline(cache, pipeline->sha1, &cache_hit);
3185 
3186       if (pipeline->shared_data != NULL) {
3187          assert(pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE]);
3188          if (cache_hit && cache != &pipeline->device->default_pipeline_cache)
3189             pipeline_feedback.flags |=
3190                VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
3191 
3192          goto success;
3193       }
3194    }
3195 
3196    if (pipeline->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)
3197       return VK_PIPELINE_COMPILE_REQUIRED;
3198 
3199    pipeline->shared_data = v3dv_pipeline_shared_data_new_empty(pipeline->sha1,
3200                                                                pipeline,
3201                                                                false);
3202    if (!pipeline->shared_data)
3203       return VK_ERROR_OUT_OF_HOST_MEMORY;
3204 
3205    p_stage->feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
3206 
3207    /* If not found on cache, compile it */
3208    p_stage->nir = pipeline_stage_get_nir(p_stage, pipeline, cache);
3209    assert(p_stage->nir);
3210 
3211    v3d_optimize_nir(NULL, p_stage->nir);
3212    pipeline_lower_nir(pipeline, p_stage, pipeline->layout);
3213    lower_compute(p_stage->nir);
3214 
3215    VkResult result = VK_SUCCESS;
3216 
3217    struct v3d_key key;
3218    memset(&key, 0, sizeof(key));
3219    pipeline_populate_v3d_key(&key, p_stage, 0);
3220    pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE] =
3221       pipeline_compile_shader_variant(p_stage, &key, sizeof(key),
3222                                       alloc, &result);
3223 
3224    if (result != VK_SUCCESS)
3225       return result;
3226 
3227    if (!upload_assembly(pipeline))
3228       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
3229 
3230    v3dv_pipeline_cache_upload_pipeline(pipeline, cache);
3231 
3232 success:
3233 
3234    pipeline_check_buffer_device_address(pipeline);
3235 
3236    pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
3237    write_creation_feedback(pipeline,
3238                            info->pNext,
3239                            &pipeline_feedback,
3240                            1,
3241                            &info->stage);
3242 
3243    /* Now that the variants are in pipeline->shared_data, we no longer need
3244     * the pipeline stages after compiling.
3245     */
3246    if (!needs_executable_info)
3247       pipeline_free_stages(device, pipeline, alloc);
3248 
3249    pipeline_check_spill_size(pipeline);
3250 
3251    return VK_SUCCESS;
3252 }
3253 
3254 static VkResult
compute_pipeline_init(struct v3dv_pipeline * pipeline,struct v3dv_device * device,struct v3dv_pipeline_cache * cache,const VkComputePipelineCreateInfo * info,const VkAllocationCallbacks * alloc)3255 compute_pipeline_init(struct v3dv_pipeline *pipeline,
3256                       struct v3dv_device *device,
3257                       struct v3dv_pipeline_cache *cache,
3258                       const VkComputePipelineCreateInfo *info,
3259                       const VkAllocationCallbacks *alloc)
3260 {
3261    V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, info->layout);
3262 
3263    pipeline->device = device;
3264    pipeline->layout = layout;
3265    v3dv_pipeline_layout_ref(pipeline->layout);
3266 
3267    VkResult result = pipeline_compile_compute(pipeline, cache, info, alloc);
3268    if (result != VK_SUCCESS)
3269       return result;
3270 
3271    return result;
3272 }
3273 
3274 static VkResult
compute_pipeline_create(VkDevice _device,VkPipelineCache _cache,const VkComputePipelineCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkPipeline * pPipeline,VkPipelineCreateFlagBits2KHR * flags)3275 compute_pipeline_create(VkDevice _device,
3276                          VkPipelineCache _cache,
3277                          const VkComputePipelineCreateInfo *pCreateInfo,
3278                          const VkAllocationCallbacks *pAllocator,
3279                          VkPipeline *pPipeline,
3280                          VkPipelineCreateFlagBits2KHR *flags)
3281 {
3282    V3DV_FROM_HANDLE(v3dv_device, device, _device);
3283    V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
3284 
3285    struct v3dv_pipeline *pipeline;
3286    VkResult result;
3287 
3288    *flags = pipeline_create_info_get_flags(pCreateInfo->flags,
3289                                            pCreateInfo->pNext);
3290 
3291    /* Use the default pipeline cache if none is specified */
3292    if (cache == NULL && device->instance->default_pipeline_cache_enabled)
3293       cache = &device->default_pipeline_cache;
3294 
3295    pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline),
3296                                VK_OBJECT_TYPE_PIPELINE);
3297    if (pipeline == NULL)
3298       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3299 
3300    pipeline->flags = *flags;
3301    result = compute_pipeline_init(pipeline, device, cache,
3302                                   pCreateInfo, pAllocator);
3303    if (result != VK_SUCCESS) {
3304       v3dv_destroy_pipeline(pipeline, device, pAllocator);
3305       if (result == VK_PIPELINE_COMPILE_REQUIRED)
3306          *pPipeline = VK_NULL_HANDLE;
3307       return result;
3308    }
3309 
3310    *pPipeline = v3dv_pipeline_to_handle(pipeline);
3311 
3312    return VK_SUCCESS;
3313 }
3314 
3315 VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateComputePipelines(VkDevice _device,VkPipelineCache pipelineCache,uint32_t createInfoCount,const VkComputePipelineCreateInfo * pCreateInfos,const VkAllocationCallbacks * pAllocator,VkPipeline * pPipelines)3316 v3dv_CreateComputePipelines(VkDevice _device,
3317                             VkPipelineCache pipelineCache,
3318                             uint32_t createInfoCount,
3319                             const VkComputePipelineCreateInfo *pCreateInfos,
3320                             const VkAllocationCallbacks *pAllocator,
3321                             VkPipeline *pPipelines)
3322 {
3323    V3DV_FROM_HANDLE(v3dv_device, device, _device);
3324    VkResult result = VK_SUCCESS;
3325 
3326    if (V3D_DBG(SHADERS))
3327       mtx_lock(&device->pdevice->mutex);
3328 
3329    uint32_t i = 0;
3330    for (; i < createInfoCount; i++) {
3331       VkResult local_result;
3332       VkPipelineCreateFlagBits2KHR flags;
3333       local_result = compute_pipeline_create(_device,
3334                                               pipelineCache,
3335                                               &pCreateInfos[i],
3336                                               pAllocator,
3337                                               &pPipelines[i],
3338                                               &flags);
3339 
3340       if (local_result != VK_SUCCESS) {
3341          result = local_result;
3342          pPipelines[i] = VK_NULL_HANDLE;
3343          if (flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
3344             break;
3345       }
3346    }
3347 
3348    for (; i < createInfoCount; i++)
3349       pPipelines[i] = VK_NULL_HANDLE;
3350 
3351    if (V3D_DBG(SHADERS))
3352       mtx_unlock(&device->pdevice->mutex);
3353 
3354    return result;
3355 }
3356 
3357 static nir_shader *
pipeline_get_nir(struct v3dv_pipeline * pipeline,enum broadcom_shader_stage stage)3358 pipeline_get_nir(struct v3dv_pipeline *pipeline,
3359                  enum broadcom_shader_stage stage)
3360 {
3361    assert(stage >= 0 && stage < BROADCOM_SHADER_STAGES);
3362    if (pipeline->stages[stage])
3363       return pipeline->stages[stage]->nir;
3364 
3365    return NULL;
3366 }
3367 
3368 static struct v3d_prog_data *
pipeline_get_prog_data(struct v3dv_pipeline * pipeline,enum broadcom_shader_stage stage)3369 pipeline_get_prog_data(struct v3dv_pipeline *pipeline,
3370                        enum broadcom_shader_stage stage)
3371 {
3372    if (pipeline->shared_data->variants[stage])
3373       return pipeline->shared_data->variants[stage]->prog_data.base;
3374    return NULL;
3375 }
3376 
3377 static uint64_t *
pipeline_get_qpu(struct v3dv_pipeline * pipeline,enum broadcom_shader_stage stage,uint32_t * qpu_size)3378 pipeline_get_qpu(struct v3dv_pipeline *pipeline,
3379                  enum broadcom_shader_stage stage,
3380                  uint32_t *qpu_size)
3381 {
3382    struct v3dv_shader_variant *variant =
3383       pipeline->shared_data->variants[stage];
3384    if (!variant) {
3385       *qpu_size = 0;
3386       return NULL;
3387    }
3388 
3389    *qpu_size = variant->qpu_insts_size;
3390    return variant->qpu_insts;
3391 }
3392 
3393 /* FIXME: we use the same macro in various drivers, maybe move it to
3394  * the common vk_util.h?
3395  */
3396 #define WRITE_STR(field, ...) ({                                \
3397    memset(field, 0, sizeof(field));                             \
3398    UNUSED int _i = snprintf(field, sizeof(field), __VA_ARGS__); \
3399    assert(_i > 0 && _i < sizeof(field));                        \
3400 })
3401 
3402 static bool
write_ir_text(VkPipelineExecutableInternalRepresentationKHR * ir,const char * data)3403 write_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir,
3404               const char *data)
3405 {
3406    ir->isText = VK_TRUE;
3407 
3408    size_t data_len = strlen(data) + 1;
3409 
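   /* A call with pData == NULL is a size query: report the required size
    * and succeed.
    */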
3410    if (ir->pData == NULL) {
3411       ir->dataSize = data_len;
3412       return true;
3413    }
3414 
3415    strncpy(ir->pData, data, ir->dataSize);
3416    if (ir->dataSize < data_len)
3417       return false;
3418 
3419    ir->dataSize = data_len;
3420    return true;
3421 }
3422 
3423 static void
append(char ** str,size_t * offset,const char * fmt,...)3424 append(char **str, size_t *offset, const char *fmt, ...)
3425 {
3426    va_list args;
3427    va_start(args, fmt);
3428    ralloc_vasprintf_rewrite_tail(str, offset, fmt, args);
3429    va_end(args);
3430 }
3431 
3432 static void
pipeline_collect_executable_data(struct v3dv_pipeline * pipeline)3433 pipeline_collect_executable_data(struct v3dv_pipeline *pipeline)
3434 {
3435    if (pipeline->executables.mem_ctx)
3436       return;
3437 
3438    pipeline->executables.mem_ctx = ralloc_context(NULL);
3439    util_dynarray_init(&pipeline->executables.data,
3440                       pipeline->executables.mem_ctx);
3441 
3442    /* Don't crash for failed/bogus pipelines */
3443    if (!pipeline->shared_data)
3444       return;
3445 
3446    for (int s = BROADCOM_SHADER_VERTEX; s <= BROADCOM_SHADER_COMPUTE; s++) {
3447       VkShaderStageFlags vk_stage =
3448          mesa_to_vk_shader_stage(broadcom_shader_stage_to_gl(s));
3449       if (!(vk_stage & pipeline->active_stages))
3450          continue;
3451 
3452       char *nir_str = NULL;
3453       char *qpu_str = NULL;
3454 
3455       if (pipeline_keep_qpu(pipeline)) {
3456          nir_shader *nir = pipeline_get_nir(pipeline, s);
3457          nir_str = nir ?
3458             nir_shader_as_str(nir, pipeline->executables.mem_ctx) : NULL;
3459 
3460          uint32_t qpu_size;
3461          uint64_t *qpu = pipeline_get_qpu(pipeline, s, &qpu_size);
3462          if (qpu) {
3463             uint32_t qpu_inst_count = qpu_size / sizeof(uint64_t);
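            /* Rough upper bound on the length of one disassembled
             * instruction line.
             */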
3464             qpu_str = rzalloc_size(pipeline->executables.mem_ctx,
3465                                    qpu_inst_count * 96);
3466             size_t offset = 0;
3467             for (int i = 0; i < qpu_inst_count; i++) {
3468                const char *str = v3d_qpu_disasm(&pipeline->device->devinfo, qpu[i]);
3469                append(&qpu_str, &offset, "%s\n", str);
3470                ralloc_free((void *)str);
3471             }
3472          }
3473       }
3474 
3475       struct v3dv_pipeline_executable_data data = {
3476          .stage = s,
3477          .nir_str = nir_str,
3478          .qpu_str = qpu_str,
3479       };
3480       util_dynarray_append(&pipeline->executables.data,
3481                            struct v3dv_pipeline_executable_data, data);
3482    }
3483 }
3484 
3485 static const struct v3dv_pipeline_executable_data *
pipeline_get_executable(struct v3dv_pipeline * pipeline,uint32_t index)3486 pipeline_get_executable(struct v3dv_pipeline *pipeline, uint32_t index)
3487 {
3488    assert(index < util_dynarray_num_elements(&pipeline->executables.data,
3489                                              struct v3dv_pipeline_executable_data));
3490    return util_dynarray_element(&pipeline->executables.data,
3491                                 struct v3dv_pipeline_executable_data,
3492                                 index);
3493 }
3494 
3495 VKAPI_ATTR VkResult VKAPI_CALL
v3dv_GetPipelineExecutableInternalRepresentationsKHR(VkDevice device,const VkPipelineExecutableInfoKHR * pExecutableInfo,uint32_t * pInternalRepresentationCount,VkPipelineExecutableInternalRepresentationKHR * pInternalRepresentations)3496 v3dv_GetPipelineExecutableInternalRepresentationsKHR(
3497    VkDevice device,
3498    const VkPipelineExecutableInfoKHR *pExecutableInfo,
3499    uint32_t *pInternalRepresentationCount,
3500    VkPipelineExecutableInternalRepresentationKHR *pInternalRepresentations)
3501 {
3502    V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, pExecutableInfo->pipeline);
3503 
3504    pipeline_collect_executable_data(pipeline);
3505 
3506    VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableInternalRepresentationKHR, out,
3507                           pInternalRepresentations, pInternalRepresentationCount);
3508 
3509    bool incomplete = false;
3510    const struct v3dv_pipeline_executable_data *exe =
3511       pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
3512 
3513    if (exe->nir_str) {
3514       vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR,
3515                                &out, ir) {
3516          WRITE_STR(ir->name, "NIR (%s)", broadcom_shader_stage_name(exe->stage));
3517          WRITE_STR(ir->description, "Final NIR form");
3518          if (!write_ir_text(ir, exe->nir_str))
3519             incomplete = true;
3520       }
3521    }
3522 
3523    if (exe->qpu_str) {
3524       vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR,
3525                                &out, ir) {
3526          WRITE_STR(ir->name, "QPU (%s)", broadcom_shader_stage_name(exe->stage));
3527          WRITE_STR(ir->description, "Final QPU assembly");
3528          if (!write_ir_text(ir, exe->qpu_str))
3529             incomplete = true;
3530       }
3531    }
3532 
3533    return incomplete ? VK_INCOMPLETE : vk_outarray_status(&out);
3534 }
3535 
3536 VKAPI_ATTR VkResult VKAPI_CALL
v3dv_GetPipelineExecutablePropertiesKHR(VkDevice device,const VkPipelineInfoKHR * pPipelineInfo,uint32_t * pExecutableCount,VkPipelineExecutablePropertiesKHR * pProperties)3537 v3dv_GetPipelineExecutablePropertiesKHR(
3538    VkDevice device,
3539    const VkPipelineInfoKHR *pPipelineInfo,
3540    uint32_t *pExecutableCount,
3541    VkPipelineExecutablePropertiesKHR *pProperties)
3542 {
3543    V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, pPipelineInfo->pipeline);
3544 
3545    pipeline_collect_executable_data(pipeline);
3546 
3547    VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutablePropertiesKHR, out,
3548                           pProperties, pExecutableCount);
3549 
3550    util_dynarray_foreach(&pipeline->executables.data,
3551                          struct v3dv_pipeline_executable_data, exe) {
3552       vk_outarray_append_typed(VkPipelineExecutablePropertiesKHR, &out, props) {
3553          gl_shader_stage mesa_stage = broadcom_shader_stage_to_gl(exe->stage);
3554          props->stages = mesa_to_vk_shader_stage(mesa_stage);
3555 
3556          WRITE_STR(props->name, "%s (%s)",
3557                    _mesa_shader_stage_to_abbrev(mesa_stage),
3558                    broadcom_shader_stage_is_binning(exe->stage) ?
3559                      "Binning" : "Render");
3560 
3561          WRITE_STR(props->description, "%s",
3562                    _mesa_shader_stage_to_string(mesa_stage));
3563 
3564          props->subgroupSize = V3D_CHANNELS;
3565       }
3566    }
3567 
3568    return vk_outarray_status(&out);
3569 }
3570 
3571 VKAPI_ATTR VkResult VKAPI_CALL
v3dv_GetPipelineExecutableStatisticsKHR(VkDevice device,const VkPipelineExecutableInfoKHR * pExecutableInfo,uint32_t * pStatisticCount,VkPipelineExecutableStatisticKHR * pStatistics)3572 v3dv_GetPipelineExecutableStatisticsKHR(
3573    VkDevice device,
3574    const VkPipelineExecutableInfoKHR *pExecutableInfo,
3575    uint32_t *pStatisticCount,
3576    VkPipelineExecutableStatisticKHR *pStatistics)
3577 {
3578    V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, pExecutableInfo->pipeline);
3579 
3580    pipeline_collect_executable_data(pipeline);
3581 
3582    const struct v3dv_pipeline_executable_data *exe =
3583       pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
3584 
3585    struct v3d_prog_data *prog_data =
3586       pipeline_get_prog_data(pipeline, exe->stage);
3587 
3588    struct v3dv_shader_variant *variant =
3589       pipeline->shared_data->variants[exe->stage];
3590    uint32_t qpu_inst_count = variant->qpu_insts_size / sizeof(uint64_t);
3591 
3592    VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableStatisticKHR, out,
3593                           pStatistics, pStatisticCount);
3594 
3595    if (qpu_inst_count > 0) {
3596       vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3597          WRITE_STR(stat->name, "Compile Strategy");
3598          WRITE_STR(stat->description, "Chosen compile strategy index");
3599          stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3600          stat->value.u64 = prog_data->compile_strategy_idx;
3601       }
3602 
3603       vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3604          WRITE_STR(stat->name, "Instruction Count");
3605          WRITE_STR(stat->description, "Number of QPU instructions");
3606          stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3607          stat->value.u64 = qpu_inst_count;
3608       }
3609 
3610       vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3611          WRITE_STR(stat->name, "Thread Count");
3612          WRITE_STR(stat->description, "Number of QPU threads dispatched");
3613          stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3614          stat->value.u64 = prog_data->threads;
3615       }
3616 
3617       vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3618          WRITE_STR(stat->name, "Spill Size");
3619          WRITE_STR(stat->description, "Size of the spill buffer in bytes");
3620          stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3621          stat->value.u64 = prog_data->spill_size;
3622       }
3623 
3624       vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3625          WRITE_STR(stat->name, "TMU Spills");
3626          WRITE_STR(stat->description, "Number of times a register was spilled "
3627                                       "to memory");
3628          stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3629          stat->value.u64 = prog_data->tmu_spills;
3630       }
3631 
3632       vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3633          WRITE_STR(stat->name, "TMU Fills");
3634          WRITE_STR(stat->description, "Number of times a register was filled "
3635                                       "from memory");
3636          stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3637          stat->value.u64 = prog_data->tmu_fills;
3638       }
3639 
3640       vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3641          WRITE_STR(stat->name, "QPU Read Stalls");
3642          WRITE_STR(stat->description, "Number of cycles the QPU stalls for a "
3643                                       "register read dependency");
3644          stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3645          stat->value.u64 = prog_data->qpu_read_stalls;
3646       }
3647    }
3648 
3649    return vk_outarray_status(&out);
3650 }
3651