/*
 * Copyright © 2019 Raspberry Pi Ltd
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "vk_util.h"

#include "v3dv_debug.h"
#include "v3dv_private.h"

#include "common/v3d_debug.h"
#include "qpu/qpu_disasm.h"

#include "compiler/nir/nir_builder.h"
#include "nir/nir_serialize.h"

#include "util/u_atomic.h"
#include "util/os_time.h"

#include "vk_format.h"
#include "vk_nir_convert_ycbcr.h"
#include "vk_pipeline.h"

static VkResult
compute_vpm_config(struct v3dv_pipeline *pipeline);

void
v3dv_print_v3d_key(struct v3d_key *key,
                   uint32_t v3d_key_size)
{
   struct mesa_sha1 ctx;
   unsigned char sha1[20];
   char sha1buf[41];

   _mesa_sha1_init(&ctx);

   _mesa_sha1_update(&ctx, key, v3d_key_size);

   _mesa_sha1_final(&ctx, sha1);
   _mesa_sha1_format(sha1buf, sha1);

   fprintf(stderr, "key %p: %s\n", key, sha1buf);
}

static void
pipeline_compute_sha1_from_nir(struct v3dv_pipeline_stage *p_stage)
{
   VkPipelineShaderStageCreateInfo info = {
      .module = vk_shader_module_handle_from_nir(p_stage->nir),
      .pName = p_stage->entrypoint,
      .stage = mesa_to_vk_shader_stage(p_stage->nir->info.stage),
   };

   vk_pipeline_hash_shader_stage(0, &info, NULL, p_stage->shader_sha1);
}

void
v3dv_shader_variant_destroy(struct v3dv_device *device,
                            struct v3dv_shader_variant *variant)
{
   /* The assembly BO is shared by all variants in the pipeline, so it can't
    * be freed here and should be freed with the pipeline
    */
   if (variant->qpu_insts) {
      free(variant->qpu_insts);
      variant->qpu_insts = NULL;
   }
   ralloc_free(variant->prog_data.base);
   vk_free(&device->vk.alloc, variant);
}

static void
destroy_pipeline_stage(struct v3dv_device *device,
                       struct v3dv_pipeline_stage *p_stage,
                       const VkAllocationCallbacks *pAllocator)
{
   if (!p_stage)
      return;

   ralloc_free(p_stage->nir);
   vk_free2(&device->vk.alloc, pAllocator, p_stage);
}

static void
pipeline_free_stages(struct v3dv_device *device,
                     struct v3dv_pipeline *pipeline,
                     const VkAllocationCallbacks *pAllocator)
{
   assert(pipeline);

   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      destroy_pipeline_stage(device, pipeline->stages[stage], pAllocator);
      pipeline->stages[stage] = NULL;
   }
}

static void
v3dv_destroy_pipeline(struct v3dv_pipeline *pipeline,
                      struct v3dv_device *device,
                      const VkAllocationCallbacks *pAllocator)
{
   if (!pipeline)
      return;

   pipeline_free_stages(device, pipeline, pAllocator);

   if (pipeline->shared_data) {
      v3dv_pipeline_shared_data_unref(device, pipeline->shared_data);
      pipeline->shared_data = NULL;
   }

   if (pipeline->spill.bo) {
      assert(pipeline->spill.size_per_thread > 0);
      v3dv_bo_free(device, pipeline->spill.bo);
   }

   if (pipeline->default_attribute_values) {
      v3dv_bo_free(device, pipeline->default_attribute_values);
      pipeline->default_attribute_values = NULL;
   }

   if (pipeline->executables.mem_ctx)
      ralloc_free(pipeline->executables.mem_ctx);

   if (pipeline->layout)
      v3dv_pipeline_layout_unref(device, pipeline->layout, pAllocator);

   vk_object_free(&device->vk, pAllocator, pipeline);
}

VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyPipeline(VkDevice _device,
                     VkPipeline _pipeline,
                     const VkAllocationCallbacks *pAllocator)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, _pipeline);

   if (!pipeline)
      return;

   v3dv_destroy_pipeline(pipeline, device, pAllocator);
}

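/* Address formats used when translating SPIR-V to NIR. UBOs and SSBOs use a
 * 32-bit index+offset pair and global (physical SSBO) memory uses a 2x32-bit
 * address, matching the nir_lower_explicit_io calls in preprocess_nir();
 * push constants stay in logical addressing until they are lowered to a
 * 32-bit offset there.
 */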
static const struct spirv_to_nir_options default_spirv_options = {
   .ubo_addr_format = nir_address_format_32bit_index_offset,
   .ssbo_addr_format = nir_address_format_32bit_index_offset,
   .phys_ssbo_addr_format = nir_address_format_2x32bit_global,
   .push_const_addr_format = nir_address_format_logical,
   .shared_addr_format = nir_address_format_32bit_offset,
};

const nir_shader_compiler_options *
v3dv_pipeline_get_nir_options(const struct v3d_device_info *devinfo)
{
   static bool initialized = false;
   static nir_shader_compiler_options options = {
      .lower_uadd_sat = true,
      .lower_usub_sat = true,
      .lower_iadd_sat = true,
      .lower_all_io_to_temps = true,
      .lower_extract_byte = true,
      .lower_extract_word = true,
      .lower_insert_byte = true,
      .lower_insert_word = true,
      .lower_bitfield_insert = true,
      .lower_bitfield_extract = true,
      .lower_bitfield_reverse = true,
      .lower_bit_count = true,
      .lower_cs_local_id_to_index = true,
      .lower_ffract = true,
      .lower_fmod = true,
      .lower_pack_unorm_2x16 = true,
      .lower_pack_snorm_2x16 = true,
      .lower_unpack_unorm_2x16 = true,
      .lower_unpack_snorm_2x16 = true,
      .lower_pack_unorm_4x8 = true,
      .lower_pack_snorm_4x8 = true,
      .lower_unpack_unorm_4x8 = true,
      .lower_unpack_snorm_4x8 = true,
      .lower_pack_half_2x16 = true,
      .lower_unpack_half_2x16 = true,
      .lower_pack_32_2x16 = true,
      .lower_pack_32_2x16_split = true,
      .lower_unpack_32_2x16_split = true,
      .lower_mul_2x32_64 = true,
      .lower_fdiv = true,
      .lower_find_lsb = true,
      .lower_ffma16 = true,
      .lower_ffma32 = true,
      .lower_ffma64 = true,
      .lower_flrp32 = true,
      .lower_fpow = true,
      .lower_fsqrt = true,
      .lower_ifind_msb = true,
      .lower_isign = true,
      .lower_ldexp = true,
      .lower_mul_high = true,
      .lower_wpos_pntc = false,
      .lower_to_scalar = true,
      .lower_device_index_to_zero = true,
      .lower_fquantize2f16 = true,
      .lower_ufind_msb = true,
      .has_fsub = true,
      .has_isub = true,
      .has_uclz = true,
      .vertex_id_zero_based = false, /* FIXME: to set this to true, the intrinsic
                                      * needs to be supported */
      .lower_interpolate_at = true,
      .max_unroll_iterations = 16,
      .force_indirect_unrolling = (nir_var_shader_in | nir_var_function_temp),
      .divergence_analysis_options =
         nir_divergence_multiple_workgroup_per_compute_subgroup,
      .discard_is_demote = true,
      .has_ddx_intrinsics = true,
      .scalarize_ddx = true,
   };

   if (!initialized) {
      options.lower_fsat = devinfo->ver < 71;
      initialized = true;
   }

   return &options;
}

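/* Returns the YCbCr conversion state for the immutable sampler at the given
 * (set, binding, array index) slot in the pipeline layout, or NULL if the
 * binding has no immutable samplers. This is the lookup callback used by the
 * common YCbCr texture lowering.
 */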
static const struct vk_ycbcr_conversion_state *
lookup_ycbcr_conversion(const void *_pipeline_layout, uint32_t set,
                        uint32_t binding, uint32_t array_index)
{
   struct v3dv_pipeline_layout *pipeline_layout =
      (struct v3dv_pipeline_layout *) _pipeline_layout;

   assert(set < pipeline_layout->num_sets);
   struct v3dv_descriptor_set_layout *set_layout =
      pipeline_layout->set[set].layout;

   assert(binding < set_layout->binding_count);
   struct v3dv_descriptor_set_binding_layout *bind_layout =
      &set_layout->binding[binding];

   if (bind_layout->immutable_samplers_offset) {
      const struct v3dv_sampler *immutable_samplers =
         v3dv_immutable_samplers(set_layout, bind_layout);
      const struct v3dv_sampler *sampler = &immutable_samplers[array_index];
      return sampler->conversion ? &sampler->conversion->state : NULL;
   } else {
      return NULL;
   }
}

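/* Stage-independent NIR preprocessing: lowers sysvals and I/O, lowers
 * explicit UBO/SSBO/global/push-constant access to the address formats
 * declared above, limits indirect addressing, and runs the common v3d
 * optimization loop.
 */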
static void
preprocess_nir(nir_shader *nir)
{
   const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
      .frag_coord = true,
      .point_coord = true,
   };
   NIR_PASS(_, nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);

   /* Vulkan uses the separate-shader linking model */
   nir->info.separate_shader = true;

   /* Make sure we lower variable initializers on output variables so that
    * nir_remove_dead_variables below sees the corresponding stores
    */
   NIR_PASS(_, nir, nir_lower_variable_initializers, nir_var_shader_out);

   if (nir->info.stage == MESA_SHADER_FRAGMENT)
      NIR_PASS(_, nir, nir_lower_io_to_vector, nir_var_shader_out);
   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      NIR_PASS(_, nir, nir_lower_input_attachments,
               &(nir_input_attachment_options) {
                  .use_fragcoord_sysval = false,
               });
   }

   NIR_PASS_V(nir, nir_lower_io_to_temporaries,
              nir_shader_get_entrypoint(nir), true, false);

   NIR_PASS(_, nir, nir_lower_system_values);

   NIR_PASS(_, nir, nir_lower_alu_to_scalar, NULL, NULL);

   NIR_PASS(_, nir, nir_normalize_cubemap_coords);

   NIR_PASS(_, nir, nir_lower_global_vars_to_local);

   NIR_PASS(_, nir, nir_split_var_copies);
   NIR_PASS(_, nir, nir_split_struct_vars, nir_var_function_temp);

   v3d_optimize_nir(NULL, nir);

   NIR_PASS(_, nir, nir_lower_explicit_io,
            nir_var_mem_push_const,
            nir_address_format_32bit_offset);

   NIR_PASS(_, nir, nir_lower_explicit_io,
            nir_var_mem_ubo | nir_var_mem_ssbo,
            nir_address_format_32bit_index_offset);

   NIR_PASS(_, nir, nir_lower_explicit_io,
            nir_var_mem_global,
            nir_address_format_2x32bit_global);

   NIR_PASS(_, nir, nir_lower_load_const_to_scalar);

   /* Lower a bunch of stuff */
   NIR_PASS(_, nir, nir_lower_var_copies);

   NIR_PASS(_, nir, nir_lower_indirect_derefs, nir_var_shader_in, UINT32_MAX);

   NIR_PASS(_, nir, nir_lower_indirect_derefs,
            nir_var_function_temp, 2);

   NIR_PASS(_, nir, nir_lower_array_deref_of_vec,
            nir_var_mem_ubo | nir_var_mem_ssbo, NULL,
            nir_lower_direct_array_deref_of_vec_load);

   NIR_PASS(_, nir, nir_lower_frexp);

   /* Get rid of split copies */
   v3d_optimize_nir(NULL, nir);
}

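/* Translates the SPIR-V (or internal NIR) of a pipeline stage into NIR,
 * dumps it when the corresponding debug flags are set, and runs the common
 * preprocess_nir() lowerings on the result. Returns NULL on failure.
 */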
static nir_shader *
shader_module_compile_to_nir(struct v3dv_device *device,
                             struct v3dv_pipeline_stage *stage)
{
   assert(stage->module || stage->module_info);

   nir_shader *nir;
   const nir_shader_compiler_options *nir_options =
      v3dv_pipeline_get_nir_options(&device->devinfo);

   gl_shader_stage gl_stage = broadcom_shader_stage_to_gl(stage->stage);

   const VkPipelineShaderStageCreateInfo stage_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .pNext = !stage->module ? stage->module_info : NULL,
      .stage = mesa_to_vk_shader_stage(gl_stage),
      .module = vk_shader_module_to_handle((struct vk_shader_module *)stage->module),
      .pName = stage->entrypoint,
      .pSpecializationInfo = stage->spec_info,
   };

   /* vk_pipeline_shader_stage_to_nir also handles internal shaders when
    * module->nir != NULL. It also calls nir_validate_shader in both cases,
    * so we don't have to call it here.
    */
   VkResult result = vk_pipeline_shader_stage_to_nir(&device->vk,
                                                     stage->pipeline->flags,
                                                     &stage_info,
                                                     &default_spirv_options,
                                                     nir_options,
                                                     NULL, &nir);
   if (result != VK_SUCCESS)
      return NULL;
   assert(nir->info.stage == gl_stage);

   if (V3D_DBG(SHADERDB) && (!stage->module || stage->module->nir == NULL)) {
      char sha1buf[41];
      _mesa_sha1_format(sha1buf, stage->pipeline->sha1);
      nir->info.name = ralloc_strdup(nir, sha1buf);
   }

   if (V3D_DBG(NIR) || v3d_debug_flag_for_shader_stage(gl_stage)) {
      fprintf(stderr, "NIR after vk_pipeline_shader_stage_to_nir: %s prog %d NIR:\n",
              broadcom_shader_stage_name(stage->stage),
              stage->program_id);
      nir_print_shader(nir, stderr);
      fprintf(stderr, "\n");
   }

   preprocess_nir(nir);

   return nir;
}

static int
type_size_vec4(const struct glsl_type *type, bool bindless)
{
   return glsl_count_attribute_slots(type, false);
}

/* FIXME: the number of parameters for this method is somewhat big. Perhaps
 * rethink.
 */
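/* Adds an entry to a descriptor map, scanning from start_index: if a used
 * slot already matches the same (set, binding, array index, plane) it is
 * reused, promoting its return_size to 32 when callers disagree on
 * precision; otherwise the first free slot is claimed. Returns the slot
 * index.
 */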
static unsigned
descriptor_map_add(struct v3dv_descriptor_map *map,
                   int set,
                   int binding,
                   int array_index,
                   int array_size,
                   int start_index,
                   uint8_t return_size,
                   uint8_t plane)
{
   assert(array_index < array_size);
   assert(return_size == 16 || return_size == 32);

   unsigned index = start_index;
   for (; index < map->num_desc; index++) {
      if (map->used[index] &&
          set == map->set[index] &&
          binding == map->binding[index] &&
          array_index == map->array_index[index] &&
          plane == map->plane[index]) {
         assert(array_size == map->array_size[index]);
         if (return_size != map->return_size[index]) {
            /* If the return_size is different it means that the same sampler
             * was used for operations with different precision
             * requirements. In this case we need to ensure that we use the
             * larger one.
             */
            map->return_size[index] = 32;
         }
         return index;
      } else if (!map->used[index]) {
         break;
      }
   }

   assert(index < DESCRIPTOR_MAP_SIZE);
   assert(!map->used[index]);

   map->used[index] = true;
   map->set[index] = set;
   map->binding[index] = binding;
   map->array_index[index] = array_index;
   map->array_size[index] = array_size;
   map->return_size[index] = return_size;
   map->plane[index] = plane;
   map->num_desc = MAX2(map->num_desc, index + 1);

   return index;
}

struct lower_pipeline_layout_state {
   struct v3dv_pipeline *pipeline;
   const struct v3dv_pipeline_layout *layout;
   bool needs_default_sampler_state;
};

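/* Push constants are consumed by the backend as regular uniforms, so the
 * lowering just retypes the intrinsic to load_uniform and leaves its offset
 * source untouched.
 */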
static void
lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *instr,
                         struct lower_pipeline_layout_state *state)
{
   assert(instr->intrinsic == nir_intrinsic_load_push_constant);
   instr->intrinsic = nir_intrinsic_load_uniform;
}

static struct v3dv_descriptor_map*
pipeline_get_descriptor_map(struct v3dv_pipeline *pipeline,
                            VkDescriptorType desc_type,
                            gl_shader_stage gl_stage,
                            bool is_sampler)
{
   enum broadcom_shader_stage broadcom_stage =
      gl_shader_stage_to_broadcom(gl_stage);

   assert(pipeline->shared_data &&
          pipeline->shared_data->maps[broadcom_stage]);

   switch(desc_type) {
   case VK_DESCRIPTOR_TYPE_SAMPLER:
      return &pipeline->shared_data->maps[broadcom_stage]->sampler_map;
   case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
   case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
   case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
   case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
   case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
      return &pipeline->shared_data->maps[broadcom_stage]->texture_map;
   case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
      return is_sampler ?
         &pipeline->shared_data->maps[broadcom_stage]->sampler_map :
         &pipeline->shared_data->maps[broadcom_stage]->texture_map;
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
   case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK:
      return &pipeline->shared_data->maps[broadcom_stage]->ubo_map;
   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
      return &pipeline->shared_data->maps[broadcom_stage]->ssbo_map;
   default:
      unreachable("Descriptor type unknown or not having a descriptor map");
   }
}

/* Gathers info from the intrinsic (set and binding) and then lowers it so it
 * can be used by the v3d_compiler */
static void
lower_vulkan_resource_index(nir_builder *b,
                            nir_intrinsic_instr *instr,
                            struct lower_pipeline_layout_state *state)
{
   assert(instr->intrinsic == nir_intrinsic_vulkan_resource_index);

   nir_const_value *const_val = nir_src_as_const_value(instr->src[0]);

   unsigned set = nir_intrinsic_desc_set(instr);
   unsigned binding = nir_intrinsic_binding(instr);
   struct v3dv_descriptor_set_layout *set_layout = state->layout->set[set].layout;
   struct v3dv_descriptor_set_binding_layout *binding_layout =
      &set_layout->binding[binding];
   unsigned index = 0;

   switch (binding_layout->type) {
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
   case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: {
      struct v3dv_descriptor_map *descriptor_map =
         pipeline_get_descriptor_map(state->pipeline, binding_layout->type,
                                     b->shader->info.stage, false);

      if (!const_val)
         unreachable("non-constant vulkan_resource_index array index");

      /* At compile-time we will need to know if we are processing a UBO load
       * for an inline or a regular UBO so we can handle inline loads like
       * push constants. At the NIR level, however, the inline information is
       * gone, so we rely on the index to make this distinction.
       * Particularly, we reserve indices 1..MAX_INLINE_UNIFORM_BUFFERS for
       * inline buffers. This means that at the descriptor map level
       * we store inline buffers at slots 0..MAX_INLINE_UNIFORM_BUFFERS - 1,
       * and regular UBOs at indices starting from MAX_INLINE_UNIFORM_BUFFERS.
       */
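      /* For illustration, assuming MAX_INLINE_UNIFORM_BUFFERS were 4: inline
       * uniform blocks would occupy map slots 0..3 (UBO indices 1..4) and
       * the first regular UBO would land at map slot 4.
       */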
      uint32_t start_index = 0;
      if (binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
          binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) {
         start_index += MAX_INLINE_UNIFORM_BUFFERS;
      }

      index = descriptor_map_add(descriptor_map, set, binding,
                                 const_val->u32,
                                 binding_layout->array_size,
                                 start_index,
                                 32 /* return_size: doesn't really apply for this case */,
                                 0);
      break;
   }

   default:
      unreachable("unsupported descriptor type for vulkan_resource_index");
      break;
   }

   /* Since we use the deref pass, both vulkan_resource_index and
    * vulkan_load_descriptor return a vec2 providing an index and
    * offset. Our backend compiler only cares about the index part.
    */
   nir_def_replace(&instr->def, nir_imm_ivec2(b, index, 0));
}

static uint8_t
tex_instr_get_and_remove_plane_src(nir_tex_instr *tex)
{
   int plane_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_plane);
   if (plane_src_idx < 0)
      return 0;

   uint8_t plane = nir_src_as_uint(tex->src[plane_src_idx].src);
   nir_tex_instr_remove_src(tex, plane_src_idx);
   return plane;
}

/* Returns the return_size, so it can be used for the case of not having a
 * sampler object
 */
static uint8_t
lower_tex_src(nir_builder *b,
              nir_tex_instr *instr,
              unsigned src_idx,
              struct lower_pipeline_layout_state *state)
{
   nir_def *index = NULL;
   unsigned base_index = 0;
   unsigned array_elements = 1;
   nir_tex_src *src = &instr->src[src_idx];
   bool is_sampler = src->src_type == nir_tex_src_sampler_deref;

   uint8_t plane = tex_instr_get_and_remove_plane_src(instr);

   /* We compute first the offsets */
   nir_deref_instr *deref = nir_instr_as_deref(src->src.ssa->parent_instr);
   while (deref->deref_type != nir_deref_type_var) {
      nir_deref_instr *parent =
         nir_instr_as_deref(deref->parent.ssa->parent_instr);

      assert(deref->deref_type == nir_deref_type_array);

      if (nir_src_is_const(deref->arr.index) && index == NULL) {
         /* We're still building a direct index */
         base_index += nir_src_as_uint(deref->arr.index) * array_elements;
      } else {
         if (index == NULL) {
            /* We used to be direct but not anymore */
            index = nir_imm_int(b, base_index);
            base_index = 0;
         }

         index = nir_iadd(b, index,
                          nir_imul_imm(b, deref->arr.index.ssa,
                                       array_elements));
      }

      array_elements *= glsl_get_length(parent->type);

      deref = parent;
   }

   if (index)
      index = nir_umin(b, index, nir_imm_int(b, array_elements - 1));

   /* We have the offsets, we apply them, rewriting the source or removing
    * instr if needed
    */
   if (index) {
      nir_src_rewrite(&src->src, index);

      src->src_type = is_sampler ?
         nir_tex_src_sampler_offset :
         nir_tex_src_texture_offset;
   } else {
      nir_tex_instr_remove_src(instr, src_idx);
   }

   uint32_t set = deref->var->data.descriptor_set;
   uint32_t binding = deref->var->data.binding;
   /* FIXME: this is a really simplified check for the precision to be used
    * for the sampling. Right now we are only checking for the variables used
    * in the operation itself, but there are other cases that we could use to
    * infer the precision requirement.
    */
   bool relaxed_precision = deref->var->data.precision == GLSL_PRECISION_MEDIUM ||
                            deref->var->data.precision == GLSL_PRECISION_LOW;
   struct v3dv_descriptor_set_layout *set_layout = state->layout->set[set].layout;
   struct v3dv_descriptor_set_binding_layout *binding_layout =
      &set_layout->binding[binding];

   /* For input attachments, the shader includes the attachment_idx. As we are
    * treating them as a texture, we only want the base_index
    */
   uint32_t array_index = binding_layout->type != VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT ?
      deref->var->data.index + base_index :
      base_index;

   uint8_t return_size;
   if (V3D_DBG(TMU_16BIT))
      return_size = 16;
   else if (V3D_DBG(TMU_32BIT))
      return_size = 32;
   else
      return_size = relaxed_precision ? 16 : 32;

   struct v3dv_descriptor_map *map =
      pipeline_get_descriptor_map(state->pipeline, binding_layout->type,
                                  b->shader->info.stage, is_sampler);
   int desc_index =
      descriptor_map_add(map,
                         deref->var->data.descriptor_set,
                         deref->var->data.binding,
                         array_index,
                         binding_layout->array_size,
                         0,
                         return_size,
                         plane);

   if (is_sampler)
      instr->sampler_index = desc_index;
   else
      instr->texture_index = desc_index;

   return return_size;
}

static bool
lower_sampler(nir_builder *b,
              nir_tex_instr *instr,
              struct lower_pipeline_layout_state *state)
{
   uint8_t return_size = 0;

   int texture_idx =
      nir_tex_instr_src_index(instr, nir_tex_src_texture_deref);

   if (texture_idx >= 0)
      return_size = lower_tex_src(b, instr, texture_idx, state);

   int sampler_idx =
      nir_tex_instr_src_index(instr, nir_tex_src_sampler_deref);

   if (sampler_idx >= 0) {
      assert(nir_tex_instr_need_sampler(instr));
      lower_tex_src(b, instr, sampler_idx, state);
   }

   if (texture_idx < 0 && sampler_idx < 0)
      return false;

   /* If the instruction doesn't have a sampler (i.e. txf) we use backend_flags
    * to bind a default sampler state to configure precision.
    */
   if (sampler_idx < 0) {
      state->needs_default_sampler_state = true;
      instr->backend_flags = return_size == 16 ?
         V3DV_NO_SAMPLER_16BIT_IDX : V3DV_NO_SAMPLER_32BIT_IDX;
   }

   return true;
}

/* FIXME: really similar to lower_tex_src, perhaps refactor? */
static void
lower_image_deref(nir_builder *b,
                  nir_intrinsic_instr *instr,
                  struct lower_pipeline_layout_state *state)
{
   nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
   nir_def *index = NULL;
   unsigned array_elements = 1;
   unsigned base_index = 0;

   while (deref->deref_type != nir_deref_type_var) {
      nir_deref_instr *parent =
         nir_instr_as_deref(deref->parent.ssa->parent_instr);

      assert(deref->deref_type == nir_deref_type_array);

      if (nir_src_is_const(deref->arr.index) && index == NULL) {
         /* We're still building a direct index */
         base_index += nir_src_as_uint(deref->arr.index) * array_elements;
      } else {
         if (index == NULL) {
            /* We used to be direct but not anymore */
            index = nir_imm_int(b, base_index);
            base_index = 0;
         }

         index = nir_iadd(b, index,
                          nir_imul_imm(b, deref->arr.index.ssa,
                                       array_elements));
      }

      array_elements *= glsl_get_length(parent->type);

      deref = parent;
   }

   if (index)
      index = nir_umin(b, index, nir_imm_int(b, array_elements - 1));

   uint32_t set = deref->var->data.descriptor_set;
   uint32_t binding = deref->var->data.binding;
   struct v3dv_descriptor_set_layout *set_layout = state->layout->set[set].layout;
   struct v3dv_descriptor_set_binding_layout *binding_layout =
      &set_layout->binding[binding];

   uint32_t array_index = deref->var->data.index + base_index;

   assert(binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE ||
          binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);

   struct v3dv_descriptor_map *map =
      pipeline_get_descriptor_map(state->pipeline, binding_layout->type,
                                  b->shader->info.stage, false);

   int desc_index =
      descriptor_map_add(map,
                         deref->var->data.descriptor_set,
                         deref->var->data.binding,
                         array_index,
                         binding_layout->array_size,
                         0,
                         32 /* return_size: doesn't apply for textures */,
                         0);

   /* Note: we don't need to do anything here in relation to the precision and
    * the output size because for images we can infer that info from the image
    * intrinsic, that includes the image format (see
    * NIR_INTRINSIC_FORMAT). That is done by the v3d compiler.
    */

   index = nir_imm_int(b, desc_index);

   nir_rewrite_image_intrinsic(instr, index, false);
}

static bool
lower_intrinsic(nir_builder *b,
                nir_intrinsic_instr *instr,
                struct lower_pipeline_layout_state *state)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_push_constant:
      lower_load_push_constant(b, instr, state);
      return true;

   case nir_intrinsic_vulkan_resource_index:
      lower_vulkan_resource_index(b, instr, state);
      return true;

   case nir_intrinsic_load_vulkan_descriptor: {
      /* Loading the descriptor happens as part of load/store instructions,
       * so for us this is a no-op.
       */
      nir_def_replace(&instr->def, instr->src[0].ssa);
      return true;
   }

   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_image_deref_store:
   case nir_intrinsic_image_deref_atomic:
   case nir_intrinsic_image_deref_atomic_swap:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_image_deref_samples:
      lower_image_deref(b, instr, state);
      return true;

   default:
      return false;
   }
}

static bool
lower_pipeline_layout_cb(nir_builder *b,
                         nir_instr *instr,
                         void *_state)
{
   bool progress = false;
   struct lower_pipeline_layout_state *state = _state;

   b->cursor = nir_before_instr(instr);
   switch (instr->type) {
   case nir_instr_type_tex:
      progress |= lower_sampler(b, nir_instr_as_tex(instr), state);
      break;
   case nir_instr_type_intrinsic:
      progress |= lower_intrinsic(b, nir_instr_as_intrinsic(instr), state);
      break;
   default:
      break;
   }

   return progress;
}

static bool
lower_pipeline_layout_info(nir_shader *shader,
                           struct v3dv_pipeline *pipeline,
                           const struct v3dv_pipeline_layout *layout,
                           bool *needs_default_sampler_state)
{
   bool progress = false;

   struct lower_pipeline_layout_state state = {
      .pipeline = pipeline,
      .layout = layout,
      .needs_default_sampler_state = false,
   };

   progress = nir_shader_instructions_pass(shader, lower_pipeline_layout_cb,
                                           nir_metadata_control_flow,
                                           &state);

   *needs_default_sampler_state = state.needs_default_sampler_state;

   return progress;
}

/* This flips gl_PointCoord.y to match Vulkan requirements */
static bool
lower_point_coord_cb(nir_builder *b, nir_intrinsic_instr *intr, void *_state)
{
   if (intr->intrinsic != nir_intrinsic_load_input)
      return false;

   if (nir_intrinsic_io_semantics(intr).location != VARYING_SLOT_PNTC)
      return false;

   b->cursor = nir_after_instr(&intr->instr);
   nir_def *result = &intr->def;
   result =
      nir_vector_insert_imm(b, result,
                            nir_fsub_imm(b, 1.0, nir_channel(b, result, 1)), 1);
   nir_def_rewrite_uses_after(&intr->def,
                              result, result->parent_instr);
   return true;
}

static bool
v3d_nir_lower_point_coord(nir_shader *s)
{
   assert(s->info.stage == MESA_SHADER_FRAGMENT);
   return nir_shader_intrinsics_pass(s, lower_point_coord_cb,
                                     nir_metadata_control_flow, NULL);
}

static void
lower_fs_io(nir_shader *nir)
{
   /* Our backend doesn't handle array fragment shader outputs */
   NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
   NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_shader_out, NULL);

   nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
                               MESA_SHADER_FRAGMENT);

   nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
                               MESA_SHADER_FRAGMENT);

   NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
            type_size_vec4, 0);
}

static void
lower_gs_io(struct nir_shader *nir)
{
   NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);

   nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
                               MESA_SHADER_GEOMETRY);

   nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
                               MESA_SHADER_GEOMETRY);
}

static void
lower_vs_io(struct nir_shader *nir)
{
   NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);

   nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
                               MESA_SHADER_VERTEX);

   nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
                               MESA_SHADER_VERTEX);

   /* FIXME: if we call nir_lower_io, we get a crash later. Likely because it
    * overlaps with v3d_nir_lower_io. Need further research though.
    */
}

static void
shader_debug_output(const char *message, void *data)
{
   /* FIXME: We probably don't want to debug anything extra here, and in fact
    * the compiler does not use this callback much, only as an alternative
    * way to dump the shaderdb stats, which you can already get using
    * V3D_DEBUG=shaderdb. Perhaps it would make sense to revisit the v3d
    * compiler to remove that callback.
    */
}

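/* Fills in the stage-agnostic part of the compiler key: default texture
 * swizzles, sampler return sizes from the descriptor maps, whether this is
 * the last geometry stage, the UCP enable mask, and the per-stage robustness
 * settings.
 */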
static void
pipeline_populate_v3d_key(struct v3d_key *key,
                          const struct v3dv_pipeline_stage *p_stage,
                          uint32_t ucp_enables)
{
   assert(p_stage->pipeline->shared_data &&
          p_stage->pipeline->shared_data->maps[p_stage->stage]);

   /* The following values are default values used at pipeline create time.
    * We use 32 bit as the default return size.
    */
   struct v3dv_descriptor_map *sampler_map =
      &p_stage->pipeline->shared_data->maps[p_stage->stage]->sampler_map;
   struct v3dv_descriptor_map *texture_map =
      &p_stage->pipeline->shared_data->maps[p_stage->stage]->texture_map;

   key->num_tex_used = texture_map->num_desc;
   assert(key->num_tex_used <= V3D_MAX_TEXTURE_SAMPLERS);
   for (uint32_t tex_idx = 0; tex_idx < texture_map->num_desc; tex_idx++) {
      key->tex[tex_idx].swizzle[0] = PIPE_SWIZZLE_X;
      key->tex[tex_idx].swizzle[1] = PIPE_SWIZZLE_Y;
      key->tex[tex_idx].swizzle[2] = PIPE_SWIZZLE_Z;
      key->tex[tex_idx].swizzle[3] = PIPE_SWIZZLE_W;
   }

   key->num_samplers_used = sampler_map->num_desc;
   assert(key->num_samplers_used <= V3D_MAX_TEXTURE_SAMPLERS);
   for (uint32_t sampler_idx = 0; sampler_idx < sampler_map->num_desc;
        sampler_idx++) {
      key->sampler[sampler_idx].return_size =
         sampler_map->return_size[sampler_idx];

      key->sampler[sampler_idx].return_channels =
         key->sampler[sampler_idx].return_size == 32 ? 4 : 2;
   }

   switch (p_stage->stage) {
   case BROADCOM_SHADER_VERTEX:
   case BROADCOM_SHADER_VERTEX_BIN:
      key->is_last_geometry_stage =
         p_stage->pipeline->stages[BROADCOM_SHADER_GEOMETRY] == NULL;
      break;
   case BROADCOM_SHADER_GEOMETRY:
   case BROADCOM_SHADER_GEOMETRY_BIN:
      /* FIXME: while we don't implement tessellation shaders */
      key->is_last_geometry_stage = true;
      break;
   case BROADCOM_SHADER_FRAGMENT:
   case BROADCOM_SHADER_COMPUTE:
      key->is_last_geometry_stage = false;
      break;
   default:
      unreachable("unsupported shader stage");
   }

   /* Vulkan doesn't have fixed function state for user clip planes. Instead,
    * shaders can write to gl_ClipDistance[], in which case the SPIR-V compiler
    * takes care of adding a single compact array variable at
    * VARYING_SLOT_CLIP_DIST0, so we don't need any user clip plane lowering.
    *
    * The only lowering we are interested in is specific to the fragment
    * shader, where we want to emit discards to honor writes to
    * gl_ClipDistance[] in previous stages. This is done via
    * nir_lower_clip_fs() so we only set up the ucp enable mask for that
    * stage.
    */
   key->ucp_enables = ucp_enables;

   const VkPipelineRobustnessBufferBehaviorEXT robust_buffer_enabled =
      VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT;

   const VkPipelineRobustnessImageBehaviorEXT robust_image_enabled =
      VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_EXT;

   key->robust_uniform_access =
      p_stage->robustness.uniform_buffers == robust_buffer_enabled;
   key->robust_storage_access =
      p_stage->robustness.storage_buffers == robust_buffer_enabled;
   key->robust_image_access =
      p_stage->robustness.images == robust_image_enabled;
}

/* FIXME: anv maps to the hw primitive type. Perhaps eventually we would do
 * the same. For now we use mesa_prim, which is the one already used by v3d.
 */
static const enum mesa_prim vk_to_mesa_prim[] = {
   [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = MESA_PRIM_POINTS,
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = MESA_PRIM_LINES,
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = MESA_PRIM_LINE_STRIP,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = MESA_PRIM_TRIANGLES,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = MESA_PRIM_TRIANGLE_STRIP,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = MESA_PRIM_TRIANGLE_FAN,
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = MESA_PRIM_LINES_ADJACENCY,
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = MESA_PRIM_LINE_STRIP_ADJACENCY,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = MESA_PRIM_TRIANGLES_ADJACENCY,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = MESA_PRIM_TRIANGLE_STRIP_ADJACENCY,
};

uint32_t
v3dv_pipeline_primitive(VkPrimitiveTopology vk_prim)
{
   return v3d_hw_prim_type(vk_to_mesa_prim[vk_prim]);
}

static const enum pipe_logicop vk_to_pipe_logicop[] = {
   [VK_LOGIC_OP_CLEAR] = PIPE_LOGICOP_CLEAR,
   [VK_LOGIC_OP_AND] = PIPE_LOGICOP_AND,
   [VK_LOGIC_OP_AND_REVERSE] = PIPE_LOGICOP_AND_REVERSE,
   [VK_LOGIC_OP_COPY] = PIPE_LOGICOP_COPY,
   [VK_LOGIC_OP_AND_INVERTED] = PIPE_LOGICOP_AND_INVERTED,
   [VK_LOGIC_OP_NO_OP] = PIPE_LOGICOP_NOOP,
   [VK_LOGIC_OP_XOR] = PIPE_LOGICOP_XOR,
   [VK_LOGIC_OP_OR] = PIPE_LOGICOP_OR,
   [VK_LOGIC_OP_NOR] = PIPE_LOGICOP_NOR,
   [VK_LOGIC_OP_EQUIVALENT] = PIPE_LOGICOP_EQUIV,
   [VK_LOGIC_OP_INVERT] = PIPE_LOGICOP_INVERT,
   [VK_LOGIC_OP_OR_REVERSE] = PIPE_LOGICOP_OR_REVERSE,
   [VK_LOGIC_OP_COPY_INVERTED] = PIPE_LOGICOP_COPY_INVERTED,
   [VK_LOGIC_OP_OR_INVERTED] = PIPE_LOGICOP_OR_INVERTED,
   [VK_LOGIC_OP_NAND] = PIPE_LOGICOP_NAND,
   [VK_LOGIC_OP_SET] = PIPE_LOGICOP_SET,
};

static bool
enable_line_smooth(struct v3dv_pipeline *pipeline,
                   const VkPipelineRasterizationStateCreateInfo *rs_info)
{
   if (!pipeline->rasterization_enabled)
      return false;

   const VkPipelineRasterizationLineStateCreateInfoKHR *ls_info =
      vk_find_struct_const(rs_info->pNext,
                           PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_KHR);

   if (!ls_info)
      return false;

   /* Although topology is dynamic now, the topology class can't change
    * because we don't support dynamicPrimitiveTopologyUnrestricted, so we can
    * use the static topology from the pipeline for this.
    */
   switch(pipeline->topology) {
   case MESA_PRIM_LINES:
   case MESA_PRIM_LINE_LOOP:
   case MESA_PRIM_LINE_STRIP:
   case MESA_PRIM_LINES_ADJACENCY:
   case MESA_PRIM_LINE_STRIP_ADJACENCY:
      return ls_info->lineRasterizationMode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_KHR;
   default:
      return false;
   }
}

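/* Records per-attachment information in the FS key: the bit in the cbufs
 * mask, the buffer format and swizzle when logic ops may need color reads,
 * and the f32/uint/int render-buffer masks derived from the attachment
 * format and the shader's untyped color outputs.
 */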
static void
v3d_fs_key_set_color_attachment(struct v3d_fs_key *key,
                                const struct v3dv_pipeline_stage *p_stage,
                                uint32_t index,
                                VkFormat fb_format)
{
   key->cbufs |= 1 << index;

   enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format);

   /* If logic operations are enabled then we might emit color reads and we
    * need to know the color buffer format and swizzle for that
    */
   if (key->logicop_func != PIPE_LOGICOP_COPY) {
      /* Framebuffer formats should be single plane */
      assert(vk_format_get_plane_count(fb_format) == 1);
      key->color_fmt[index].format = fb_pipe_format;
      memcpy(key->color_fmt[index].swizzle,
             v3dv_get_format_swizzle(p_stage->pipeline->device, fb_format, 0),
             sizeof(key->color_fmt[index].swizzle));
   }

   const struct util_format_description *desc =
      vk_format_description(fb_format);

   if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
       desc->channel[0].size == 32) {
      key->f32_color_rb |= 1 << index;
   }

   if (p_stage->nir->info.fs.untyped_color_outputs) {
      if (util_format_is_pure_uint(fb_pipe_format))
         key->uint_color_rb |= 1 << index;
      else if (util_format_is_pure_sint(fb_pipe_format))
         key->int_color_rb |= 1 << index;
   }
}

static void
pipeline_populate_v3d_fs_key(struct v3d_fs_key *key,
                             const VkGraphicsPipelineCreateInfo *pCreateInfo,
                             const struct vk_render_pass_state *rendering_info,
                             const struct v3dv_pipeline_stage *p_stage,
                             bool has_geometry_shader,
                             uint32_t ucp_enables)
{
   assert(p_stage->stage == BROADCOM_SHADER_FRAGMENT);

   memset(key, 0, sizeof(*key));

   struct v3dv_device *device = p_stage->pipeline->device;
   assert(device);

   pipeline_populate_v3d_key(&key->base, p_stage, ucp_enables);

   const VkPipelineInputAssemblyStateCreateInfo *ia_info =
      pCreateInfo->pInputAssemblyState;
   uint8_t topology = vk_to_mesa_prim[ia_info->topology];

   key->is_points = (topology == MESA_PRIM_POINTS);
   key->is_lines = (topology >= MESA_PRIM_LINES &&
                    topology <= MESA_PRIM_LINE_STRIP);

   if (key->is_points) {
      /* This mask represents state for GL_ARB_point_sprite which is not
       * relevant to Vulkan.
       */
      key->point_sprite_mask = 0;

      /* Vulkan mandates upper left. */
      key->point_coord_upper_left = true;
   }

   key->has_gs = has_geometry_shader;

   const VkPipelineColorBlendStateCreateInfo *cb_info =
      p_stage->pipeline->rasterization_enabled ?
      pCreateInfo->pColorBlendState : NULL;

   key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ?
                       vk_to_pipe_logicop[cb_info->logicOp] :
                       PIPE_LOGICOP_COPY;

   /* Multisample rasterization state must be ignored if rasterization
    * is disabled.
    */
   const VkPipelineMultisampleStateCreateInfo *ms_info =
      p_stage->pipeline->rasterization_enabled ? pCreateInfo->pMultisampleState : NULL;
   if (ms_info) {
      assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT ||
             ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT);
      key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;

      if (key->msaa)
         key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable;

      key->sample_alpha_to_one = ms_info->alphaToOneEnable;
   }

   key->line_smoothing = enable_line_smooth(p_stage->pipeline,
                                            pCreateInfo->pRasterizationState);

   /* This is intended for V3D versions before 4.1, otherwise we just use the
    * tile buffer load/store swap R/B bit.
    */
   key->swap_color_rb = 0;

   for (uint32_t i = 0; i < rendering_info->color_attachment_count; i++) {
      if (rendering_info->color_attachment_formats[i] == VK_FORMAT_UNDEFINED)
         continue;
      v3d_fs_key_set_color_attachment(key, p_stage, i,
                                      rendering_info->color_attachment_formats[i]);
   }
}

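/* Copies the consuming stage's input slot list into the producing stage's
 * used-outputs array, so the compiler only emits the varyings that the next
 * stage actually reads.
 */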
static void
setup_stage_outputs_from_next_stage_inputs(
   uint8_t next_stage_num_inputs,
   struct v3d_varying_slot *next_stage_input_slots,
   uint8_t *num_used_outputs,
   struct v3d_varying_slot *used_output_slots,
   uint32_t size_of_used_output_slots)
{
   *num_used_outputs = next_stage_num_inputs;
   memcpy(used_output_slots, next_stage_input_slots, size_of_used_output_slots);
}

static void
pipeline_populate_v3d_gs_key(struct v3d_gs_key *key,
                             const VkGraphicsPipelineCreateInfo *pCreateInfo,
                             const struct v3dv_pipeline_stage *p_stage)
{
   assert(p_stage->stage == BROADCOM_SHADER_GEOMETRY ||
          p_stage->stage == BROADCOM_SHADER_GEOMETRY_BIN);

   struct v3dv_device *device = p_stage->pipeline->device;
   assert(device);

   memset(key, 0, sizeof(*key));

   pipeline_populate_v3d_key(&key->base, p_stage, 0);

   struct v3dv_pipeline *pipeline = p_stage->pipeline;

   key->per_vertex_point_size =
      p_stage->nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ);

   key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage);

   assert(key->base.is_last_geometry_stage);
   if (key->is_coord) {
      /* Output varyings in the last binning shader are only used for transform
       * feedback. Set to 0 as VK_EXT_transform_feedback is not supported.
       */
      key->num_used_outputs = 0;
   } else {
      struct v3dv_shader_variant *fs_variant =
         pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];

      STATIC_ASSERT(sizeof(key->used_outputs) ==
                    sizeof(fs_variant->prog_data.fs->input_slots));

      setup_stage_outputs_from_next_stage_inputs(
         fs_variant->prog_data.fs->num_inputs,
         fs_variant->prog_data.fs->input_slots,
         &key->num_used_outputs,
         key->used_outputs,
         sizeof(key->used_outputs));
   }
}

static void
pipeline_populate_v3d_vs_key(struct v3d_vs_key *key,
                             const VkGraphicsPipelineCreateInfo *pCreateInfo,
                             const struct v3dv_pipeline_stage *p_stage)
{
   assert(p_stage->stage == BROADCOM_SHADER_VERTEX ||
          p_stage->stage == BROADCOM_SHADER_VERTEX_BIN);

   struct v3dv_device *device = p_stage->pipeline->device;
   assert(device);

   memset(key, 0, sizeof(*key));
   pipeline_populate_v3d_key(&key->base, p_stage, 0);

   struct v3dv_pipeline *pipeline = p_stage->pipeline;

   key->per_vertex_point_size =
      p_stage->nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ);

   key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage);

   if (key->is_coord) { /* Binning VS */
      if (key->base.is_last_geometry_stage) {
         /* Output varyings in the last binning shader are only used for
          * transform feedback. Set to 0 as VK_EXT_transform_feedback is not
          * supported.
          */
         key->num_used_outputs = 0;
      } else {
         /* Linking against GS binning program */
         assert(pipeline->stages[BROADCOM_SHADER_GEOMETRY]);
         struct v3dv_shader_variant *gs_bin_variant =
            pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];

         STATIC_ASSERT(sizeof(key->used_outputs) ==
                       sizeof(gs_bin_variant->prog_data.gs->input_slots));

         setup_stage_outputs_from_next_stage_inputs(
            gs_bin_variant->prog_data.gs->num_inputs,
            gs_bin_variant->prog_data.gs->input_slots,
            &key->num_used_outputs,
            key->used_outputs,
            sizeof(key->used_outputs));
      }
   } else { /* Render VS */
      if (pipeline->stages[BROADCOM_SHADER_GEOMETRY]) {
         /* Linking against GS render program */
         struct v3dv_shader_variant *gs_variant =
            pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];

         STATIC_ASSERT(sizeof(key->used_outputs) ==
                       sizeof(gs_variant->prog_data.gs->input_slots));

         setup_stage_outputs_from_next_stage_inputs(
            gs_variant->prog_data.gs->num_inputs,
            gs_variant->prog_data.gs->input_slots,
            &key->num_used_outputs,
            key->used_outputs,
            sizeof(key->used_outputs));
      } else {
         /* Linking against FS program */
         struct v3dv_shader_variant *fs_variant =
            pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];

         STATIC_ASSERT(sizeof(key->used_outputs) ==
                       sizeof(fs_variant->prog_data.fs->input_slots));

         setup_stage_outputs_from_next_stage_inputs(
            fs_variant->prog_data.fs->num_inputs,
            fs_variant->prog_data.fs->input_slots,
            &key->num_used_outputs,
            key->used_outputs,
            sizeof(key->used_outputs));
      }
   }

   const VkPipelineVertexInputStateCreateInfo *vi_info =
      pCreateInfo->pVertexInputState;
   for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
      const VkVertexInputAttributeDescription *desc =
         &vi_info->pVertexAttributeDescriptions[i];
      assert(desc->location < MAX_VERTEX_ATTRIBS);
      if (desc->format == VK_FORMAT_B8G8R8A8_UNORM ||
          desc->format == VK_FORMAT_A2R10G10B10_UNORM_PACK32) {
         key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location);
      }
   }
}

/**
 * Creates the initial form of the pipeline stage for a binning shader by
 * cloning the render shader and flagging it as a coordinate shader.
 *
 * Returns NULL if it was not able to allocate the object, so it should be
 * handled as a VK_ERROR_OUT_OF_HOST_MEMORY error.
 */
static struct v3dv_pipeline_stage *
pipeline_stage_create_binning(const struct v3dv_pipeline_stage *src,
                              const VkAllocationCallbacks *pAllocator)
{
   struct v3dv_device *device = src->pipeline->device;

   struct v3dv_pipeline_stage *p_stage =
      vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

   if (p_stage == NULL)
      return NULL;

   assert(src->stage == BROADCOM_SHADER_VERTEX ||
          src->stage == BROADCOM_SHADER_GEOMETRY);

   enum broadcom_shader_stage bin_stage =
      src->stage == BROADCOM_SHADER_VERTEX ?
         BROADCOM_SHADER_VERTEX_BIN :
         BROADCOM_SHADER_GEOMETRY_BIN;

   p_stage->pipeline = src->pipeline;
   p_stage->stage = bin_stage;
   p_stage->entrypoint = src->entrypoint;
   p_stage->module = src->module;
   p_stage->module_info = src->module_info;

   /* For binning shaders we will clone the NIR code from the corresponding
    * render shader later, when we call pipeline_compile_xxx_shader. This way
    * we only have to run the relevant NIR lowerings once for render shaders
    */
   p_stage->nir = NULL;
   p_stage->program_id = src->program_id;
   p_stage->spec_info = src->spec_info;
   p_stage->feedback = (VkPipelineCreationFeedback) { 0 };
   p_stage->robustness = src->robustness;
   memcpy(p_stage->shader_sha1, src->shader_sha1, 20);

   return p_stage;
}

/*
 * Based on some creation flags we assume that the QPU instructions will be
 * needed later to gather further info. In that case we just keep the
 * qpu_insts around, instead of mapping/unmapping the BO later.
 */
static bool
pipeline_keep_qpu(struct v3dv_pipeline *pipeline)
{
   return pipeline->flags &
      (VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR |
       VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR);
}

/**
 * Returns false if it was not able to allocate or map the assembly bo memory.
 */
static bool
upload_assembly(struct v3dv_pipeline *pipeline)
{
   uint32_t total_size = 0;
   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      struct v3dv_shader_variant *variant =
         pipeline->shared_data->variants[stage];

      if (variant != NULL)
         total_size += variant->qpu_insts_size;
   }

   struct v3dv_bo *bo = v3dv_bo_alloc(pipeline->device, total_size,
                                      "pipeline shader assembly", true);
   if (!bo) {
      fprintf(stderr, "failed to allocate memory for shader\n");
      return false;
   }

   bool ok = v3dv_bo_map(pipeline->device, bo, total_size);
   if (!ok) {
      fprintf(stderr, "failed to map source shader buffer\n");
      return false;
   }

   uint32_t offset = 0;
   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      struct v3dv_shader_variant *variant =
         pipeline->shared_data->variants[stage];

      if (variant != NULL) {
         variant->assembly_offset = offset;

         memcpy(bo->map + offset, variant->qpu_insts, variant->qpu_insts_size);
         offset += variant->qpu_insts_size;

         if (!pipeline_keep_qpu(pipeline)) {
            free(variant->qpu_insts);
            variant->qpu_insts = NULL;
         }
      }
   }
   assert(total_size == offset);

   pipeline->shared_data->assembly_bo = bo;

   return true;
}

static void
pipeline_hash_graphics(const struct v3dv_pipeline *pipeline,
                       struct v3dv_pipeline_key *key,
                       unsigned char *sha1_out)
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);

   if (pipeline->layout) {
      _mesa_sha1_update(&ctx, &pipeline->layout->sha1,
                        sizeof(pipeline->layout->sha1));
   }

   /* We need to include all shader stages in the sha1 key as linking may
    * modify the shader code in any stage. An alternative would be to use the
    * serialized NIR, but that seems like overkill.
    */
   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      if (broadcom_shader_stage_is_binning(stage))
         continue;

      struct v3dv_pipeline_stage *p_stage = pipeline->stages[stage];
      if (p_stage == NULL)
         continue;

      assert(stage != BROADCOM_SHADER_COMPUTE);

      _mesa_sha1_update(&ctx, p_stage->shader_sha1, sizeof(p_stage->shader_sha1));
   }

   _mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key));

   _mesa_sha1_final(&ctx, sha1_out);
}

static void
pipeline_hash_compute(const struct v3dv_pipeline *pipeline,
                      struct v3dv_pipeline_key *key,
                      unsigned char *sha1_out)
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);

   if (pipeline->layout) {
      _mesa_sha1_update(&ctx, &pipeline->layout->sha1,
                        sizeof(pipeline->layout->sha1));
   }

   struct v3dv_pipeline_stage *p_stage =
      pipeline->stages[BROADCOM_SHADER_COMPUTE];

   _mesa_sha1_update(&ctx, p_stage->shader_sha1, sizeof(p_stage->shader_sha1));

   _mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key));

   _mesa_sha1_final(&ctx, sha1_out);
}

/* Checks that the pipeline has enough spill size to use for any of its
 * variants
 */
1562 static void
pipeline_check_spill_size(struct v3dv_pipeline * pipeline)1563 pipeline_check_spill_size(struct v3dv_pipeline *pipeline)
1564 {
1565 uint32_t max_spill_size = 0;
1566
1567 for(uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
1568 struct v3dv_shader_variant *variant =
1569 pipeline->shared_data->variants[stage];
1570
1571 if (variant != NULL) {
1572 max_spill_size = MAX2(variant->prog_data.base->spill_size,
1573 max_spill_size);
1574 }
1575 }
1576
1577 if (max_spill_size > 0) {
1578 struct v3dv_device *device = pipeline->device;
1579
1580       /* The TIDX register we use for choosing the area to access
1581        * for scratch space is: (core << 6) | (qpu << 2) | thread.
1582        * Even at the minimum thread count in a particular shader, that
1583        * means we still multiply the number of QPUs by 4.
1584 */
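      /* Illustrative example (hypothetical device numbers): with 12 QPUs and
       * a worst-case per-thread spill of 512 bytes, the scratch BO below
       * would be 4 * 12 * 512 = 24576 bytes.
       */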
1585 const uint32_t total_spill_size =
1586 4 * device->devinfo.qpu_count * max_spill_size;
1587 if (pipeline->spill.bo) {
1588 assert(pipeline->spill.size_per_thread > 0);
1589 v3dv_bo_free(device, pipeline->spill.bo);
1590 }
1591 pipeline->spill.bo =
1592 v3dv_bo_alloc(device, total_spill_size, "spill", true);
1593 pipeline->spill.size_per_thread = max_spill_size;
1594 }
1595 }
1596
1597 /**
1598  * Creates a new shader_variant. Note that prog_data is not const, as it is
1599  * assumed that the caller will provide a pointer that the shader_variant
1600  * will own.
1601  *
1602  * Creation doesn't include allocating a BO to store the contents of
1603  * qpu_insts, as we will try to share the same BO for several shader
1604  * variants. Also note that qpu_insts being NULL is valid, for example if we
1605  * are creating the shader_variants from the cache, so we can just upload the
1606  * assembly of all the shader stages at once.
1607  */
1608 struct v3dv_shader_variant *
1609 v3dv_shader_variant_create(struct v3dv_device *device,
1610 enum broadcom_shader_stage stage,
1611 struct v3d_prog_data *prog_data,
1612 uint32_t prog_data_size,
1613 uint32_t assembly_offset,
1614 uint64_t *qpu_insts,
1615 uint32_t qpu_insts_size,
1616 VkResult *out_vk_result)
1617 {
1618 struct v3dv_shader_variant *variant =
1619 vk_zalloc(&device->vk.alloc, sizeof(*variant), 8,
1620 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1621
1622 if (variant == NULL) {
1623 *out_vk_result = VK_ERROR_OUT_OF_HOST_MEMORY;
1624 return NULL;
1625 }
1626
1627 variant->stage = stage;
1628 variant->prog_data_size = prog_data_size;
1629 variant->prog_data.base = prog_data;
1630
1631 variant->assembly_offset = assembly_offset;
1632 variant->qpu_insts_size = qpu_insts_size;
1633 variant->qpu_insts = qpu_insts;
1634
1635 *out_vk_result = VK_SUCCESS;
1636
1637 return variant;
1638 }
1639
1640 /* For a given key, this returns the compiled version of the shader, handing
1641  * a new reference to the shader_variant to the caller, or NULL.
1642  *
1643  * If the method returns NULL it means that something went wrong:
1644  *   * Not enough memory: this is one of the possible outcomes defined by
1645  *     vkCreateXXXPipelines. out_vk_result will return the proper OOM error.
1646  *   * Compilation error: hypothetically this shouldn't happen, as the spec
1647  *     states that a VkShaderModule needs to be created with valid SPIR-V, so
1648  *     any compilation failure is a driver bug. In practice, something as
1649  *     common as failing to register allocate can lead to a compilation
1650  *     failure. In that case the only option (for any driver) is
1651  *     VK_ERROR_UNKNOWN, even if we know that the problem was a compiler
1652  *     error.
1653  */
1654 static struct v3dv_shader_variant *
1655 pipeline_compile_shader_variant(struct v3dv_pipeline_stage *p_stage,
1656 struct v3d_key *key,
1657 size_t key_size,
1658 const VkAllocationCallbacks *pAllocator,
1659 VkResult *out_vk_result)
1660 {
1661 int64_t stage_start = os_time_get_nano();
1662
1663 struct v3dv_pipeline *pipeline = p_stage->pipeline;
1664 struct v3dv_physical_device *physical_device = pipeline->device->pdevice;
1665 const struct v3d_compiler *compiler = physical_device->compiler;
1666 gl_shader_stage gl_stage = broadcom_shader_stage_to_gl(p_stage->stage);
1667
1668 if (V3D_DBG(NIR) || v3d_debug_flag_for_shader_stage(gl_stage)) {
1669 fprintf(stderr, "Just before v3d_compile: %s prog %d NIR:\n",
1670 broadcom_shader_stage_name(p_stage->stage),
1671 p_stage->program_id);
1672 nir_print_shader(p_stage->nir, stderr);
1673 fprintf(stderr, "\n");
1674 }
1675
1676 uint64_t *qpu_insts;
1677 uint32_t qpu_insts_size;
1678 struct v3d_prog_data *prog_data;
1679 uint32_t prog_data_size = v3d_prog_data_size(gl_stage);
1680
1681 qpu_insts = v3d_compile(compiler,
1682 key, &prog_data,
1683 p_stage->nir,
1684 shader_debug_output, NULL,
1685 p_stage->program_id, 0,
1686 &qpu_insts_size);
1687
1688 struct v3dv_shader_variant *variant = NULL;
1689
1690 if (!qpu_insts) {
1691 fprintf(stderr, "Failed to compile %s prog %d NIR to VIR\n",
1692 broadcom_shader_stage_name(p_stage->stage),
1693 p_stage->program_id);
1694 *out_vk_result = VK_ERROR_UNKNOWN;
1695 } else {
1696 variant =
1697 v3dv_shader_variant_create(pipeline->device, p_stage->stage,
1698 prog_data, prog_data_size,
1699 0, /* assembly_offset, no final value yet */
1700 qpu_insts, qpu_insts_size,
1701 out_vk_result);
1702 }
1703    /* At this point we no longer need the nir shader, but we free all the
1704     * temporary p_stage structs used during pipeline creation when we finish
1705     * it, so let's not worry about freeing the nir here.
1706     */
1707
1708 p_stage->feedback.duration += os_time_get_nano() - stage_start;
1709
1710 return variant;
1711 }
1712
1713 static void
1714 link_shaders(nir_shader *producer, nir_shader *consumer)
1715 {
1716 assert(producer);
1717 assert(consumer);
1718
1719 if (producer->options->lower_to_scalar) {
1720 NIR_PASS(_, producer, nir_lower_io_to_scalar_early, nir_var_shader_out);
1721 NIR_PASS(_, consumer, nir_lower_io_to_scalar_early, nir_var_shader_in);
1722 }
1723
1724 nir_lower_io_arrays_to_elements(producer, consumer);
1725
1726 v3d_optimize_nir(NULL, producer);
1727 v3d_optimize_nir(NULL, consumer);
1728
1729 if (nir_link_opt_varyings(producer, consumer))
1730 v3d_optimize_nir(NULL, consumer);
1731
1732 NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
1733 NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
1734
1735 if (nir_remove_unused_varyings(producer, consumer)) {
1736 NIR_PASS(_, producer, nir_lower_global_vars_to_local);
1737 NIR_PASS(_, consumer, nir_lower_global_vars_to_local);
1738
1739 v3d_optimize_nir(NULL, producer);
1740 v3d_optimize_nir(NULL, consumer);
1741
1742 /* Optimizations can cause varyings to become unused.
1743 * nir_compact_varyings() depends on all dead varyings being removed so
1744 * we need to call nir_remove_dead_variables() again here.
1745 */
1746 NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
1747 NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
1748 }
1749 }
1750
1751 static void
1752 pipeline_lower_nir(struct v3dv_pipeline *pipeline,
1753 struct v3dv_pipeline_stage *p_stage,
1754 struct v3dv_pipeline_layout *layout)
1755 {
1756 int64_t stage_start = os_time_get_nano();
1757
1758 assert(pipeline->shared_data &&
1759 pipeline->shared_data->maps[p_stage->stage]);
1760
1761 NIR_PASS_V(p_stage->nir, nir_vk_lower_ycbcr_tex,
1762 lookup_ycbcr_conversion, layout);
1763
1764 nir_shader_gather_info(p_stage->nir, nir_shader_get_entrypoint(p_stage->nir));
1765
1766 /* We add this because we need a valid sampler for nir_lower_tex to do
1767 * unpacking of the texture operation result, even for the case where there
1768 * is no sampler state.
1769 *
1770     * We add two of those: one for the case where we need a 16-bit
1771     * return_size, and another for the case where we need a 32-bit return size.
1772 */
1773 struct v3dv_descriptor_maps *maps =
1774 pipeline->shared_data->maps[p_stage->stage];
1775
1776 UNUSED unsigned index;
1777 index = descriptor_map_add(&maps->sampler_map, -1, -1, -1, 0, 0, 16, 0);
1778 assert(index == V3DV_NO_SAMPLER_16BIT_IDX);
1779
1780 index = descriptor_map_add(&maps->sampler_map, -2, -2, -2, 0, 0, 32, 0);
1781 assert(index == V3DV_NO_SAMPLER_32BIT_IDX);
1782
1783 /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
1784 bool needs_default_sampler_state = false;
1785 NIR_PASS(_, p_stage->nir, lower_pipeline_layout_info, pipeline, layout,
1786 &needs_default_sampler_state);
1787
1788 /* If in the end we didn't need to use the default sampler states and the
1789 * shader doesn't need any other samplers, get rid of them so we can
1790 * recognize that this program doesn't use any samplers at all.
1791 */
1792 if (!needs_default_sampler_state && maps->sampler_map.num_desc == 2)
1793 maps->sampler_map.num_desc = 0;
1794
1795 p_stage->feedback.duration += os_time_get_nano() - stage_start;
1796 }
1797
1798 /**
1799 * The SPIR-V compiler will insert a sized compact array for
1800 * VARYING_SLOT_CLIP_DIST0 if the vertex shader writes to gl_ClipDistance[],
1801 * where the size of the array determines the number of active clip planes.
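 *
 * For example, a vertex shader declaring gl_ClipDistance[3] produces a
 * compact array of length 3, so this returns (1 << 3) - 1 = 0x7.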
1802 */
1803 static uint32_t
1804 get_ucp_enable_mask(struct v3dv_pipeline_stage *p_stage)
1805 {
1806 assert(p_stage->stage == BROADCOM_SHADER_VERTEX);
1807 const nir_shader *shader = p_stage->nir;
1808 assert(shader);
1809
1810 nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
1811 if (var->data.location == VARYING_SLOT_CLIP_DIST0) {
1812 assert(var->data.compact);
1813 return (1 << glsl_get_length(var->type)) - 1;
1814 }
1815 }
1816 return 0;
1817 }
1818
1819 static nir_shader *
1820 pipeline_stage_get_nir(struct v3dv_pipeline_stage *p_stage,
1821 struct v3dv_pipeline *pipeline,
1822 struct v3dv_pipeline_cache *cache)
1823 {
1824 int64_t stage_start = os_time_get_nano();
1825
1826 nir_shader *nir = NULL;
1827 const nir_shader_compiler_options *nir_options =
1828 v3dv_pipeline_get_nir_options(&pipeline->device->devinfo);
1829
1830 nir = v3dv_pipeline_cache_search_for_nir(pipeline, cache,
1831 nir_options,
1832 p_stage->shader_sha1);
1833
1834 if (nir) {
1835 assert(nir->info.stage == broadcom_shader_stage_to_gl(p_stage->stage));
1836
1837       /* A NIR cache hit doesn't avoid the large majority of pipeline stage
1838        * creation work, so the cache hit is not recorded in the pipeline
1839        * feedback flags.
1840 */
1841
1842 p_stage->feedback.duration += os_time_get_nano() - stage_start;
1843
1844 return nir;
1845 }
1846
1847 nir = shader_module_compile_to_nir(pipeline->device, p_stage);
1848
1849 if (nir) {
1850 struct v3dv_pipeline_cache *default_cache =
1851 &pipeline->device->default_pipeline_cache;
1852
1853 v3dv_pipeline_cache_upload_nir(pipeline, cache, nir,
1854 p_stage->shader_sha1);
1855
1856       /* Ensure that the NIR is also in the default cache, as the cmd_buffer
1857        * could need it later to build a different variant.
1858 */
1859 if (default_cache != cache) {
1860 v3dv_pipeline_cache_upload_nir(pipeline, default_cache, nir,
1861 p_stage->shader_sha1);
1862 }
1863
1864 p_stage->feedback.duration += os_time_get_nano() - stage_start;
1865
1866 return nir;
1867 }
1868
1869 /* FIXME: this shouldn't happen, raise error? */
1870 return NULL;
1871 }
1872
1873 static VkResult
1874 pipeline_compile_vertex_shader(struct v3dv_pipeline *pipeline,
1875 const VkAllocationCallbacks *pAllocator,
1876 const VkGraphicsPipelineCreateInfo *pCreateInfo)
1877 {
1878 struct v3dv_pipeline_stage *p_stage_vs =
1879 pipeline->stages[BROADCOM_SHADER_VERTEX];
1880 struct v3dv_pipeline_stage *p_stage_vs_bin =
1881 pipeline->stages[BROADCOM_SHADER_VERTEX_BIN];
1882
1883 assert(p_stage_vs_bin != NULL);
1884 if (p_stage_vs_bin->nir == NULL) {
1885 assert(p_stage_vs->nir);
1886 p_stage_vs_bin->nir = nir_shader_clone(NULL, p_stage_vs->nir);
1887 }
1888
1889 VkResult vk_result;
1890 struct v3d_vs_key key;
1891 pipeline_populate_v3d_vs_key(&key, pCreateInfo, p_stage_vs);
1892 pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] =
1893 pipeline_compile_shader_variant(p_stage_vs, &key.base, sizeof(key),
1894 pAllocator, &vk_result);
1895 if (vk_result != VK_SUCCESS)
1896 return vk_result;
1897
1898 pipeline_populate_v3d_vs_key(&key, pCreateInfo, p_stage_vs_bin);
1899 pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN] =
1900 pipeline_compile_shader_variant(p_stage_vs_bin, &key.base, sizeof(key),
1901 pAllocator, &vk_result);
1902
1903 return vk_result;
1904 }
1905
1906 static VkResult
1907 pipeline_compile_geometry_shader(struct v3dv_pipeline *pipeline,
1908 const VkAllocationCallbacks *pAllocator,
1909 const VkGraphicsPipelineCreateInfo *pCreateInfo)
1910 {
1911 struct v3dv_pipeline_stage *p_stage_gs =
1912 pipeline->stages[BROADCOM_SHADER_GEOMETRY];
1913 struct v3dv_pipeline_stage *p_stage_gs_bin =
1914 pipeline->stages[BROADCOM_SHADER_GEOMETRY_BIN];
1915
1916 assert(p_stage_gs);
1917 assert(p_stage_gs_bin != NULL);
1918 if (p_stage_gs_bin->nir == NULL) {
1919 assert(p_stage_gs->nir);
1920 p_stage_gs_bin->nir = nir_shader_clone(NULL, p_stage_gs->nir);
1921 }
1922
1923 VkResult vk_result;
1924 struct v3d_gs_key key;
1925 pipeline_populate_v3d_gs_key(&key, pCreateInfo, p_stage_gs);
1926 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] =
1927 pipeline_compile_shader_variant(p_stage_gs, &key.base, sizeof(key),
1928 pAllocator, &vk_result);
1929 if (vk_result != VK_SUCCESS)
1930 return vk_result;
1931
1932 pipeline_populate_v3d_gs_key(&key, pCreateInfo, p_stage_gs_bin);
1933 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN] =
1934 pipeline_compile_shader_variant(p_stage_gs_bin, &key.base, sizeof(key),
1935 pAllocator, &vk_result);
1936
1937 return vk_result;
1938 }
1939
1940 static VkResult
1941 pipeline_compile_fragment_shader(struct v3dv_pipeline *pipeline,
1942 const VkAllocationCallbacks *pAllocator,
1943 const VkGraphicsPipelineCreateInfo *pCreateInfo)
1944 {
1945 struct v3dv_pipeline_stage *p_stage_vs =
1946 pipeline->stages[BROADCOM_SHADER_VERTEX];
1947 struct v3dv_pipeline_stage *p_stage_fs =
1948 pipeline->stages[BROADCOM_SHADER_FRAGMENT];
1949 struct v3dv_pipeline_stage *p_stage_gs =
1950 pipeline->stages[BROADCOM_SHADER_GEOMETRY];
1951
1952 struct v3d_fs_key key;
1953 pipeline_populate_v3d_fs_key(&key, pCreateInfo, &pipeline->rendering_info,
1954 p_stage_fs, p_stage_gs != NULL,
1955 get_ucp_enable_mask(p_stage_vs));
1956
1957 if (key.is_points) {
1958 assert(key.point_coord_upper_left);
1959 NIR_PASS(_, p_stage_fs->nir, v3d_nir_lower_point_coord);
1960 }
1961
1962 VkResult vk_result;
1963 pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT] =
1964 pipeline_compile_shader_variant(p_stage_fs, &key.base, sizeof(key),
1965 pAllocator, &vk_result);
1966
1967 return vk_result;
1968 }
1969
1970 static void
1971 pipeline_populate_graphics_key(struct v3dv_pipeline *pipeline,
1972 struct v3dv_pipeline_key *key,
1973 const VkGraphicsPipelineCreateInfo *pCreateInfo)
1974 {
1975 struct v3dv_device *device = pipeline->device;
1976 assert(device);
1977
1978 memset(key, 0, sizeof(*key));
1979
1980 key->line_smooth = pipeline->line_smooth;
1981
1982 const VkPipelineInputAssemblyStateCreateInfo *ia_info =
1983 pCreateInfo->pInputAssemblyState;
1984 key->topology = vk_to_mesa_prim[ia_info->topology];
1985
1986 const VkPipelineColorBlendStateCreateInfo *cb_info =
1987 pipeline->rasterization_enabled ? pCreateInfo->pColorBlendState : NULL;
1988
1989 key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ?
1990 vk_to_pipe_logicop[cb_info->logicOp] :
1991 PIPE_LOGICOP_COPY;
1992
1993 /* Multisample rasterization state must be ignored if rasterization
1994 * is disabled.
1995 */
1996 const VkPipelineMultisampleStateCreateInfo *ms_info =
1997 pipeline->rasterization_enabled ? pCreateInfo->pMultisampleState : NULL;
1998 if (ms_info) {
1999 assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT ||
2000 ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT);
2001 key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;
2002
2003 if (key->msaa)
2004 key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable;
2005
2006 key->sample_alpha_to_one = ms_info->alphaToOneEnable;
2007 }
2008
2009 struct vk_render_pass_state *ri = &pipeline->rendering_info;
2010 for (uint32_t i = 0; i < ri->color_attachment_count; i++) {
2011 if (ri->color_attachment_formats[i] == VK_FORMAT_UNDEFINED)
2012 continue;
2013
2014 key->cbufs |= 1 << i;
2015
2016 VkFormat fb_format = ri->color_attachment_formats[i];
2017 enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format);
2018
2019 /* If logic operations are enabled then we might emit color reads and we
2020 * need to know the color buffer format and swizzle for that
2021 */
2022 if (key->logicop_func != PIPE_LOGICOP_COPY) {
2023 /* Framebuffer formats should be single plane */
2024 assert(vk_format_get_plane_count(fb_format) == 1);
2025 key->color_fmt[i].format = fb_pipe_format;
2026 memcpy(key->color_fmt[i].swizzle,
2027 v3dv_get_format_swizzle(pipeline->device, fb_format, 0),
2028 sizeof(key->color_fmt[i].swizzle));
2029 }
2030
2031 const struct util_format_description *desc =
2032 vk_format_description(fb_format);
2033
2034 if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
2035 desc->channel[0].size == 32) {
2036 key->f32_color_rb |= 1 << i;
2037 }
2038 }
2039
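   /* Record which vertex attribute locations use a format with swapped R/B
    * channels (e.g. VK_FORMAT_B8G8R8A8_UNORM); the compiler uses this mask to
    * apply the corresponding swizzle when reading the attribute.
    */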
2040 const VkPipelineVertexInputStateCreateInfo *vi_info =
2041 pCreateInfo->pVertexInputState;
2042 for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
2043 const VkVertexInputAttributeDescription *desc =
2044 &vi_info->pVertexAttributeDescriptions[i];
2045 assert(desc->location < MAX_VERTEX_ATTRIBS);
2046 if (desc->format == VK_FORMAT_B8G8R8A8_UNORM ||
2047 desc->format == VK_FORMAT_A2R10G10B10_UNORM_PACK32) {
2048 key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location);
2049 }
2050 }
2051
2052 key->has_multiview = ri->view_mask != 0;
2053 }
2054
2055 static void
2056 pipeline_populate_compute_key(struct v3dv_pipeline *pipeline,
2057 struct v3dv_pipeline_key *key,
2058 const VkComputePipelineCreateInfo *pCreateInfo)
2059 {
2060 struct v3dv_device *device = pipeline->device;
2061 assert(device);
2062
2063    /* We use the same pipeline key for graphics and compute, but we don't need
2064     * to add a field to flag compute keys because this key is not used alone
2065     * to search in the cache: we also use, for example, the SPIR-V or the
2066     * serialized NIR, which already flag compute shaders.
2067 */
2068 memset(key, 0, sizeof(*key));
2069 }
2070
2071 static struct v3dv_pipeline_shared_data *
2072 v3dv_pipeline_shared_data_new_empty(const unsigned char sha1_key[20],
2073 struct v3dv_pipeline *pipeline,
2074 bool is_graphics_pipeline)
2075 {
2076    /* We create new_entry using the device alloc. Right now shared_data is
2077     * referenced and unreferenced by both the pipeline and the pipeline cache,
2078     * so we can't ensure that the cache or pipeline alloc will still be
2079     * available on the last unref.
2080 */
2081 struct v3dv_pipeline_shared_data *new_entry =
2082 vk_zalloc2(&pipeline->device->vk.alloc, NULL,
2083 sizeof(struct v3dv_pipeline_shared_data), 8,
2084 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2085
2086 if (new_entry == NULL)
2087 return NULL;
2088
2089 for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
2090       /* We don't need specific descriptor maps for binning stages; we use the
2091        * map of the corresponding render stage.
2092 */
2093 if (broadcom_shader_stage_is_binning(stage))
2094 continue;
2095
2096 if ((is_graphics_pipeline && stage == BROADCOM_SHADER_COMPUTE) ||
2097 (!is_graphics_pipeline && stage != BROADCOM_SHADER_COMPUTE)) {
2098 continue;
2099 }
2100
2101 if (stage == BROADCOM_SHADER_GEOMETRY &&
2102 !pipeline->stages[BROADCOM_SHADER_GEOMETRY]) {
2103 /* We always inject a custom GS if we have multiview */
2104 if (!pipeline->rendering_info.view_mask)
2105 continue;
2106 }
2107
2108 struct v3dv_descriptor_maps *new_maps =
2109 vk_zalloc2(&pipeline->device->vk.alloc, NULL,
2110 sizeof(struct v3dv_descriptor_maps), 8,
2111 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2112
2113 if (new_maps == NULL)
2114 goto fail;
2115
2116 new_entry->maps[stage] = new_maps;
2117 }
2118
2119 new_entry->maps[BROADCOM_SHADER_VERTEX_BIN] =
2120 new_entry->maps[BROADCOM_SHADER_VERTEX];
2121
2122 new_entry->maps[BROADCOM_SHADER_GEOMETRY_BIN] =
2123 new_entry->maps[BROADCOM_SHADER_GEOMETRY];
2124
2125 new_entry->ref_cnt = 1;
2126 memcpy(new_entry->sha1_key, sha1_key, 20);
2127
2128 return new_entry;
2129
2130 fail:
2131 if (new_entry != NULL) {
2132 for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
2133 if (new_entry->maps[stage] != NULL)
2134 vk_free(&pipeline->device->vk.alloc, new_entry->maps[stage]);
2135 }
2136 }
2137
2138 vk_free(&pipeline->device->vk.alloc, new_entry);
2139
2140 return NULL;
2141 }
2142
2143 static void
2144 write_creation_feedback(struct v3dv_pipeline *pipeline,
2145 const void *next,
2146 const VkPipelineCreationFeedback *pipeline_feedback,
2147 uint32_t stage_count,
2148 const VkPipelineShaderStageCreateInfo *stages)
2149 {
2150 const VkPipelineCreationFeedbackCreateInfo *create_feedback =
2151 vk_find_struct_const(next, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
2152
2153 if (create_feedback) {
2154 typed_memcpy(create_feedback->pPipelineCreationFeedback,
2155 pipeline_feedback,
2156 1);
2157
2158 const uint32_t feedback_stage_count =
2159 create_feedback->pipelineStageCreationFeedbackCount;
2160 assert(feedback_stage_count <= stage_count);
2161
2162 for (uint32_t i = 0; i < feedback_stage_count; i++) {
2163 gl_shader_stage s = vk_to_mesa_shader_stage(stages[i].stage);
2164 enum broadcom_shader_stage bs = gl_shader_stage_to_broadcom(s);
2165
2166 create_feedback->pPipelineStageCreationFeedbacks[i] =
2167 pipeline->stages[bs]->feedback;
2168
2169 if (broadcom_shader_stage_is_render_with_binning(bs)) {
2170 enum broadcom_shader_stage bs_bin =
2171 broadcom_binning_shader_stage_for_render_stage(bs);
2172 create_feedback->pPipelineStageCreationFeedbacks[i].duration +=
2173 pipeline->stages[bs_bin]->feedback.duration;
2174 }
2175 }
2176 }
2177 }
2178
2179 /* Note that although PrimitiveTopology is now dynamic, it is still safe to
2180  * compute the gs_input/output_primitive from the topology saved in the
2181  * pipeline, as the topology class will not change, because we don't support
2182  * dynamicPrimitiveTopologyUnrestricted.
2183  */
2184 static enum mesa_prim
2185 multiview_gs_input_primitive_from_pipeline(struct v3dv_pipeline *pipeline)
2186 {
2187 switch (pipeline->topology) {
2188 case MESA_PRIM_POINTS:
2189 return MESA_PRIM_POINTS;
2190 case MESA_PRIM_LINES:
2191 case MESA_PRIM_LINE_STRIP:
2192 return MESA_PRIM_LINES;
2193 case MESA_PRIM_TRIANGLES:
2194 case MESA_PRIM_TRIANGLE_STRIP:
2195 case MESA_PRIM_TRIANGLE_FAN:
2196 return MESA_PRIM_TRIANGLES;
2197 default:
2198 /* Since we don't allow GS with multiview, we can only see non-adjacency
2199 * primitives.
2200 */
2201 unreachable("Unexpected pipeline primitive type");
2202 }
2203 }
2204
2205 static enum mesa_prim
2206 multiview_gs_output_primitive_from_pipeline(struct v3dv_pipeline *pipeline)
2207 {
2208 switch (pipeline->topology) {
2209 case MESA_PRIM_POINTS:
2210 return MESA_PRIM_POINTS;
2211 case MESA_PRIM_LINES:
2212 case MESA_PRIM_LINE_STRIP:
2213 return MESA_PRIM_LINE_STRIP;
2214 case MESA_PRIM_TRIANGLES:
2215 case MESA_PRIM_TRIANGLE_STRIP:
2216 case MESA_PRIM_TRIANGLE_FAN:
2217 return MESA_PRIM_TRIANGLE_STRIP;
2218 default:
2219 /* Since we don't allow GS with multiview, we can only see non-adjacency
2220 * primitives.
2221 */
2222 unreachable("Unexpected pipeline primitive type");
2223 }
2224 }
2225
2226 static bool
2227 pipeline_add_multiview_gs(struct v3dv_pipeline *pipeline,
2228 struct v3dv_pipeline_cache *cache,
2229 const VkAllocationCallbacks *pAllocator)
2230 {
2231 /* Create the passthrough GS from the VS output interface */
2232 struct v3dv_pipeline_stage *p_stage_vs = pipeline->stages[BROADCOM_SHADER_VERTEX];
2233 p_stage_vs->nir = pipeline_stage_get_nir(p_stage_vs, pipeline, cache);
2234 nir_shader *vs_nir = p_stage_vs->nir;
2235
2236 const nir_shader_compiler_options *options =
2237 v3dv_pipeline_get_nir_options(&pipeline->device->devinfo);
2238 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
2239 "multiview broadcast gs");
2240 nir_shader *nir = b.shader;
2241 nir->info.inputs_read = vs_nir->info.outputs_written;
2242 nir->info.outputs_written = vs_nir->info.outputs_written |
2243 (1ull << VARYING_SLOT_LAYER);
2244
2245 uint32_t vertex_count = mesa_vertices_per_prim(pipeline->topology);
2246 nir->info.gs.input_primitive =
2247 multiview_gs_input_primitive_from_pipeline(pipeline);
2248 nir->info.gs.output_primitive =
2249 multiview_gs_output_primitive_from_pipeline(pipeline);
2250 nir->info.gs.vertices_in = vertex_count;
2251 nir->info.gs.vertices_out = nir->info.gs.vertices_in;
2252 nir->info.gs.invocations = 1;
2253 nir->info.gs.active_stream_mask = 0x1;
2254
2255 /* Make a list of GS input/output variables from the VS outputs */
2256 nir_variable *in_vars[100];
2257 nir_variable *out_vars[100];
2258 uint32_t var_count = 0;
2259 nir_foreach_shader_out_variable(out_vs_var, vs_nir) {
2260 char name[8];
2261 snprintf(name, ARRAY_SIZE(name), "in_%d", var_count);
2262
2263 in_vars[var_count] =
2264 nir_variable_create(nir, nir_var_shader_in,
2265 glsl_array_type(out_vs_var->type, vertex_count, 0),
2266 name);
2267 in_vars[var_count]->data.location = out_vs_var->data.location;
2268 in_vars[var_count]->data.location_frac = out_vs_var->data.location_frac;
2269 in_vars[var_count]->data.interpolation = out_vs_var->data.interpolation;
2270
2271 snprintf(name, ARRAY_SIZE(name), "out_%d", var_count);
2272 out_vars[var_count] =
2273 nir_variable_create(nir, nir_var_shader_out, out_vs_var->type, name);
2274 out_vars[var_count]->data.location = out_vs_var->data.location;
2275 out_vars[var_count]->data.interpolation = out_vs_var->data.interpolation;
2276
2277 var_count++;
2278 }
2279
2280 /* Add the gl_Layer output variable */
2281 nir_variable *out_layer =
2282 nir_variable_create(nir, nir_var_shader_out, glsl_int_type(),
2283 "out_Layer");
2284 out_layer->data.location = VARYING_SLOT_LAYER;
2285
2286 /* Get the view index value that we will write to gl_Layer */
2287 nir_def *layer =
2288 nir_load_system_value(&b, nir_intrinsic_load_view_index, 0, 1, 32);
2289
2290 /* Emit all output vertices */
2291 for (uint32_t vi = 0; vi < vertex_count; vi++) {
2292 /* Emit all output varyings */
2293 for (uint32_t i = 0; i < var_count; i++) {
2294 nir_deref_instr *in_value =
2295 nir_build_deref_array_imm(&b, nir_build_deref_var(&b, in_vars[i]), vi);
2296 nir_copy_deref(&b, nir_build_deref_var(&b, out_vars[i]), in_value);
2297 }
2298
2299 /* Emit gl_Layer write */
2300 nir_store_var(&b, out_layer, layer, 0x1);
2301
2302 nir_emit_vertex(&b, 0);
2303 }
2304 nir_end_primitive(&b, 0);
2305
2306 /* Make sure we run our pre-process NIR passes so we produce NIR compatible
2307 * with what we expect from SPIR-V modules.
2308 */
2309 preprocess_nir(nir);
2310
2311 /* Attach the geometry shader to the pipeline */
2312 struct v3dv_device *device = pipeline->device;
2313 struct v3dv_physical_device *physical_device = device->pdevice;
2314
2315 struct v3dv_pipeline_stage *p_stage =
2316 vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
2317 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2318
2319 if (p_stage == NULL) {
2320 ralloc_free(nir);
2321 return false;
2322 }
2323
2324 p_stage->pipeline = pipeline;
2325 p_stage->stage = BROADCOM_SHADER_GEOMETRY;
2326 p_stage->entrypoint = "main";
2327 p_stage->module = NULL;
2328 p_stage->module_info = NULL;
2329 p_stage->nir = nir;
2330 pipeline_compute_sha1_from_nir(p_stage);
2331 p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id);
2332 p_stage->robustness = pipeline->stages[BROADCOM_SHADER_VERTEX]->robustness;
2333
2334 pipeline->has_gs = true;
2335 pipeline->stages[BROADCOM_SHADER_GEOMETRY] = p_stage;
2336    pipeline->active_stages |= VK_SHADER_STAGE_GEOMETRY_BIT;
2337
2338 pipeline->stages[BROADCOM_SHADER_GEOMETRY_BIN] =
2339 pipeline_stage_create_binning(p_stage, pAllocator);
2340 if (pipeline->stages[BROADCOM_SHADER_GEOMETRY_BIN] == NULL)
2341 return false;
2342
2343 return true;
2344 }
2345
2346 static void
2347 pipeline_check_buffer_device_address(struct v3dv_pipeline *pipeline)
2348 {
2349 for (int i = BROADCOM_SHADER_VERTEX; i < BROADCOM_SHADER_STAGES; i++) {
2350 struct v3dv_shader_variant *variant = pipeline->shared_data->variants[i];
2351 if (variant && variant->prog_data.base->has_global_address) {
2352 pipeline->uses_buffer_device_address = true;
2353 return;
2354 }
2355 }
2356
2357 pipeline->uses_buffer_device_address = false;
2358 }
2359
2360 /*
2361  * Compiles a pipeline. Note that it also allocates internal objects, but if
2362  * some allocations succeed while others fail, the method does not free the
2363  * successful ones.
2364  *
2365  * This is done to simplify the code: in that case we just call the pipeline
2366  * destroy method, which handles freeing the internal objects that were
2367  * allocated. We just need to be careful to set the objects that were not
2368  * allocated to NULL.
2369  */
2370 static VkResult
2371 pipeline_compile_graphics(struct v3dv_pipeline *pipeline,
2372 struct v3dv_pipeline_cache *cache,
2373 const VkGraphicsPipelineCreateInfo *pCreateInfo,
2374 const VkAllocationCallbacks *pAllocator)
2375 {
2376 VkPipelineCreationFeedback pipeline_feedback = {
2377 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
2378 };
2379 int64_t pipeline_start = os_time_get_nano();
2380
2381 struct v3dv_device *device = pipeline->device;
2382 struct v3dv_physical_device *physical_device = device->pdevice;
2383
2384 /* First pass to get some common info from the shader, and create the
2385 * individual pipeline_stage objects
2386 */
2387 for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
2388 const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];
2389 gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
2390
2391 struct v3dv_pipeline_stage *p_stage =
2392 vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
2393 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2394
2395 if (p_stage == NULL)
2396 return VK_ERROR_OUT_OF_HOST_MEMORY;
2397
2398 p_stage->program_id =
2399 p_atomic_inc_return(&physical_device->next_program_id);
2400
2401 enum broadcom_shader_stage broadcom_stage =
2402 gl_shader_stage_to_broadcom(stage);
2403
2404 p_stage->pipeline = pipeline;
2405 p_stage->stage = broadcom_stage;
2406 p_stage->entrypoint = sinfo->pName;
2407 p_stage->module = vk_shader_module_from_handle(sinfo->module);
2408 p_stage->spec_info = sinfo->pSpecializationInfo;
2409 if (!p_stage->module) {
2410 p_stage->module_info =
2411 vk_find_struct_const(sinfo->pNext, SHADER_MODULE_CREATE_INFO);
2412 }
2413
2414 vk_pipeline_robustness_state_fill(&device->vk, &p_stage->robustness,
2415 pCreateInfo->pNext, sinfo->pNext);
2416
2417 vk_pipeline_hash_shader_stage(pipeline->flags,
2418 &pCreateInfo->pStages[i],
2419 &p_stage->robustness,
2420 p_stage->shader_sha1);
2421
2422 pipeline->active_stages |= sinfo->stage;
2423
2424       /* We will try to get the compiled shader variant directly, so let's not
2425        * worry about getting the nir shader for now.
2426 */
2427 p_stage->nir = NULL;
2428 pipeline->stages[broadcom_stage] = p_stage;
2429 if (broadcom_stage == BROADCOM_SHADER_GEOMETRY)
2430 pipeline->has_gs = true;
2431
2432 if (broadcom_shader_stage_is_render_with_binning(broadcom_stage)) {
2433 enum broadcom_shader_stage broadcom_stage_bin =
2434 broadcom_binning_shader_stage_for_render_stage(broadcom_stage);
2435
2436 pipeline->stages[broadcom_stage_bin] =
2437 pipeline_stage_create_binning(p_stage, pAllocator);
2438
2439 if (pipeline->stages[broadcom_stage_bin] == NULL)
2440 return VK_ERROR_OUT_OF_HOST_MEMORY;
2441 }
2442 }
2443
2444 /* Add a no-op fragment shader if needed */
2445 if (!pipeline->stages[BROADCOM_SHADER_FRAGMENT]) {
2446 const nir_shader_compiler_options *compiler_options =
2447 v3dv_pipeline_get_nir_options(&pipeline->device->devinfo);
2448 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
2449 compiler_options,
2450 "noop_fs");
2451
2452 struct v3dv_pipeline_stage *p_stage =
2453 vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
2454 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2455
2456 if (p_stage == NULL)
2457 return VK_ERROR_OUT_OF_HOST_MEMORY;
2458
2459 p_stage->pipeline = pipeline;
2460 p_stage->stage = BROADCOM_SHADER_FRAGMENT;
2461 p_stage->entrypoint = "main";
2462 p_stage->module = NULL;
2463 p_stage->module_info = NULL;
2464 p_stage->nir = b.shader;
2465 vk_pipeline_robustness_state_fill(&device->vk, &p_stage->robustness,
2466 NULL, NULL);
2467 pipeline_compute_sha1_from_nir(p_stage);
2468 p_stage->program_id =
2469 p_atomic_inc_return(&physical_device->next_program_id);
2470
2471 pipeline->stages[BROADCOM_SHADER_FRAGMENT] = p_stage;
2472       pipeline->active_stages |= VK_SHADER_STAGE_FRAGMENT_BIT;
2473 }
2474
2475 /* If multiview is enabled, we inject a custom passthrough geometry shader
2476 * to broadcast draw calls to the appropriate views.
2477 */
2478 const uint32_t view_mask = pipeline->rendering_info.view_mask;
2479 assert(!view_mask ||
2480 (!pipeline->has_gs && !pipeline->stages[BROADCOM_SHADER_GEOMETRY]));
2481 if (view_mask) {
2482 if (!pipeline_add_multiview_gs(pipeline, cache, pAllocator))
2483 return VK_ERROR_OUT_OF_HOST_MEMORY;
2484 }
2485
2486 /* First we try to get the variants from the pipeline cache (unless we are
2487 * required to capture internal representations, since in that case we need
2488     * to compile).
2489 */
2490 bool needs_executable_info =
2491 pipeline->flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
2492 if (!needs_executable_info) {
2493 struct v3dv_pipeline_key pipeline_key;
2494 pipeline_populate_graphics_key(pipeline, &pipeline_key, pCreateInfo);
2495 pipeline_hash_graphics(pipeline, &pipeline_key, pipeline->sha1);
2496
2497 bool cache_hit = false;
2498
2499 pipeline->shared_data =
2500 v3dv_pipeline_cache_search_for_pipeline(cache,
2501 pipeline->sha1,
2502 &cache_hit);
2503
2504 if (pipeline->shared_data != NULL) {
2505 /* A correct pipeline must have at least a VS and FS */
2506 assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]);
2507 assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]);
2508 assert(pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]);
2509 assert(!pipeline->stages[BROADCOM_SHADER_GEOMETRY] ||
2510 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]);
2511 assert(!pipeline->stages[BROADCOM_SHADER_GEOMETRY] ||
2512 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]);
2513
2514 if (cache_hit && cache != &pipeline->device->default_pipeline_cache)
2515 pipeline_feedback.flags |=
2516 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
2517
2518 goto success;
2519 }
2520 }
2521
2522 if (pipeline->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)
2523 return VK_PIPELINE_COMPILE_REQUIRED;
2524
2525 /* Otherwise we try to get the NIR shaders (either from the original SPIR-V
2526 * shader or the pipeline cache) and compile.
2527 */
2528 pipeline->shared_data =
2529 v3dv_pipeline_shared_data_new_empty(pipeline->sha1, pipeline, true);
2530 if (!pipeline->shared_data)
2531 return VK_ERROR_OUT_OF_HOST_MEMORY;
2532
2533 struct v3dv_pipeline_stage *p_stage_vs = pipeline->stages[BROADCOM_SHADER_VERTEX];
2534 struct v3dv_pipeline_stage *p_stage_fs = pipeline->stages[BROADCOM_SHADER_FRAGMENT];
2535 struct v3dv_pipeline_stage *p_stage_gs = pipeline->stages[BROADCOM_SHADER_GEOMETRY];
2536
2537 p_stage_vs->feedback.flags |=
2538 VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
2539 if (p_stage_gs)
2540 p_stage_gs->feedback.flags |=
2541 VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
2542 p_stage_fs->feedback.flags |=
2543 VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
2544
2545 if (!p_stage_vs->nir)
2546 p_stage_vs->nir = pipeline_stage_get_nir(p_stage_vs, pipeline, cache);
2547 if (p_stage_gs && !p_stage_gs->nir)
2548 p_stage_gs->nir = pipeline_stage_get_nir(p_stage_gs, pipeline, cache);
2549 if (!p_stage_fs->nir)
2550 p_stage_fs->nir = pipeline_stage_get_nir(p_stage_fs, pipeline, cache);
2551
2552 /* Linking + pipeline lowerings */
2553 if (p_stage_gs) {
2554 link_shaders(p_stage_gs->nir, p_stage_fs->nir);
2555 link_shaders(p_stage_vs->nir, p_stage_gs->nir);
2556 } else {
2557 link_shaders(p_stage_vs->nir, p_stage_fs->nir);
2558 }
2559
2560 pipeline_lower_nir(pipeline, p_stage_fs, pipeline->layout);
2561 lower_fs_io(p_stage_fs->nir);
2562
2563 if (p_stage_gs) {
2564 pipeline_lower_nir(pipeline, p_stage_gs, pipeline->layout);
2565 lower_gs_io(p_stage_gs->nir);
2566 }
2567
2568 pipeline_lower_nir(pipeline, p_stage_vs, pipeline->layout);
2569 lower_vs_io(p_stage_vs->nir);
2570
2571 /* Compiling to vir */
2572 VkResult vk_result;
2573
2574 /* We should have got all the variants or no variants from the cache */
2575 assert(!pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]);
2576 vk_result = pipeline_compile_fragment_shader(pipeline, pAllocator,
2577 pCreateInfo);
2578 if (vk_result != VK_SUCCESS)
2579 return vk_result;
2580
2581 assert(!pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] &&
2582 !pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]);
2583
2584 if (p_stage_gs) {
2585 vk_result =
2586 pipeline_compile_geometry_shader(pipeline, pAllocator, pCreateInfo);
2587 if (vk_result != VK_SUCCESS)
2588 return vk_result;
2589 }
2590
2591 assert(!pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] &&
2592 !pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]);
2593
2594 vk_result = pipeline_compile_vertex_shader(pipeline, pAllocator, pCreateInfo);
2595 if (vk_result != VK_SUCCESS)
2596 return vk_result;
2597
2598 if (!upload_assembly(pipeline))
2599 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2600
2601 v3dv_pipeline_cache_upload_pipeline(pipeline, cache);
2602
2603 success:
2604
2605 pipeline_check_buffer_device_address(pipeline);
2606
2607 pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
2608 write_creation_feedback(pipeline,
2609 pCreateInfo->pNext,
2610 &pipeline_feedback,
2611 pCreateInfo->stageCount,
2612 pCreateInfo->pStages);
2613
2614 /* Since we have the variants in the pipeline shared data we can now free
2615 * the pipeline stages.
2616 */
2617 if (!needs_executable_info)
2618 pipeline_free_stages(device, pipeline, pAllocator);
2619
2620 pipeline_check_spill_size(pipeline);
2621
2622 return compute_vpm_config(pipeline);
2623 }
2624
2625 static VkResult
2626 compute_vpm_config(struct v3dv_pipeline *pipeline)
2627 {
2628 struct v3dv_shader_variant *vs_variant =
2629 pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX];
2630    struct v3dv_shader_variant *vs_bin_variant =
2631       pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN];
2632    struct v3d_vs_prog_data *vs = vs_variant->prog_data.vs;
2633    struct v3d_vs_prog_data *vs_bin = vs_bin_variant->prog_data.vs;
2634
2635 struct v3d_gs_prog_data *gs = NULL;
2636 struct v3d_gs_prog_data *gs_bin = NULL;
2637 if (pipeline->has_gs) {
2638 struct v3dv_shader_variant *gs_variant =
2639 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];
2640 struct v3dv_shader_variant *gs_bin_variant =
2641 pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];
2642 gs = gs_variant->prog_data.gs;
2643 gs_bin = gs_bin_variant->prog_data.gs;
2644 }
2645
2646 if (!v3d_compute_vpm_config(&pipeline->device->devinfo,
2647 vs_bin, vs, gs_bin, gs,
2648 &pipeline->vpm_cfg_bin,
2649 &pipeline->vpm_cfg)) {
2650 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2651 }
2652
2653 return VK_SUCCESS;
2654 }
2655
2656 static bool
2657 stencil_op_is_no_op(struct vk_stencil_test_face_state *stencil)
2658 {
2659 return stencil->op.depth_fail == VK_STENCIL_OP_KEEP &&
2660 stencil->op.compare == VK_COMPARE_OP_ALWAYS;
2661 }
2662
2663 /* Computes the ez_state based on a given vk_dynamic_graphics_state. Note
2664 * that the parameter dyn doesn't need to be pipeline->dynamic_graphics_state,
2665 * as this method can be used by the cmd_buffer too.
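 *
 * Note: EZ here refers to the hardware early-Z test; roughly, the chosen
 * value tracks the depth-compare direction for which early-Z evaluation and
 * updates remain valid, or disables early-Z when there is no safe direction.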
2666 */
2667 void
2668 v3dv_compute_ez_state(struct vk_dynamic_graphics_state *dyn,
2669 struct v3dv_pipeline *pipeline,
2670 enum v3dv_ez_state *ez_state,
2671 bool *incompatible_ez_test)
2672 {
2673 if (!dyn->ds.depth.test_enable) {
2674 *ez_state = V3D_EZ_DISABLED;
2675 return;
2676 }
2677
2678 switch (dyn->ds.depth.compare_op) {
2679 case VK_COMPARE_OP_LESS:
2680 case VK_COMPARE_OP_LESS_OR_EQUAL:
2681 *ez_state = V3D_EZ_LT_LE;
2682 break;
2683 case VK_COMPARE_OP_GREATER:
2684 case VK_COMPARE_OP_GREATER_OR_EQUAL:
2685 *ez_state = V3D_EZ_GT_GE;
2686 break;
2687 case VK_COMPARE_OP_NEVER:
2688 case VK_COMPARE_OP_EQUAL:
2689 *ez_state = V3D_EZ_UNDECIDED;
2690 break;
2691 default:
2692 *ez_state = V3D_EZ_DISABLED;
2693 *incompatible_ez_test = true;
2694 break;
2695 }
2696
2697 /* If stencil is enabled and is not a no-op, we need to disable EZ */
2698 if (dyn->ds.stencil.test_enable &&
2699 (!stencil_op_is_no_op(&dyn->ds.stencil.front) ||
2700 !stencil_op_is_no_op(&dyn->ds.stencil.back))) {
2701 *ez_state = V3D_EZ_DISABLED;
2702 }
2703
2704 /* If the FS writes Z, then it may update against the chosen EZ direction */
2705 struct v3dv_shader_variant *fs_variant =
2706 pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
2707 if (fs_variant && fs_variant->prog_data.fs->writes_z &&
2708 !fs_variant->prog_data.fs->writes_z_from_fep) {
2709 *ez_state = V3D_EZ_DISABLED;
2710 }
2711 }
2712
2713
2714 static void
2715 pipeline_set_sample_mask(struct v3dv_pipeline *pipeline,
2716 const VkPipelineMultisampleStateCreateInfo *ms_info)
2717 {
2718 pipeline->sample_mask = (1 << V3D_MAX_SAMPLES) - 1;
2719
2720 /* Ignore pSampleMask if we are not enabling multisampling. The hardware
2721 * requires this to be 0xf or 0x0 if using a single sample.
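    *
    * For example (illustrative values), with 4x MSAA and pSampleMask[0] = 0x5
    * only samples 0 and 2 would remain enabled.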
2722 */
2723 if (ms_info && ms_info->pSampleMask &&
2724 ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT) {
2725 pipeline->sample_mask &= ms_info->pSampleMask[0];
2726 }
2727 }
2728
2729 static void
2730 pipeline_set_sample_rate_shading(struct v3dv_pipeline *pipeline,
2731 const VkPipelineMultisampleStateCreateInfo *ms_info)
2732 {
2733 pipeline->sample_rate_shading =
2734 ms_info && ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT &&
2735 ms_info->sampleShadingEnable;
2736 }
2737
2738 static void
2739 pipeline_setup_rendering_info(struct v3dv_device *device,
2740 struct v3dv_pipeline *pipeline,
2741 const VkGraphicsPipelineCreateInfo *pCreateInfo,
2742 const VkAllocationCallbacks *alloc)
2743 {
2744 struct vk_render_pass_state *rp = &pipeline->rendering_info;
2745
2746 if (pipeline->pass) {
2747 assert(pipeline->subpass);
2748 struct v3dv_render_pass *pass = pipeline->pass;
2749 struct v3dv_subpass *subpass = pipeline->subpass;
2750 const uint32_t attachment_idx = subpass->ds_attachment.attachment;
2751
2752 rp->view_mask = subpass->view_mask;
2753
2754 rp->depth_attachment_format = VK_FORMAT_UNDEFINED;
2755 rp->stencil_attachment_format = VK_FORMAT_UNDEFINED;
2756 rp->attachments = MESA_VK_RP_ATTACHMENT_NONE;
2757 if (attachment_idx != VK_ATTACHMENT_UNUSED) {
2758 VkFormat ds_format = pass->attachments[attachment_idx].desc.format;
2759 if (vk_format_has_depth(ds_format)) {
2760 rp->depth_attachment_format = ds_format;
2761 rp->attachments |= MESA_VK_RP_ATTACHMENT_DEPTH_BIT;
2762 }
2763 if (vk_format_has_stencil(ds_format)) {
2764 rp->stencil_attachment_format = ds_format;
2765 rp->attachments |= MESA_VK_RP_ATTACHMENT_STENCIL_BIT;
2766 }
2767 }
2768
2769 rp->color_attachment_count = subpass->color_count;
2770 for (uint32_t i = 0; i < subpass->color_count; i++) {
2771 const uint32_t attachment_idx = subpass->color_attachments[i].attachment;
2772 if (attachment_idx == VK_ATTACHMENT_UNUSED) {
2773 rp->color_attachment_formats[i] = VK_FORMAT_UNDEFINED;
2774 continue;
2775 }
2776 rp->color_attachment_formats[i] =
2777 pass->attachments[attachment_idx].desc.format;
2778 rp->attachments |= MESA_VK_RP_ATTACHMENT_COLOR_BIT(i);
2779 }
2780 return;
2781 }
2782
2783 const VkPipelineRenderingCreateInfo *ri =
2784 vk_find_struct_const(pCreateInfo->pNext,
2785 PIPELINE_RENDERING_CREATE_INFO);
2786 if (ri) {
2787 rp->view_mask = ri->viewMask;
2788
2789 rp->color_attachment_count = ri->colorAttachmentCount;
2790 for (int i = 0; i < ri->colorAttachmentCount; i++) {
2791 rp->color_attachment_formats[i] = ri->pColorAttachmentFormats[i];
2792 if (rp->color_attachment_formats[i] != VK_FORMAT_UNDEFINED) {
2793 rp->attachments |= MESA_VK_RP_ATTACHMENT_COLOR_BIT(i);
2794 }
2795 }
2796
2797 rp->depth_attachment_format = ri->depthAttachmentFormat;
2798 if (ri->depthAttachmentFormat != VK_FORMAT_UNDEFINED)
2799 rp->attachments |= MESA_VK_RP_ATTACHMENT_DEPTH_BIT;
2800
2801 rp->stencil_attachment_format = ri->stencilAttachmentFormat;
2802 if (ri->stencilAttachmentFormat != VK_FORMAT_UNDEFINED)
2803 rp->attachments |= MESA_VK_RP_ATTACHMENT_STENCIL_BIT;
2804
2805 return;
2806 }
2807
2808 /* From the Vulkan spec for VkPipelineRenderingCreateInfo:
2809 *
2810 * "if this structure is not specified, and the pipeline does not include
2811 * a VkRenderPass, viewMask and colorAttachmentCount are 0, and
2812 * depthAttachmentFormat and stencilAttachmentFormat are
2813     *     VK_FORMAT_UNDEFINED."
2814 */
2815 pipeline->rendering_info = (struct vk_render_pass_state) {
2816 .view_mask = 0,
2817 .attachments = 0,
2818 .color_attachment_count = 0,
2819 .depth_attachment_format = VK_FORMAT_UNDEFINED,
2820 .stencil_attachment_format = VK_FORMAT_UNDEFINED,
2821 };
2822 }
2823
2824 static VkResult
2825 pipeline_init_dynamic_state(struct v3dv_device *device,
2826 struct v3dv_pipeline *pipeline,
2827 struct vk_graphics_pipeline_all_state *pipeline_all_state,
2828 struct vk_graphics_pipeline_state *pipeline_state,
2829 const VkGraphicsPipelineCreateInfo *pCreateInfo)
2830 {
2831 VkResult result = VK_SUCCESS;
2832 result = vk_graphics_pipeline_state_fill(&pipeline->device->vk, pipeline_state,
2833 pCreateInfo, &pipeline->rendering_info, 0,
2834 pipeline_all_state, NULL, 0, NULL);
2835 if (result != VK_SUCCESS)
2836 return result;
2837
2838 vk_dynamic_graphics_state_fill(&pipeline->dynamic_graphics_state, pipeline_state);
2839
2840 struct v3dv_dynamic_state *v3dv_dyn = &pipeline->dynamic;
2841 struct vk_dynamic_graphics_state *dyn = &pipeline->dynamic_graphics_state;
2842
2843 if (BITSET_TEST(dyn->set, MESA_VK_DYNAMIC_VP_VIEWPORTS) ||
2844 BITSET_TEST(dyn->set, MESA_VK_DYNAMIC_VP_SCISSORS)) {
2845       /* FIXME: right now we don't support multiViewport so viewports[0] works
2846        * for now, but this would need to change if we allow multiple viewports.
2847 */
2848 v3dv_X(device, viewport_compute_xform)(&dyn->vp.viewports[0],
2849 v3dv_dyn->viewport.scale[0],
2850 v3dv_dyn->viewport.translate[0]);
2851
2852 }
2853
2854 v3dv_dyn->color_write_enable =
2855 (1ull << (4 * V3D_MAX_RENDER_TARGETS(device->devinfo.ver))) - 1;
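   /* vk_color_blend_state tracks a single write-enable bit per attachment,
    * while we track one bit per color channel (4 bits per render target), so
    * expand each attachment bit into a 4-bit channel mask.
    */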
2856 if (pipeline_state->cb) {
2857 const uint8_t color_writes = pipeline_state->cb->color_write_enables;
2858 v3dv_dyn->color_write_enable = 0;
2859 for (uint32_t i = 0; i < pipeline_state->cb->attachment_count; i++) {
2860 v3dv_dyn->color_write_enable |=
2861 (color_writes & BITFIELD_BIT(i)) ? (0xfu << (i * 4)) : 0;
2862 }
2863 }
2864
2865 return result;
2866 }
2867
2868 static VkResult
2869 pipeline_init(struct v3dv_pipeline *pipeline,
2870 struct v3dv_device *device,
2871 struct v3dv_pipeline_cache *cache,
2872 const VkGraphicsPipelineCreateInfo *pCreateInfo,
2873 const VkAllocationCallbacks *pAllocator)
2874 {
2875 VkResult result = VK_SUCCESS;
2876
2877 pipeline->device = device;
2878
2879 V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, pCreateInfo->layout);
2880 pipeline->layout = layout;
2881 v3dv_pipeline_layout_ref(pipeline->layout);
2882
2883 V3DV_FROM_HANDLE(v3dv_render_pass, render_pass, pCreateInfo->renderPass);
2884 if (render_pass) {
2885 assert(pCreateInfo->subpass < render_pass->subpass_count);
2886 pipeline->pass = render_pass;
2887 pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass];
2888 }
2889
2890 pipeline_setup_rendering_info(device, pipeline, pCreateInfo, pAllocator);
2891
2892 const VkPipelineInputAssemblyStateCreateInfo *ia_info =
2893 pCreateInfo->pInputAssemblyState;
2894 pipeline->topology = vk_to_mesa_prim[ia_info->topology];
2895
2896 struct vk_graphics_pipeline_all_state all;
2897 struct vk_graphics_pipeline_state pipeline_state = { };
2898 result = pipeline_init_dynamic_state(device, pipeline, &all, &pipeline_state,
2899 pCreateInfo);
2900
2901 if (result != VK_SUCCESS) {
2902       /* The caller will already destroy the pipeline, and we didn't allocate
2903        * any extra info, so we don't need to do anything else here.
2904 */
2905 return result;
2906 }
2907
2908 /* If rasterization is disabled, we just disable it through the CFG_BITS
2909 * packet, so for building the pipeline we always assume it is enabled
2910 */
2911 const bool raster_enabled =
2912 (pipeline_state.rs && !pipeline_state.rs->rasterizer_discard_enable) ||
2913 BITSET_TEST(pipeline_state.dynamic, MESA_VK_DYNAMIC_RS_RASTERIZER_DISCARD_ENABLE);
2914
2915 pipeline->rasterization_enabled = raster_enabled;
2916
2917 const VkPipelineViewportStateCreateInfo *vp_info =
2918 raster_enabled ? pCreateInfo->pViewportState : NULL;
2919
2920 const VkPipelineDepthStencilStateCreateInfo *ds_info =
2921 raster_enabled ? pCreateInfo->pDepthStencilState : NULL;
2922
2923 const VkPipelineRasterizationStateCreateInfo *rs_info =
2924 raster_enabled ? pCreateInfo->pRasterizationState : NULL;
2925
2926 const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info =
2927 raster_enabled ? vk_find_struct_const(
2928 rs_info->pNext,
2929 PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT) :
2930 NULL;
2931
2932 const VkPipelineRasterizationLineStateCreateInfoEXT *ls_info =
2933 raster_enabled ? vk_find_struct_const(
2934 rs_info->pNext,
2935 PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT) :
2936 NULL;
2937
2938 const VkPipelineColorBlendStateCreateInfo *cb_info =
2939 raster_enabled ? pCreateInfo->pColorBlendState : NULL;
2940
2941 const VkPipelineMultisampleStateCreateInfo *ms_info =
2942 raster_enabled ? pCreateInfo->pMultisampleState : NULL;
2943
2944 const VkPipelineViewportDepthClipControlCreateInfoEXT *depth_clip_control =
2945 vp_info ? vk_find_struct_const(vp_info->pNext,
2946 PIPELINE_VIEWPORT_DEPTH_CLIP_CONTROL_CREATE_INFO_EXT) :
2947 NULL;
2948
2949 if (depth_clip_control)
2950 pipeline->negative_one_to_one = depth_clip_control->negativeOneToOne;
2951
2952 v3dv_X(device, pipeline_pack_state)(pipeline, cb_info, ds_info,
2953 rs_info, pv_info, ls_info,
2954 ms_info,
2955 &pipeline_state);
2956
2957 pipeline_set_sample_mask(pipeline, ms_info);
2958 pipeline_set_sample_rate_shading(pipeline, ms_info);
2959 pipeline->line_smooth = enable_line_smooth(pipeline, rs_info);
2960
2961 result = pipeline_compile_graphics(pipeline, cache, pCreateInfo, pAllocator);
2962
2963 if (result != VK_SUCCESS) {
2964       /* The caller will already destroy the pipeline, and we didn't allocate
2965        * any extra info, so we don't need to do anything else here.
2966 */
2967 return result;
2968 }
2969
2970 const VkPipelineVertexInputStateCreateInfo *vi_info =
2971 pCreateInfo->pVertexInputState;
2972
2973 const VkPipelineVertexInputDivisorStateCreateInfoEXT *vd_info =
2974 vk_find_struct_const(vi_info->pNext,
2975 PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
2976
2977 v3dv_X(device, pipeline_pack_compile_state)(pipeline, vi_info, vd_info);
2978
2979 if (v3dv_X(device, pipeline_needs_default_attribute_values)(pipeline)) {
2980 pipeline->default_attribute_values =
2981 v3dv_X(pipeline->device, create_default_attribute_values)(pipeline->device, pipeline);
2982
2983 if (!pipeline->default_attribute_values)
2984 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2985 } else {
2986 pipeline->default_attribute_values = NULL;
2987 }
2988
2989 /* This must be done after the pipeline has been compiled */
2990 v3dv_compute_ez_state(&pipeline->dynamic_graphics_state,
2991 pipeline,
2992 &pipeline->ez_state,
2993 &pipeline->incompatible_ez_test);
2994
2995 return result;
2996 }
2997
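/* VkPipelineCreateFlags2CreateInfoKHR (VK_KHR_maintenance5) supersedes the
 * legacy 32-bit flags field: when it is present in the pNext chain its 64-bit
 * flags are used instead of pCreateInfo->flags.
 */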
2998 static VkPipelineCreateFlagBits2KHR
2999 pipeline_create_info_get_flags(VkPipelineCreateFlags flags, const void *pNext)
3000 {
3001 const VkPipelineCreateFlags2CreateInfoKHR *flags2 =
3002 vk_find_struct_const(pNext, PIPELINE_CREATE_FLAGS_2_CREATE_INFO_KHR);
3003 if (flags2)
3004 return flags2->flags;
3005 else
3006 return flags;
3007 }
3008
3009 static VkResult
3010 graphics_pipeline_create(VkDevice _device,
3011 VkPipelineCache _cache,
3012 const VkGraphicsPipelineCreateInfo *pCreateInfo,
3013 const VkAllocationCallbacks *pAllocator,
3014 VkPipeline *pPipeline,
3015 VkPipelineCreateFlagBits2KHR *flags)
3016 {
3017 V3DV_FROM_HANDLE(v3dv_device, device, _device);
3018 V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
3019
3020 struct v3dv_pipeline *pipeline;
3021 VkResult result;
3022
3023 *flags = pipeline_create_info_get_flags(pCreateInfo->flags,
3024 pCreateInfo->pNext);
3025
3026 /* Use the default pipeline cache if none is specified */
3027 if (cache == NULL && device->instance->default_pipeline_cache_enabled)
3028 cache = &device->default_pipeline_cache;
3029
3030 pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline),
3031 VK_OBJECT_TYPE_PIPELINE);
3032
3033 if (pipeline == NULL)
3034 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3035
3036 pipeline->flags = *flags;
3037 result = pipeline_init(pipeline, device, cache, pCreateInfo, pAllocator);
3038
3039 if (result != VK_SUCCESS) {
3040 v3dv_destroy_pipeline(pipeline, device, pAllocator);
3041 if (result == VK_PIPELINE_COMPILE_REQUIRED)
3042 *pPipeline = VK_NULL_HANDLE;
3043 return result;
3044 }
3045
3046 *pPipeline = v3dv_pipeline_to_handle(pipeline);
3047
3048 return VK_SUCCESS;
3049 }
3050
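/* Batched entry point: pipelines are created in order and any failure is
 * recorded in the overall result. If a failed create info requested
 * VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT we stop and set the
 * remaining handles to VK_NULL_HANDLE. The physical device mutex is taken
 * while shader debug output is enabled so dumps from concurrent pipeline
 * creation don't interleave.
 */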
3051 VKAPI_ATTR VkResult VKAPI_CALL
3052 v3dv_CreateGraphicsPipelines(VkDevice _device,
3053 VkPipelineCache pipelineCache,
3054 uint32_t count,
3055 const VkGraphicsPipelineCreateInfo *pCreateInfos,
3056 const VkAllocationCallbacks *pAllocator,
3057 VkPipeline *pPipelines)
3058 {
3059 V3DV_FROM_HANDLE(v3dv_device, device, _device);
3060 VkResult result = VK_SUCCESS;
3061
3062 if (V3D_DBG(SHADERS))
3063 mtx_lock(&device->pdevice->mutex);
3064
3065 uint32_t i = 0;
3066 for (; i < count; i++) {
3067 VkResult local_result;
3068
3069 VkPipelineCreateFlagBits2KHR flags;
3070 local_result = graphics_pipeline_create(_device,
3071 pipelineCache,
3072 &pCreateInfos[i],
3073 pAllocator,
3074 &pPipelines[i],
3075 &flags);
3076
3077 if (local_result != VK_SUCCESS) {
3078 result = local_result;
3079 pPipelines[i] = VK_NULL_HANDLE;
3080 if (flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
3081 break;
3082 }
3083 }
3084
3085 for (; i < count; i++)
3086 pPipelines[i] = VK_NULL_HANDLE;
3087
3088 if (V3D_DBG(SHADERS))
3089 mtx_unlock(&device->pdevice->mutex);
3090
3091 return result;
3092 }
3093
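/* Size/alignment callback for nir_lower_vars_to_explicit_types on shared
 * memory: booleans take 4 bytes per component and a 3-component vector is
 * aligned like a 4-component one.
 */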
3094 static void
3095 shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
3096 {
3097 assert(glsl_type_is_vector_or_scalar(type));
3098
3099 uint32_t comp_size = glsl_type_is_boolean(type)
3100 ? 4 : glsl_get_bit_size(type) / 8;
3101 unsigned length = glsl_get_vector_elements(type);
3102    *size = comp_size * length;
3103    *align = comp_size * (length == 3 ? 4 : length);
3104 }
3105
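/* Compute-specific NIR lowering: assigns an explicit layout to shared
 * variables (unless the shader already provides one), lowers shared memory
 * access to 32-bit offsets and lowers compute system values, advertising
 * support for a base workgroup id.
 */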
3106 static void
3107 lower_compute(struct nir_shader *nir)
3108 {
3109 if (!nir->info.shared_memory_explicit_layout) {
3110 NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
3111 nir_var_mem_shared, shared_type_info);
3112 }
3113
3114 NIR_PASS(_, nir, nir_lower_explicit_io,
3115 nir_var_mem_shared, nir_address_format_32bit_offset);
3116
3117 struct nir_lower_compute_system_values_options sysval_options = {
3118 .has_base_workgroup_id = true,
3119 };
3120 NIR_PASS_V(nir, nir_lower_compute_system_values, &sysval_options);
3121 }
3122
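/* Sets up the single compute stage and tries to find the pipeline in the
 * cache by its key hash (skipped when internal representations must be
 * captured). On a miss the shader is compiled from NIR, the QPU assembly is
 * uploaded and the result is added to the cache. Creation feedback is
 * written back to the application on both paths.
 */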
3123 static VkResult
3124 pipeline_compile_compute(struct v3dv_pipeline *pipeline,
3125 struct v3dv_pipeline_cache *cache,
3126 const VkComputePipelineCreateInfo *info,
3127 const VkAllocationCallbacks *alloc)
3128 {
3129 VkPipelineCreationFeedback pipeline_feedback = {
3130 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
3131 };
3132 int64_t pipeline_start = os_time_get_nano();
3133
3134 struct v3dv_device *device = pipeline->device;
3135 struct v3dv_physical_device *physical_device = device->pdevice;
3136
3137 const VkPipelineShaderStageCreateInfo *sinfo = &info->stage;
3138 gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
3139
3140 struct v3dv_pipeline_stage *p_stage =
3141 vk_zalloc2(&device->vk.alloc, alloc, sizeof(*p_stage), 8,
3142 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3143 if (!p_stage)
3144 return VK_ERROR_OUT_OF_HOST_MEMORY;
3145
3146 p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id);
3147 p_stage->pipeline = pipeline;
3148 p_stage->stage = gl_shader_stage_to_broadcom(stage);
3149 p_stage->entrypoint = sinfo->pName;
3150 p_stage->module = vk_shader_module_from_handle(sinfo->module);
3151 p_stage->spec_info = sinfo->pSpecializationInfo;
3152 p_stage->feedback = (VkPipelineCreationFeedback) { 0 };
3153 if (!p_stage->module) {
3154 p_stage->module_info =
3155 vk_find_struct_const(sinfo->pNext, SHADER_MODULE_CREATE_INFO);
3156 }
3157
3158 vk_pipeline_robustness_state_fill(&device->vk, &p_stage->robustness,
3159 info->pNext, sinfo->pNext);
3160
3161 vk_pipeline_hash_shader_stage(pipeline->flags,
3162 &info->stage,
3163 &p_stage->robustness,
3164 p_stage->shader_sha1);
3165
3166 p_stage->nir = NULL;
3167
3168 pipeline->stages[BROADCOM_SHADER_COMPUTE] = p_stage;
3169 pipeline->active_stages |= sinfo->stage;
3170
3171 /* First we try to get the variants from the pipeline cache (unless we are
3172     * required to capture internal representations, since in that case we
3173     * need to compile).
3174 */
3175 bool needs_executable_info =
3176 pipeline->flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
3177 if (!needs_executable_info) {
3178 struct v3dv_pipeline_key pipeline_key;
3179 pipeline_populate_compute_key(pipeline, &pipeline_key, info);
3180 pipeline_hash_compute(pipeline, &pipeline_key, pipeline->sha1);
3181
3182 bool cache_hit = false;
3183 pipeline->shared_data =
3184 v3dv_pipeline_cache_search_for_pipeline(cache, pipeline->sha1, &cache_hit);
3185
3186 if (pipeline->shared_data != NULL) {
3187 assert(pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE]);
3188 if (cache_hit && cache != &pipeline->device->default_pipeline_cache)
3189 pipeline_feedback.flags |=
3190 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
3191
3192 goto success;
3193 }
3194 }
3195
3196 if (pipeline->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)
3197 return VK_PIPELINE_COMPILE_REQUIRED;
3198
3199 pipeline->shared_data = v3dv_pipeline_shared_data_new_empty(pipeline->sha1,
3200 pipeline,
3201 false);
3202 if (!pipeline->shared_data)
3203 return VK_ERROR_OUT_OF_HOST_MEMORY;
3204
3205 p_stage->feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
3206
3207    /* If not found in the cache, compile it */
3208 p_stage->nir = pipeline_stage_get_nir(p_stage, pipeline, cache);
3209 assert(p_stage->nir);
3210
3211 v3d_optimize_nir(NULL, p_stage->nir);
3212 pipeline_lower_nir(pipeline, p_stage, pipeline->layout);
3213 lower_compute(p_stage->nir);
3214
3215 VkResult result = VK_SUCCESS;
3216
3217 struct v3d_key key;
3218 memset(&key, 0, sizeof(key));
3219 pipeline_populate_v3d_key(&key, p_stage, 0);
3220 pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE] =
3221 pipeline_compile_shader_variant(p_stage, &key, sizeof(key),
3222 alloc, &result);
3223
3224 if (result != VK_SUCCESS)
3225 return result;
3226
3227 if (!upload_assembly(pipeline))
3228 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
3229
3230 v3dv_pipeline_cache_upload_pipeline(pipeline, cache);
3231
3232 success:
3233
3234 pipeline_check_buffer_device_address(pipeline);
3235
3236 pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
3237 write_creation_feedback(pipeline,
3238 info->pNext,
3239 &pipeline_feedback,
3240 1,
3241 &info->stage);
3242
3243    /* Since the variants are stored in pipeline->shared_data, we no longer
3244     * need the pipeline stages once compilation has finished.
3245 */
3246 if (!needs_executable_info)
3247 pipeline_free_stages(device, pipeline, alloc);
3248
3249 pipeline_check_spill_size(pipeline);
3250
3251 return VK_SUCCESS;
3252 }
3253
3254 static VkResult
3255 compute_pipeline_init(struct v3dv_pipeline *pipeline,
3256 struct v3dv_device *device,
3257 struct v3dv_pipeline_cache *cache,
3258 const VkComputePipelineCreateInfo *info,
3259 const VkAllocationCallbacks *alloc)
3260 {
3261 V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, info->layout);
3262
3263 pipeline->device = device;
3264 pipeline->layout = layout;
3265 v3dv_pipeline_layout_ref(pipeline->layout);
3266
3267 VkResult result = pipeline_compile_compute(pipeline, cache, info, alloc);
3268 if (result != VK_SUCCESS)
3269 return result;
3270
3271 return result;
3272 }
3273
3274 static VkResult
3275 compute_pipeline_create(VkDevice _device,
3276 VkPipelineCache _cache,
3277 const VkComputePipelineCreateInfo *pCreateInfo,
3278 const VkAllocationCallbacks *pAllocator,
3279 VkPipeline *pPipeline,
3280 VkPipelineCreateFlagBits2KHR *flags)
3281 {
3282 V3DV_FROM_HANDLE(v3dv_device, device, _device);
3283 V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
3284
3285 struct v3dv_pipeline *pipeline;
3286 VkResult result;
3287
3288 *flags = pipeline_create_info_get_flags(pCreateInfo->flags,
3289 pCreateInfo->pNext);
3290
3291 /* Use the default pipeline cache if none is specified */
3292 if (cache == NULL && device->instance->default_pipeline_cache_enabled)
3293 cache = &device->default_pipeline_cache;
3294
3295 pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline),
3296 VK_OBJECT_TYPE_PIPELINE);
3297 if (pipeline == NULL)
3298 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3299
3300 pipeline->flags = *flags;
3301 result = compute_pipeline_init(pipeline, device, cache,
3302 pCreateInfo, pAllocator);
3303 if (result != VK_SUCCESS) {
3304 v3dv_destroy_pipeline(pipeline, device, pAllocator);
3305 if (result == VK_PIPELINE_COMPILE_REQUIRED)
3306 *pPipeline = VK_NULL_HANDLE;
3307 return result;
3308 }
3309
3310 *pPipeline = v3dv_pipeline_to_handle(pipeline);
3311
3312 return VK_SUCCESS;
3313 }
3314
3315 VKAPI_ATTR VkResult VKAPI_CALL
3316 v3dv_CreateComputePipelines(VkDevice _device,
3317 VkPipelineCache pipelineCache,
3318 uint32_t createInfoCount,
3319 const VkComputePipelineCreateInfo *pCreateInfos,
3320 const VkAllocationCallbacks *pAllocator,
3321 VkPipeline *pPipelines)
3322 {
3323 V3DV_FROM_HANDLE(v3dv_device, device, _device);
3324 VkResult result = VK_SUCCESS;
3325
3326 if (V3D_DBG(SHADERS))
3327 mtx_lock(&device->pdevice->mutex);
3328
3329 uint32_t i = 0;
3330 for (; i < createInfoCount; i++) {
3331 VkResult local_result;
3332 VkPipelineCreateFlagBits2KHR flags;
3333 local_result = compute_pipeline_create(_device,
3334 pipelineCache,
3335 &pCreateInfos[i],
3336 pAllocator,
3337 &pPipelines[i],
3338 &flags);
3339
3340 if (local_result != VK_SUCCESS) {
3341 result = local_result;
3342 pPipelines[i] = VK_NULL_HANDLE;
3343 if (flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
3344 break;
3345 }
3346 }
3347
3348 for (; i < createInfoCount; i++)
3349 pPipelines[i] = VK_NULL_HANDLE;
3350
3351 if (V3D_DBG(SHADERS))
3352 mtx_unlock(&device->pdevice->mutex);
3353
3354 return result;
3355 }
3356
3357 static nir_shader *
3358 pipeline_get_nir(struct v3dv_pipeline *pipeline,
3359 enum broadcom_shader_stage stage)
3360 {
3361 assert(stage >= 0 && stage < BROADCOM_SHADER_STAGES);
3362 if (pipeline->stages[stage])
3363 return pipeline->stages[stage]->nir;
3364
3365 return NULL;
3366 }
3367
3368 static struct v3d_prog_data *
3369 pipeline_get_prog_data(struct v3dv_pipeline *pipeline,
3370 enum broadcom_shader_stage stage)
3371 {
3372 if (pipeline->shared_data->variants[stage])
3373 return pipeline->shared_data->variants[stage]->prog_data.base;
3374 return NULL;
3375 }
3376
3377 static uint64_t *
3378 pipeline_get_qpu(struct v3dv_pipeline *pipeline,
3379 enum broadcom_shader_stage stage,
3380 uint32_t *qpu_size)
3381 {
3382 struct v3dv_shader_variant *variant =
3383 pipeline->shared_data->variants[stage];
3384 if (!variant) {
3385 *qpu_size = 0;
3386 return NULL;
3387 }
3388
3389 *qpu_size = variant->qpu_insts_size;
3390 return variant->qpu_insts;
3391 }
3392
3393 /* FIXME: we use the same macro in various drivers, maybe move it to
3394 * the common vk_util.h?
3395 */
3396 #define WRITE_STR(field, ...) ({ \
3397 memset(field, 0, sizeof(field)); \
3398 UNUSED int _i = snprintf(field, sizeof(field), __VA_ARGS__); \
3399 assert(_i > 0 && _i < sizeof(field)); \
3400 })
3401
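/* Follows the usual Vulkan two-call idiom for returning strings: if pData is
 * NULL only the required dataSize is reported; otherwise the text is copied
 * and false is returned when it had to be truncated.
 */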
3402 static bool
3403 write_ir_text(VkPipelineExecutableInternalRepresentationKHR *ir,
3404 const char *data)
3405 {
3406 ir->isText = VK_TRUE;
3407
3408 size_t data_len = strlen(data) + 1;
3409
3410 if (ir->pData == NULL) {
3411 ir->dataSize = data_len;
3412 return true;
3413 }
3414
3415 strncpy(ir->pData, data, ir->dataSize);
3416 if (ir->dataSize < data_len)
3417 return false;
3418
3419 ir->dataSize = data_len;
3420 return true;
3421 }
3422
3423 static void
3424 append(char **str, size_t *offset, const char *fmt, ...)
3425 {
3426 va_list args;
3427 va_start(args, fmt);
3428 ralloc_vasprintf_rewrite_tail(str, offset, fmt, args);
3429 va_end(args);
3430 }
3431
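/* Lazily collects the per-stage data (final NIR and disassembled QPU code)
 * exposed through VK_KHR_pipeline_executable_properties. The text is only
 * generated when pipeline_keep_qpu() reports that the pipeline kept its QPU
 * code around.
 */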
3432 static void
3433 pipeline_collect_executable_data(struct v3dv_pipeline *pipeline)
3434 {
3435 if (pipeline->executables.mem_ctx)
3436 return;
3437
3438 pipeline->executables.mem_ctx = ralloc_context(NULL);
3439 util_dynarray_init(&pipeline->executables.data,
3440 pipeline->executables.mem_ctx);
3441
3442 /* Don't crash for failed/bogus pipelines */
3443 if (!pipeline->shared_data)
3444 return;
3445
3446 for (int s = BROADCOM_SHADER_VERTEX; s <= BROADCOM_SHADER_COMPUTE; s++) {
3447 VkShaderStageFlags vk_stage =
3448 mesa_to_vk_shader_stage(broadcom_shader_stage_to_gl(s));
3449 if (!(vk_stage & pipeline->active_stages))
3450 continue;
3451
3452 char *nir_str = NULL;
3453 char *qpu_str = NULL;
3454
3455 if (pipeline_keep_qpu(pipeline)) {
3456 nir_shader *nir = pipeline_get_nir(pipeline, s);
3457 nir_str = nir ?
3458 nir_shader_as_str(nir, pipeline->executables.mem_ctx) : NULL;
3459
3460 uint32_t qpu_size;
3461 uint64_t *qpu = pipeline_get_qpu(pipeline, s, &qpu_size);
3462 if (qpu) {
3463 uint32_t qpu_inst_count = qpu_size / sizeof(uint64_t);
3464 qpu_str = rzalloc_size(pipeline->executables.mem_ctx,
3465 qpu_inst_count * 96);
3466 size_t offset = 0;
3467 for (int i = 0; i < qpu_inst_count; i++) {
3468 const char *str = v3d_qpu_disasm(&pipeline->device->devinfo, qpu[i]);
3469 append(&qpu_str, &offset, "%s\n", str);
3470 ralloc_free((void *)str);
3471 }
3472 }
3473 }
3474
3475 struct v3dv_pipeline_executable_data data = {
3476 .stage = s,
3477 .nir_str = nir_str,
3478 .qpu_str = qpu_str,
3479 };
3480 util_dynarray_append(&pipeline->executables.data,
3481 struct v3dv_pipeline_executable_data, data);
3482 }
3483 }
3484
3485 static const struct v3dv_pipeline_executable_data *
3486 pipeline_get_executable(struct v3dv_pipeline *pipeline, uint32_t index)
3487 {
3488 assert(index < util_dynarray_num_elements(&pipeline->executables.data,
3489 struct v3dv_pipeline_executable_data));
3490 return util_dynarray_element(&pipeline->executables.data,
3491 struct v3dv_pipeline_executable_data,
3492 index);
3493 }
3494
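/* Returns the captured internal representations (NIR and QPU assembly) for
 * the requested executable, reporting VK_INCOMPLETE if any caller-provided
 * buffer was too small to hold the full text.
 */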
3495 VKAPI_ATTR VkResult VKAPI_CALL
3496 v3dv_GetPipelineExecutableInternalRepresentationsKHR(
3497 VkDevice device,
3498 const VkPipelineExecutableInfoKHR *pExecutableInfo,
3499 uint32_t *pInternalRepresentationCount,
3500 VkPipelineExecutableInternalRepresentationKHR *pInternalRepresentations)
3501 {
3502 V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, pExecutableInfo->pipeline);
3503
3504 pipeline_collect_executable_data(pipeline);
3505
3506 VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableInternalRepresentationKHR, out,
3507 pInternalRepresentations, pInternalRepresentationCount);
3508
3509 bool incomplete = false;
3510 const struct v3dv_pipeline_executable_data *exe =
3511 pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
3512
3513 if (exe->nir_str) {
3514 vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR,
3515 &out, ir) {
3516 WRITE_STR(ir->name, "NIR (%s)", broadcom_shader_stage_name(exe->stage));
3517 WRITE_STR(ir->description, "Final NIR form");
3518 if (!write_ir_text(ir, exe->nir_str))
3519 incomplete = true;
3520 }
3521 }
3522
3523 if (exe->qpu_str) {
3524 vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR,
3525 &out, ir) {
3526 WRITE_STR(ir->name, "QPU (%s)", broadcom_shader_stage_name(exe->stage));
3527 WRITE_STR(ir->description, "Final QPU assembly");
3528 if (!write_ir_text(ir, exe->qpu_str))
3529 incomplete = true;
3530 }
3531 }
3532
3533 return incomplete ? VK_INCOMPLETE : vk_outarray_status(&out);
3534 }
3535
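/* Exposes one executable per collected shader stage (binning and render
 * variants are listed separately) and reports V3D_CHANNELS as the subgroup
 * size.
 */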
3536 VKAPI_ATTR VkResult VKAPI_CALL
3537 v3dv_GetPipelineExecutablePropertiesKHR(
3538 VkDevice device,
3539 const VkPipelineInfoKHR *pPipelineInfo,
3540 uint32_t *pExecutableCount,
3541 VkPipelineExecutablePropertiesKHR *pProperties)
3542 {
3543 V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, pPipelineInfo->pipeline);
3544
3545 pipeline_collect_executable_data(pipeline);
3546
3547 VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutablePropertiesKHR, out,
3548 pProperties, pExecutableCount);
3549
3550 util_dynarray_foreach(&pipeline->executables.data,
3551 struct v3dv_pipeline_executable_data, exe) {
3552 vk_outarray_append_typed(VkPipelineExecutablePropertiesKHR, &out, props) {
3553 gl_shader_stage mesa_stage = broadcom_shader_stage_to_gl(exe->stage);
3554 props->stages = mesa_to_vk_shader_stage(mesa_stage);
3555
3556 WRITE_STR(props->name, "%s (%s)",
3557 _mesa_shader_stage_to_abbrev(mesa_stage),
3558 broadcom_shader_stage_is_binning(exe->stage) ?
3559 "Binning" : "Render");
3560
3561 WRITE_STR(props->description, "%s",
3562 _mesa_shader_stage_to_string(mesa_stage));
3563
3564 props->subgroupSize = V3D_CHANNELS;
3565 }
3566 }
3567
3568 return vk_outarray_status(&out);
3569 }
3570
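/* Reports the compiler statistics for one executable (compile strategy,
 * instruction count, thread count, spilling and QPU read stall counts)
 * taken from the variant's prog_data.
 */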
3571 VKAPI_ATTR VkResult VKAPI_CALL
3572 v3dv_GetPipelineExecutableStatisticsKHR(
3573 VkDevice device,
3574 const VkPipelineExecutableInfoKHR *pExecutableInfo,
3575 uint32_t *pStatisticCount,
3576 VkPipelineExecutableStatisticKHR *pStatistics)
3577 {
3578 V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, pExecutableInfo->pipeline);
3579
3580 pipeline_collect_executable_data(pipeline);
3581
3582 const struct v3dv_pipeline_executable_data *exe =
3583 pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
3584
3585 struct v3d_prog_data *prog_data =
3586 pipeline_get_prog_data(pipeline, exe->stage);
3587
3588 struct v3dv_shader_variant *variant =
3589 pipeline->shared_data->variants[exe->stage];
3590 uint32_t qpu_inst_count = variant->qpu_insts_size / sizeof(uint64_t);
3591
3592 VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableStatisticKHR, out,
3593 pStatistics, pStatisticCount);
3594
3595 if (qpu_inst_count > 0) {
3596 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3597 WRITE_STR(stat->name, "Compile Strategy");
3598 WRITE_STR(stat->description, "Chosen compile strategy index");
3599 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3600 stat->value.u64 = prog_data->compile_strategy_idx;
3601 }
3602
3603 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3604 WRITE_STR(stat->name, "Instruction Count");
3605 WRITE_STR(stat->description, "Number of QPU instructions");
3606 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3607 stat->value.u64 = qpu_inst_count;
3608 }
3609
3610 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3611 WRITE_STR(stat->name, "Thread Count");
3612 WRITE_STR(stat->description, "Number of QPU threads dispatched");
3613 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3614 stat->value.u64 = prog_data->threads;
3615 }
3616
3617 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3618 WRITE_STR(stat->name, "Spill Size");
3619 WRITE_STR(stat->description, "Size of the spill buffer in bytes");
3620 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3621 stat->value.u64 = prog_data->spill_size;
3622 }
3623
3624 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3625 WRITE_STR(stat->name, "TMU Spills");
3626 WRITE_STR(stat->description, "Number of times a register was spilled "
3627 "to memory");
3628 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3629          stat->value.u64 = prog_data->tmu_spills;
3630 }
3631
3632 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3633 WRITE_STR(stat->name, "TMU Fills");
3634 WRITE_STR(stat->description, "Number of times a register was filled "
3635 "from memory");
3636 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3637          stat->value.u64 = prog_data->tmu_fills;
3638 }
3639
3640 vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
3641 WRITE_STR(stat->name, "QPU Read Stalls");
3642 WRITE_STR(stat->description, "Number of cycles the QPU stalls for a "
3643 "register read dependency");
3644 stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3645 stat->value.u64 = prog_data->qpu_read_stalls;
3646 }
3647 }
3648
3649 return vk_outarray_status(&out);
3650 }
3651