xref: /aosp_15_r20/external/mesa3d/src/amd/vulkan/radv_pipeline_rt.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2021 Google
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "nir/nir.h"
8 #include "nir/nir_builder.h"
9 #include "nir/nir_serialize.h"
10 
11 #include "vk_shader_module.h"
12 
13 #include "nir/radv_nir.h"
14 #include "radv_debug.h"
15 #include "radv_descriptor_set.h"
16 #include "radv_entrypoints.h"
17 #include "radv_pipeline_binary.h"
18 #include "radv_pipeline_cache.h"
19 #include "radv_pipeline_rt.h"
20 #include "radv_rmv.h"
21 #include "radv_shader.h"
22 
23 struct rt_handle_hash_entry {
24    uint32_t key;
25    char hash[20];
26 };
27 
28 static uint32_t
handle_from_stages(struct radv_device * device,const unsigned char * shader_sha1,bool replay_namespace)29 handle_from_stages(struct radv_device *device, const unsigned char *shader_sha1, bool replay_namespace)
30 {
31    uint32_t ret;
32 
33    memcpy(&ret, shader_sha1, sizeof(ret));
34 
35    /* Leave the low half for resume shaders etc. */
36    ret |= 1u << 31;
37 
38    /* Ensure we have dedicated space for replayable shaders */
39    ret &= ~(1u << 30);
40    ret |= replay_namespace << 30;
41 
42    simple_mtx_lock(&device->rt_handles_mtx);
43 
44    struct hash_entry *he = NULL;
45    for (;;) {
46       he = _mesa_hash_table_search(device->rt_handles, &ret);
47       if (!he)
48          break;
49 
50       if (memcmp(he->data, shader_sha1, SHA1_DIGEST_LENGTH) == 0)
51          break;
52 
53       ++ret;
54    }
55 
56    if (!he) {
57       struct rt_handle_hash_entry *e = ralloc(device->rt_handles, struct rt_handle_hash_entry);
58       e->key = ret;
59       memcpy(e->hash, shader_sha1, SHA1_DIGEST_LENGTH);
60       _mesa_hash_table_insert(device->rt_handles, &e->key, &e->hash);
61    }
62 
63    simple_mtx_unlock(&device->rt_handles_mtx);
64 
65    return ret;
66 }
67 
68 static void
radv_generate_rt_shaders_key(const struct radv_device * device,const VkRayTracingPipelineCreateInfoKHR * pCreateInfo,struct radv_shader_stage_key * stage_keys)69 radv_generate_rt_shaders_key(const struct radv_device *device, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
70                              struct radv_shader_stage_key *stage_keys)
71 {
72    VkPipelineCreateFlags2KHR create_flags = vk_rt_pipeline_create_flags(pCreateInfo);
73 
74    for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
75       const VkPipelineShaderStageCreateInfo *stage = &pCreateInfo->pStages[i];
76       gl_shader_stage s = vk_to_mesa_shader_stage(stage->stage);
77 
78       stage_keys[s] = radv_pipeline_get_shader_key(device, stage, create_flags, pCreateInfo->pNext);
79    }
80 
81    if (pCreateInfo->pLibraryInfo) {
82       for (unsigned i = 0; i < pCreateInfo->pLibraryInfo->libraryCount; ++i) {
83          VK_FROM_HANDLE(radv_pipeline, pipeline_lib, pCreateInfo->pLibraryInfo->pLibraries[i]);
84          struct radv_ray_tracing_pipeline *library_pipeline = radv_pipeline_to_ray_tracing(pipeline_lib);
85          /* apply shader robustness from merged shaders */
86          if (library_pipeline->traversal_storage_robustness2)
87             stage_keys[MESA_SHADER_INTERSECTION].storage_robustness2 = true;
88 
89          if (library_pipeline->traversal_uniform_robustness2)
90             stage_keys[MESA_SHADER_INTERSECTION].uniform_robustness2 = true;
91       }
92    }
93 }
94 
95 static VkResult
radv_create_group_handles(struct radv_device * device,const VkRayTracingPipelineCreateInfoKHR * pCreateInfo,const struct radv_ray_tracing_stage * stages,struct radv_ray_tracing_group * groups)96 radv_create_group_handles(struct radv_device *device, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
97                           const struct radv_ray_tracing_stage *stages, struct radv_ray_tracing_group *groups)
98 {
99    VkPipelineCreateFlags2KHR create_flags = vk_rt_pipeline_create_flags(pCreateInfo);
100    bool capture_replay = create_flags & VK_PIPELINE_CREATE_2_RAY_TRACING_SHADER_GROUP_HANDLE_CAPTURE_REPLAY_BIT_KHR;
101    for (unsigned i = 0; i < pCreateInfo->groupCount; ++i) {
102       const VkRayTracingShaderGroupCreateInfoKHR *group_info = &pCreateInfo->pGroups[i];
103       switch (group_info->type) {
104       case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR:
105          if (group_info->generalShader != VK_SHADER_UNUSED_KHR) {
106             const struct radv_ray_tracing_stage *stage = &stages[group_info->generalShader];
107             groups[i].handle.general_index = handle_from_stages(device, stage->sha1, capture_replay);
108          }
109          break;
110       case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR:
111          if (group_info->closestHitShader != VK_SHADER_UNUSED_KHR) {
112             const struct radv_ray_tracing_stage *stage = &stages[group_info->closestHitShader];
113             groups[i].handle.closest_hit_index = handle_from_stages(device, stage->sha1, capture_replay);
114          }
115 
116          if (group_info->intersectionShader != VK_SHADER_UNUSED_KHR) {
117             unsigned char sha1[SHA1_DIGEST_LENGTH];
118             struct mesa_sha1 ctx;
119 
120             _mesa_sha1_init(&ctx);
121             _mesa_sha1_update(&ctx, stages[group_info->intersectionShader].sha1, SHA1_DIGEST_LENGTH);
122             if (group_info->anyHitShader != VK_SHADER_UNUSED_KHR)
123                _mesa_sha1_update(&ctx, stages[group_info->anyHitShader].sha1, SHA1_DIGEST_LENGTH);
124             _mesa_sha1_final(&ctx, sha1);
125 
126             groups[i].handle.intersection_index = handle_from_stages(device, sha1, capture_replay);
127          }
128          break;
129       case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR:
130          if (group_info->closestHitShader != VK_SHADER_UNUSED_KHR) {
131             const struct radv_ray_tracing_stage *stage = &stages[group_info->closestHitShader];
132             groups[i].handle.closest_hit_index = handle_from_stages(device, stage->sha1, capture_replay);
133          }
134 
135          if (group_info->anyHitShader != VK_SHADER_UNUSED_KHR) {
136             const struct radv_ray_tracing_stage *stage = &stages[group_info->anyHitShader];
137             groups[i].handle.any_hit_index = handle_from_stages(device, stage->sha1, capture_replay);
138          }
139          break;
140       case VK_SHADER_GROUP_SHADER_MAX_ENUM_KHR:
141          unreachable("VK_SHADER_GROUP_SHADER_MAX_ENUM_KHR");
142       }
143 
144       if (group_info->pShaderGroupCaptureReplayHandle) {
145          const struct radv_rt_capture_replay_handle *handle = group_info->pShaderGroupCaptureReplayHandle;
146          if (memcmp(&handle->non_recursive_idx, &groups[i].handle.any_hit_index, sizeof(uint32_t)) != 0) {
147             return VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS;
148          }
149       }
150    }
151 
152    return VK_SUCCESS;
153 }
154 
155 static VkResult
radv_rt_init_capture_replay(struct radv_device * device,const VkRayTracingPipelineCreateInfoKHR * pCreateInfo,const struct radv_ray_tracing_stage * stages,const struct radv_ray_tracing_group * groups,struct radv_serialized_shader_arena_block * capture_replay_blocks)156 radv_rt_init_capture_replay(struct radv_device *device, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
157                             const struct radv_ray_tracing_stage *stages, const struct radv_ray_tracing_group *groups,
158                             struct radv_serialized_shader_arena_block *capture_replay_blocks)
159 {
160    VkResult result = VK_SUCCESS;
161    uint32_t idx;
162 
163    for (idx = 0; idx < pCreateInfo->groupCount; idx++) {
164       if (!pCreateInfo->pGroups[idx].pShaderGroupCaptureReplayHandle)
165          continue;
166 
167       const struct radv_rt_capture_replay_handle *handle =
168          (const struct radv_rt_capture_replay_handle *)pCreateInfo->pGroups[idx].pShaderGroupCaptureReplayHandle;
169 
170       if (groups[idx].recursive_shader < pCreateInfo->stageCount) {
171          capture_replay_blocks[groups[idx].recursive_shader] = handle->recursive_shader_alloc;
172       } else if (groups[idx].recursive_shader != VK_SHADER_UNUSED_KHR) {
173          struct radv_shader *library_shader = stages[groups[idx].recursive_shader].shader;
174          simple_mtx_lock(&library_shader->replay_mtx);
175          /* If arena_va is 0, the pipeline is monolithic and the shader was inlined into raygen */
176          if (!library_shader->has_replay_alloc && handle->recursive_shader_alloc.arena_va) {
177             union radv_shader_arena_block *new_block =
178                radv_replay_shader_arena_block(device, &handle->recursive_shader_alloc, library_shader);
179             if (!new_block) {
180                result = VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS;
181                goto reloc_out;
182             }
183 
184             radv_shader_wait_for_upload(device, library_shader->upload_seq);
185             radv_free_shader_memory(device, library_shader->alloc);
186 
187             library_shader->alloc = new_block;
188             library_shader->has_replay_alloc = true;
189 
190             library_shader->bo = library_shader->alloc->arena->bo;
191             library_shader->va = radv_buffer_get_va(library_shader->bo) + library_shader->alloc->offset;
192 
193             if (!radv_shader_reupload(device, library_shader)) {
194                result = VK_ERROR_UNKNOWN;
195                goto reloc_out;
196             }
197          }
198 
199          reloc_out:
200             simple_mtx_unlock(&library_shader->replay_mtx);
201             if (result != VK_SUCCESS)
202                return result;
203          }
204    }
205 
206    return result;
207 }
208 
209 static VkResult
radv_rt_fill_group_info(struct radv_device * device,const VkRayTracingPipelineCreateInfoKHR * pCreateInfo,const struct radv_ray_tracing_stage * stages,struct radv_ray_tracing_group * groups)210 radv_rt_fill_group_info(struct radv_device *device, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
211                         const struct radv_ray_tracing_stage *stages, struct radv_ray_tracing_group *groups)
212 {
213    VkResult result = radv_create_group_handles(device, pCreateInfo, stages, groups);
214 
215    uint32_t idx;
216    for (idx = 0; idx < pCreateInfo->groupCount; idx++) {
217       groups[idx].type = pCreateInfo->pGroups[idx].type;
218       if (groups[idx].type == VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR)
219          groups[idx].recursive_shader = pCreateInfo->pGroups[idx].generalShader;
220       else
221          groups[idx].recursive_shader = pCreateInfo->pGroups[idx].closestHitShader;
222       groups[idx].any_hit_shader = pCreateInfo->pGroups[idx].anyHitShader;
223       groups[idx].intersection_shader = pCreateInfo->pGroups[idx].intersectionShader;
224    }
225 
226    /* copy and adjust library groups (incl. handles) */
227    if (pCreateInfo->pLibraryInfo) {
228       unsigned stage_count = pCreateInfo->stageCount;
229       for (unsigned i = 0; i < pCreateInfo->pLibraryInfo->libraryCount; ++i) {
230          VK_FROM_HANDLE(radv_pipeline, pipeline_lib, pCreateInfo->pLibraryInfo->pLibraries[i]);
231          struct radv_ray_tracing_pipeline *library_pipeline = radv_pipeline_to_ray_tracing(pipeline_lib);
232 
233          for (unsigned j = 0; j < library_pipeline->group_count; ++j) {
234             struct radv_ray_tracing_group *dst = &groups[idx + j];
235             *dst = library_pipeline->groups[j];
236             if (dst->recursive_shader != VK_SHADER_UNUSED_KHR)
237                dst->recursive_shader += stage_count;
238             if (dst->any_hit_shader != VK_SHADER_UNUSED_KHR)
239                dst->any_hit_shader += stage_count;
240             if (dst->intersection_shader != VK_SHADER_UNUSED_KHR)
241                dst->intersection_shader += stage_count;
242             /* Don't set the shader VA since the handles are part of the pipeline hash */
243             dst->handle.recursive_shader_ptr = 0;
244          }
245          idx += library_pipeline->group_count;
246          stage_count += library_pipeline->stage_count;
247       }
248    }
249 
250    return result;
251 }
252 
253 static void
radv_rt_fill_stage_info(const VkRayTracingPipelineCreateInfoKHR * pCreateInfo,struct radv_ray_tracing_stage * stages)254 radv_rt_fill_stage_info(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, struct radv_ray_tracing_stage *stages)
255 {
256    uint32_t idx;
257    for (idx = 0; idx < pCreateInfo->stageCount; idx++)
258       stages[idx].stage = vk_to_mesa_shader_stage(pCreateInfo->pStages[idx].stage);
259 
260    if (pCreateInfo->pLibraryInfo) {
261       for (unsigned i = 0; i < pCreateInfo->pLibraryInfo->libraryCount; ++i) {
262          VK_FROM_HANDLE(radv_pipeline, pipeline, pCreateInfo->pLibraryInfo->pLibraries[i]);
263          struct radv_ray_tracing_pipeline *library_pipeline = radv_pipeline_to_ray_tracing(pipeline);
264          for (unsigned j = 0; j < library_pipeline->stage_count; ++j) {
265             if (library_pipeline->stages[j].nir)
266                stages[idx].nir = vk_pipeline_cache_object_ref(library_pipeline->stages[j].nir);
267             if (library_pipeline->stages[j].shader)
268                stages[idx].shader = radv_shader_ref(library_pipeline->stages[j].shader);
269 
270             stages[idx].stage = library_pipeline->stages[j].stage;
271             stages[idx].stack_size = library_pipeline->stages[j].stack_size;
272             stages[idx].info = library_pipeline->stages[j].info;
273             memcpy(stages[idx].sha1, library_pipeline->stages[j].sha1, SHA1_DIGEST_LENGTH);
274             idx++;
275          }
276       }
277    }
278 }
279 
280 static void
radv_init_rt_stage_hashes(const struct radv_device * device,VkPipelineCreateFlags2KHR pipeline_flags,const VkRayTracingPipelineCreateInfoKHR * pCreateInfo,struct radv_ray_tracing_stage * stages,const struct radv_shader_stage_key * stage_keys)281 radv_init_rt_stage_hashes(const struct radv_device *device,
282                           VkPipelineCreateFlags2KHR pipeline_flags,
283                           const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
284                           struct radv_ray_tracing_stage *stages, const struct radv_shader_stage_key *stage_keys)
285 {
286    const VkPipelineBinaryInfoKHR *binary_info = vk_find_struct_const(pCreateInfo->pNext, PIPELINE_BINARY_INFO_KHR);
287    if (binary_info && binary_info->binaryCount > 0) {
288       for (uint32_t i = 0; i < binary_info->binaryCount; i++) {
289          VK_FROM_HANDLE(radv_pipeline_binary, pipeline_binary, binary_info->pPipelineBinaries[i]);
290          struct blob_reader blob;
291 
292          blob_reader_init(&blob, pipeline_binary->data, pipeline_binary->size);
293 
294          const struct radv_ray_tracing_binary_header *header =
295             (const struct radv_ray_tracing_binary_header *)blob_read_bytes(&blob, sizeof(*header));
296 
297          if (header->is_traversal_shader)
298             continue;
299 
300          memcpy(stages[i].sha1, header->stage_sha1, SHA1_DIGEST_LENGTH);
301       }
302    } else {
303       for (uint32_t idx = 0; idx < pCreateInfo->stageCount; idx++) {
304          const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[idx];
305          gl_shader_stage s = vk_to_mesa_shader_stage(sinfo->stage);
306          struct mesa_sha1 ctx;
307 
308          _mesa_sha1_init(&ctx);
309          radv_pipeline_hash_shader_stage(pipeline_flags, sinfo, &stage_keys[s], &ctx);
310          _mesa_sha1_final(&ctx, stages[idx].sha1);
311       }
312    }
313 }
314 
315 static bool
should_move_rt_instruction(nir_intrinsic_instr * instr)316 should_move_rt_instruction(nir_intrinsic_instr *instr)
317 {
318    switch (instr->intrinsic) {
319    case nir_intrinsic_load_hit_attrib_amd:
320       return nir_intrinsic_base(instr) < RADV_MAX_HIT_ATTRIB_DWORDS;
321    case nir_intrinsic_load_rt_arg_scratch_offset_amd:
322    case nir_intrinsic_load_ray_flags:
323    case nir_intrinsic_load_ray_object_origin:
324    case nir_intrinsic_load_ray_world_origin:
325    case nir_intrinsic_load_ray_t_min:
326    case nir_intrinsic_load_ray_object_direction:
327    case nir_intrinsic_load_ray_world_direction:
328    case nir_intrinsic_load_ray_t_max:
329       return true;
330    default:
331       return false;
332    }
333 }
334 
335 static void
move_rt_instructions(nir_shader * shader)336 move_rt_instructions(nir_shader *shader)
337 {
338    nir_cursor target = nir_before_impl(nir_shader_get_entrypoint(shader));
339 
340    nir_foreach_block (block, nir_shader_get_entrypoint(shader)) {
341       nir_foreach_instr_safe (instr, block) {
342          if (instr->type != nir_instr_type_intrinsic)
343             continue;
344 
345          nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr);
346 
347          if (!should_move_rt_instruction(intrinsic))
348             continue;
349 
350          nir_instr_move(target, instr);
351       }
352    }
353 
354    nir_metadata_preserve(nir_shader_get_entrypoint(shader), nir_metadata_all & (~nir_metadata_instr_index));
355 }
356 
357 static VkResult
radv_rt_nir_to_asm(struct radv_device * device,struct vk_pipeline_cache * cache,const VkRayTracingPipelineCreateInfoKHR * pCreateInfo,struct radv_ray_tracing_pipeline * pipeline,bool monolithic,struct radv_shader_stage * stage,uint32_t * stack_size,struct radv_ray_tracing_stage_info * stage_info,const struct radv_ray_tracing_stage_info * traversal_stage_info,struct radv_serialized_shader_arena_block * replay_block,struct radv_shader ** out_shader)358 radv_rt_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache,
359                    const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, struct radv_ray_tracing_pipeline *pipeline,
360                    bool monolithic, struct radv_shader_stage *stage, uint32_t *stack_size,
361                    struct radv_ray_tracing_stage_info *stage_info,
362                    const struct radv_ray_tracing_stage_info *traversal_stage_info,
363                    struct radv_serialized_shader_arena_block *replay_block, struct radv_shader **out_shader)
364 {
365    struct radv_physical_device *pdev = radv_device_physical(device);
366    struct radv_instance *instance = radv_physical_device_instance(pdev);
367 
368    struct radv_shader_binary *binary;
369    bool keep_executable_info = radv_pipeline_capture_shaders(device, pipeline->base.base.create_flags);
370    bool keep_statistic_info = radv_pipeline_capture_shader_stats(device, pipeline->base.base.create_flags);
371 
372    radv_nir_lower_rt_io(stage->nir, monolithic, 0);
373 
374    /* Gather shader info. */
375    nir_shader_gather_info(stage->nir, nir_shader_get_entrypoint(stage->nir));
376    radv_nir_shader_info_init(stage->stage, MESA_SHADER_NONE, &stage->info);
377    radv_nir_shader_info_pass(device, stage->nir, &stage->layout, &stage->key, NULL, RADV_PIPELINE_RAY_TRACING, false,
378                              &stage->info);
379 
380    /* Declare shader arguments. */
381    radv_declare_shader_args(device, NULL, &stage->info, stage->stage, MESA_SHADER_NONE, &stage->args);
382 
383    stage->info.user_sgprs_locs = stage->args.user_sgprs_locs;
384    stage->info.inline_push_constant_mask = stage->args.ac.inline_push_const_mask;
385 
386    /* Move ray tracing system values to the top that are set by rt_trace_ray
387     * to prevent them from being overwritten by other rt_trace_ray calls.
388     */
389    NIR_PASS_V(stage->nir, move_rt_instructions);
390 
391    uint32_t num_resume_shaders = 0;
392    nir_shader **resume_shaders = NULL;
393 
394    if (stage->stage != MESA_SHADER_INTERSECTION && !monolithic) {
395       nir_builder b = nir_builder_at(nir_after_impl(nir_shader_get_entrypoint(stage->nir)));
396       nir_rt_return_amd(&b);
397 
398       const nir_lower_shader_calls_options opts = {
399          .address_format = nir_address_format_32bit_offset,
400          .stack_alignment = 16,
401          .localized_loads = true,
402          .vectorizer_callback = ac_nir_mem_vectorize_callback,
403          .vectorizer_data = &pdev->info.gfx_level,
404       };
405       nir_lower_shader_calls(stage->nir, &opts, &resume_shaders, &num_resume_shaders, stage->nir);
406    }
407 
408    unsigned num_shaders = num_resume_shaders + 1;
409    nir_shader **shaders = ralloc_array(stage->nir, nir_shader *, num_shaders);
410    if (!shaders)
411       return VK_ERROR_OUT_OF_HOST_MEMORY;
412 
413    shaders[0] = stage->nir;
414    for (uint32_t i = 0; i < num_resume_shaders; i++)
415       shaders[i + 1] = resume_shaders[i];
416 
417    if (stage_info)
418       memset(stage_info->unused_args, 0xFF, sizeof(stage_info->unused_args));
419 
420    /* Postprocess shader parts. */
421    for (uint32_t i = 0; i < num_shaders; i++) {
422       struct radv_shader_stage temp_stage = *stage;
423       temp_stage.nir = shaders[i];
424       radv_nir_lower_rt_abi(temp_stage.nir, pCreateInfo, &temp_stage.args, &stage->info, stack_size, i > 0, device,
425                             pipeline, monolithic, traversal_stage_info);
426 
427       /* Info might be out-of-date after inlining in radv_nir_lower_rt_abi(). */
428       nir_shader_gather_info(temp_stage.nir, nir_shader_get_entrypoint(temp_stage.nir));
429 
430       radv_optimize_nir(temp_stage.nir, stage->key.optimisations_disabled);
431       radv_postprocess_nir(device, NULL, &temp_stage);
432 
433       if (stage_info)
434          radv_gather_unused_args(stage_info, shaders[i]);
435    }
436 
437    bool dump_shader = radv_can_dump_shader(device, shaders[0], false);
438    bool replayable =
439       pipeline->base.base.create_flags & VK_PIPELINE_CREATE_2_RAY_TRACING_SHADER_GROUP_HANDLE_CAPTURE_REPLAY_BIT_KHR;
440 
441    if (dump_shader) {
442       simple_mtx_lock(&instance->shader_dump_mtx);
443       for (uint32_t i = 0; i < num_shaders; i++)
444          nir_print_shader(shaders[i], stderr);
445    }
446 
447    /* Compile NIR shader to AMD assembly. */
448    binary =
449       radv_shader_nir_to_asm(device, stage, shaders, num_shaders, NULL, keep_executable_info, keep_statistic_info);
450    struct radv_shader *shader;
451    if (replay_block || replayable) {
452       VkResult result = radv_shader_create_uncached(device, binary, replayable, replay_block, &shader);
453       if (result != VK_SUCCESS) {
454          if (dump_shader)
455             simple_mtx_unlock(&instance->shader_dump_mtx);
456 
457          free(binary);
458          return result;
459       }
460    } else
461       shader = radv_shader_create(device, cache, binary, keep_executable_info || dump_shader);
462 
463    if (shader) {
464       radv_shader_generate_debug_info(device, dump_shader, keep_executable_info, binary, shader, shaders, num_shaders,
465                                       &stage->info);
466 
467       if (shader && keep_executable_info && stage->spirv.size) {
468          shader->spirv = malloc(stage->spirv.size);
469          memcpy(shader->spirv, stage->spirv.data, stage->spirv.size);
470          shader->spirv_size = stage->spirv.size;
471       }
472    }
473 
474    if (dump_shader)
475       simple_mtx_unlock(&instance->shader_dump_mtx);
476 
477    free(binary);
478 
479    *out_shader = shader;
480 
481    if (radv_can_dump_shader_stats(device, stage->nir))
482       radv_dump_shader_stats(device, &pipeline->base.base, shader, stage->nir->info.stage, stderr);
483 
484    return shader ? VK_SUCCESS : VK_ERROR_OUT_OF_HOST_MEMORY;
485 }
486 
487 static void
radv_update_const_info(enum radv_rt_const_arg_state * state,bool equal)488 radv_update_const_info(enum radv_rt_const_arg_state *state, bool equal)
489 {
490    if (*state == RADV_RT_CONST_ARG_STATE_UNINITIALIZED)
491       *state = RADV_RT_CONST_ARG_STATE_VALID;
492    else if (*state == RADV_RT_CONST_ARG_STATE_VALID && !equal)
493       *state = RADV_RT_CONST_ARG_STATE_INVALID;
494 }
495 
496 static void
radv_gather_trace_ray_src(struct radv_rt_const_arg_info * info,nir_src src)497 radv_gather_trace_ray_src(struct radv_rt_const_arg_info *info, nir_src src)
498 {
499    if (nir_src_is_const(src)) {
500       radv_update_const_info(&info->state, info->value == nir_src_as_uint(src));
501       info->value = nir_src_as_uint(src);
502    } else {
503       info->state = RADV_RT_CONST_ARG_STATE_INVALID;
504    }
505 }
506 
507 static void
radv_rt_const_arg_info_combine(struct radv_rt_const_arg_info * dst,const struct radv_rt_const_arg_info * src)508 radv_rt_const_arg_info_combine(struct radv_rt_const_arg_info *dst, const struct radv_rt_const_arg_info *src)
509 {
510    if (src->state != RADV_RT_CONST_ARG_STATE_UNINITIALIZED) {
511       radv_update_const_info(&dst->state, dst->value == src->value);
512       if (src->state == RADV_RT_CONST_ARG_STATE_INVALID)
513          dst->state = RADV_RT_CONST_ARG_STATE_INVALID;
514       dst->value = src->value;
515    }
516 }
517 
518 static struct radv_ray_tracing_stage_info
radv_gather_ray_tracing_stage_info(nir_shader * nir)519 radv_gather_ray_tracing_stage_info(nir_shader *nir)
520 {
521    struct radv_ray_tracing_stage_info info = {
522       .can_inline = true,
523       .set_flags = 0xFFFFFFFF,
524       .unset_flags = 0xFFFFFFFF,
525    };
526 
527    nir_function_impl *impl = nir_shader_get_entrypoint(nir);
528    nir_foreach_block (block, impl) {
529       nir_foreach_instr (instr, block) {
530          if (instr->type != nir_instr_type_intrinsic)
531             continue;
532 
533          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
534          if (intr->intrinsic != nir_intrinsic_trace_ray)
535             continue;
536 
537          info.can_inline = false;
538 
539          radv_gather_trace_ray_src(&info.tmin, intr->src[7]);
540          radv_gather_trace_ray_src(&info.tmax, intr->src[9]);
541          radv_gather_trace_ray_src(&info.sbt_offset, intr->src[3]);
542          radv_gather_trace_ray_src(&info.sbt_stride, intr->src[4]);
543          radv_gather_trace_ray_src(&info.miss_index, intr->src[5]);
544 
545          nir_src flags = intr->src[1];
546          if (nir_src_is_const(flags)) {
547             info.set_flags &= nir_src_as_uint(flags);
548             info.unset_flags &= ~nir_src_as_uint(flags);
549          } else {
550             info.set_flags = 0;
551             info.unset_flags = 0;
552          }
553       }
554    }
555 
556    if (nir->info.stage == MESA_SHADER_RAYGEN || nir->info.stage == MESA_SHADER_ANY_HIT ||
557        nir->info.stage == MESA_SHADER_INTERSECTION)
558       info.can_inline = true;
559    else if (nir->info.stage == MESA_SHADER_CALLABLE)
560       info.can_inline = false;
561 
562    return info;
563 }
564 
565 static inline bool
radv_ray_tracing_stage_is_always_inlined(struct radv_ray_tracing_stage * stage)566 radv_ray_tracing_stage_is_always_inlined(struct radv_ray_tracing_stage *stage)
567 {
568    return stage->stage == MESA_SHADER_ANY_HIT || stage->stage == MESA_SHADER_INTERSECTION;
569 }
570 
571 static VkResult
radv_rt_compile_shaders(struct radv_device * device,struct vk_pipeline_cache * cache,const VkRayTracingPipelineCreateInfoKHR * pCreateInfo,const VkPipelineCreationFeedbackCreateInfo * creation_feedback,const struct radv_shader_stage_key * stage_keys,struct radv_ray_tracing_pipeline * pipeline,struct radv_serialized_shader_arena_block * capture_replay_handles)572 radv_rt_compile_shaders(struct radv_device *device, struct vk_pipeline_cache *cache,
573                         const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
574                         const VkPipelineCreationFeedbackCreateInfo *creation_feedback,
575                         const struct radv_shader_stage_key *stage_keys, struct radv_ray_tracing_pipeline *pipeline,
576                         struct radv_serialized_shader_arena_block *capture_replay_handles)
577 {
578    VK_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, pCreateInfo->layout);
579 
580    if (pipeline->base.base.create_flags & VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR)
581       return VK_PIPELINE_COMPILE_REQUIRED;
582    VkResult result = VK_SUCCESS;
583 
584    struct radv_ray_tracing_stage *rt_stages = pipeline->stages;
585 
586    struct radv_shader_stage *stages = calloc(pCreateInfo->stageCount, sizeof(struct radv_shader_stage));
587    if (!stages)
588       return VK_ERROR_OUT_OF_HOST_MEMORY;
589 
590    bool library = pipeline->base.base.create_flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR;
591 
592    bool monolithic = !library;
593    for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
594       if (rt_stages[i].shader || rt_stages[i].nir)
595          continue;
596 
597       int64_t stage_start = os_time_get_nano();
598 
599       struct radv_shader_stage *stage = &stages[i];
600       gl_shader_stage s = vk_to_mesa_shader_stage(pCreateInfo->pStages[i].stage);
601       radv_pipeline_stage_init(pipeline->base.base.create_flags, &pCreateInfo->pStages[i],
602                                pipeline_layout, &stage_keys[s], stage);
603 
604       /* precompile the shader */
605       stage->nir = radv_shader_spirv_to_nir(device, stage, NULL, false);
606 
607       NIR_PASS(_, stage->nir, radv_nir_lower_hit_attrib_derefs);
608 
609       rt_stages[i].info = radv_gather_ray_tracing_stage_info(stage->nir);
610 
611       stage->feedback.duration = os_time_get_nano() - stage_start;
612    }
613 
614    bool has_callable = false;
615    /* TODO: Recompile recursive raygen shaders instead. */
616    bool raygen_imported = false;
617    for (uint32_t i = 0; i < pipeline->stage_count; i++) {
618       has_callable |= rt_stages[i].stage == MESA_SHADER_CALLABLE;
619       monolithic &= rt_stages[i].info.can_inline;
620 
621       if (i >= pCreateInfo->stageCount)
622          raygen_imported |= rt_stages[i].stage == MESA_SHADER_RAYGEN;
623    }
624 
625    for (uint32_t idx = 0; idx < pCreateInfo->stageCount; idx++) {
626       if (rt_stages[idx].shader || rt_stages[idx].nir)
627          continue;
628 
629       int64_t stage_start = os_time_get_nano();
630 
631       struct radv_shader_stage *stage = &stages[idx];
632 
633       /* Cases in which we need to keep around the NIR:
634        *    - pipeline library: The final pipeline might be monolithic in which case it will need every NIR shader.
635        *                        If there is a callable shader, we can be sure that the final pipeline won't be
636        *                        monolithic.
637        *    - non-recursive:    Non-recursive shaders are inlined into the traversal shader.
638        *    - monolithic:       Callable shaders (chit/miss) are inlined into the raygen shader.
639        */
640       bool always_inlined = radv_ray_tracing_stage_is_always_inlined(&rt_stages[idx]);
641       bool nir_needed =
642          (library && !has_callable) || always_inlined || (monolithic && rt_stages[idx].stage != MESA_SHADER_RAYGEN);
643       nir_needed &= !rt_stages[idx].nir;
644       if (nir_needed) {
645          const bool cached = !stage->key.optimisations_disabled &&
646                              !(pipeline->base.base.create_flags & VK_PIPELINE_CREATE_2_CAPTURE_DATA_BIT_KHR);
647          rt_stages[idx].stack_size = stage->nir->scratch_size;
648          rt_stages[idx].nir = radv_pipeline_cache_nir_to_handle(device, cache, stage->nir, rt_stages[idx].sha1, cached);
649       }
650 
651       stage->feedback.duration += os_time_get_nano() - stage_start;
652    }
653 
654    for (uint32_t idx = 0; idx < pCreateInfo->stageCount; idx++) {
655       int64_t stage_start = os_time_get_nano();
656       struct radv_shader_stage *stage = &stages[idx];
657 
658       /* Cases in which we need to compile the shader (raygen/callable/chit/miss):
659        *    TODO: - monolithic: Extend the loop to cover imported stages and force compilation of imported raygen
660        *                        shaders since pipeline library shaders use separate compilation.
661        *    - separate:   Compile any recursive stage if wasn't compiled yet.
662        */
663       bool shader_needed = !radv_ray_tracing_stage_is_always_inlined(&rt_stages[idx]) && !rt_stages[idx].shader;
664       if (rt_stages[idx].stage == MESA_SHADER_CLOSEST_HIT || rt_stages[idx].stage == MESA_SHADER_MISS)
665          shader_needed &= !monolithic || raygen_imported;
666 
667       if (shader_needed) {
668          uint32_t stack_size = 0;
669          struct radv_serialized_shader_arena_block *replay_block =
670             capture_replay_handles[idx].arena_va ? &capture_replay_handles[idx] : NULL;
671 
672          bool monolithic_raygen = monolithic && stage->stage == MESA_SHADER_RAYGEN;
673 
674          result = radv_rt_nir_to_asm(device, cache, pCreateInfo, pipeline, monolithic_raygen, stage, &stack_size,
675                                      &rt_stages[idx].info, NULL, replay_block, &rt_stages[idx].shader);
676          if (result != VK_SUCCESS)
677             goto cleanup;
678 
679          assert(rt_stages[idx].stack_size <= stack_size);
680          rt_stages[idx].stack_size = stack_size;
681       }
682 
683       if (creation_feedback && creation_feedback->pipelineStageCreationFeedbackCount) {
684          assert(idx < creation_feedback->pipelineStageCreationFeedbackCount);
685          stage->feedback.duration += os_time_get_nano() - stage_start;
686          creation_feedback->pPipelineStageCreationFeedbacks[idx] = stage->feedback;
687       }
688    }
689 
690    /* Monolithic raygen shaders do not need a traversal shader. Skip compiling one if there are only monolithic raygen
691     * shaders.
692     */
693    bool traversal_needed = !library && (!monolithic || raygen_imported);
694    if (!traversal_needed) {
695       result = VK_SUCCESS;
696       goto cleanup;
697    }
698 
699    struct radv_ray_tracing_stage_info traversal_info = {
700       .set_flags = 0xFFFFFFFF,
701       .unset_flags = 0xFFFFFFFF,
702    };
703 
704    memset(traversal_info.unused_args, 0xFF, sizeof(traversal_info.unused_args));
705 
706    for (uint32_t i = 0; i < pipeline->stage_count; i++) {
707       if (!pipeline->stages[i].shader)
708          continue;
709 
710       struct radv_ray_tracing_stage_info *info = &pipeline->stages[i].info;
711 
712       BITSET_AND(traversal_info.unused_args, traversal_info.unused_args, info->unused_args);
713 
714       radv_rt_const_arg_info_combine(&traversal_info.tmin, &info->tmin);
715       radv_rt_const_arg_info_combine(&traversal_info.tmax, &info->tmax);
716       radv_rt_const_arg_info_combine(&traversal_info.sbt_offset, &info->sbt_offset);
717       radv_rt_const_arg_info_combine(&traversal_info.sbt_stride, &info->sbt_stride);
718       radv_rt_const_arg_info_combine(&traversal_info.miss_index, &info->miss_index);
719 
720       traversal_info.set_flags &= info->set_flags;
721       traversal_info.unset_flags &= info->unset_flags;
722    }
723 
724    /* create traversal shader */
725    nir_shader *traversal_nir = radv_build_traversal_shader(device, pipeline, pCreateInfo, &traversal_info);
726    struct radv_shader_stage traversal_stage = {
727       .stage = MESA_SHADER_INTERSECTION,
728       .nir = traversal_nir,
729       .key = stage_keys[MESA_SHADER_INTERSECTION],
730    };
731    radv_shader_layout_init(pipeline_layout, MESA_SHADER_INTERSECTION, &traversal_stage.layout);
732    result = radv_rt_nir_to_asm(device, cache, pCreateInfo, pipeline, false, &traversal_stage, NULL, NULL,
733                                &traversal_info, NULL, &pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]);
734    ralloc_free(traversal_nir);
735 
736 cleanup:
737    for (uint32_t i = 0; i < pCreateInfo->stageCount; i++)
738       ralloc_free(stages[i].nir);
739    free(stages);
740    return result;
741 }
742 
743 static bool
radv_rt_pipeline_has_dynamic_stack_size(const VkRayTracingPipelineCreateInfoKHR * pCreateInfo)744 radv_rt_pipeline_has_dynamic_stack_size(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo)
745 {
746    if (!pCreateInfo->pDynamicState)
747       return false;
748 
749    for (unsigned i = 0; i < pCreateInfo->pDynamicState->dynamicStateCount; ++i) {
750       if (pCreateInfo->pDynamicState->pDynamicStates[i] == VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR)
751          return true;
752    }
753 
754    return false;
755 }
756 
757 static void
compute_rt_stack_size(const VkRayTracingPipelineCreateInfoKHR * pCreateInfo,struct radv_ray_tracing_pipeline * pipeline)758 compute_rt_stack_size(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, struct radv_ray_tracing_pipeline *pipeline)
759 {
760    if (radv_rt_pipeline_has_dynamic_stack_size(pCreateInfo)) {
761       pipeline->stack_size = -1u;
762       return;
763    }
764 
765    unsigned raygen_size = 0;
766    unsigned callable_size = 0;
767    unsigned chit_miss_size = 0;
768    unsigned intersection_size = 0;
769    unsigned any_hit_size = 0;
770 
771    for (unsigned i = 0; i < pipeline->stage_count; ++i) {
772       uint32_t size = pipeline->stages[i].stack_size;
773       switch (pipeline->stages[i].stage) {
774       case MESA_SHADER_RAYGEN:
775          raygen_size = MAX2(raygen_size, size);
776          break;
777       case MESA_SHADER_CLOSEST_HIT:
778       case MESA_SHADER_MISS:
779          chit_miss_size = MAX2(chit_miss_size, size);
780          break;
781       case MESA_SHADER_CALLABLE:
782          callable_size = MAX2(callable_size, size);
783          break;
784       case MESA_SHADER_INTERSECTION:
785          intersection_size = MAX2(intersection_size, size);
786          break;
787       case MESA_SHADER_ANY_HIT:
788          any_hit_size = MAX2(any_hit_size, size);
789          break;
790       default:
791          unreachable("Invalid stage type in RT shader");
792       }
793    }
794    pipeline->stack_size =
795       raygen_size +
796       MIN2(pCreateInfo->maxPipelineRayRecursionDepth, 1) * MAX2(chit_miss_size, intersection_size + any_hit_size) +
797       MAX2(0, (int)(pCreateInfo->maxPipelineRayRecursionDepth) - 1) * chit_miss_size + 2 * callable_size;
798 }
799 
800 static void
combine_config(struct ac_shader_config * config,struct ac_shader_config * other)801 combine_config(struct ac_shader_config *config, struct ac_shader_config *other)
802 {
803    config->num_sgprs = MAX2(config->num_sgprs, other->num_sgprs);
804    config->num_vgprs = MAX2(config->num_vgprs, other->num_vgprs);
805    config->num_shared_vgprs = MAX2(config->num_shared_vgprs, other->num_shared_vgprs);
806    config->spilled_sgprs = MAX2(config->spilled_sgprs, other->spilled_sgprs);
807    config->spilled_vgprs = MAX2(config->spilled_vgprs, other->spilled_vgprs);
808    config->lds_size = MAX2(config->lds_size, other->lds_size);
809    config->scratch_bytes_per_wave = MAX2(config->scratch_bytes_per_wave, other->scratch_bytes_per_wave);
810 
811    assert(config->float_mode == other->float_mode);
812 }
813 
814 static void
postprocess_rt_config(struct ac_shader_config * config,enum amd_gfx_level gfx_level,unsigned wave_size)815 postprocess_rt_config(struct ac_shader_config *config, enum amd_gfx_level gfx_level, unsigned wave_size)
816 {
817    config->rsrc1 =
818       (config->rsrc1 & C_00B848_VGPRS) | S_00B848_VGPRS((config->num_vgprs - 1) / (wave_size == 32 ? 8 : 4));
819    if (gfx_level < GFX10)
820       config->rsrc1 = (config->rsrc1 & C_00B848_SGPRS) | S_00B848_SGPRS((config->num_sgprs - 1) / 8);
821 
822    config->rsrc2 = (config->rsrc2 & C_00B84C_LDS_SIZE) | S_00B84C_LDS_SIZE(config->lds_size);
823    config->rsrc3 = (config->rsrc3 & C_00B8A0_SHARED_VGPR_CNT) | S_00B8A0_SHARED_VGPR_CNT(config->num_shared_vgprs / 8);
824 }
825 
826 static void
compile_rt_prolog(struct radv_device * device,struct radv_ray_tracing_pipeline * pipeline)827 compile_rt_prolog(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline)
828 {
829    const struct radv_physical_device *pdev = radv_device_physical(device);
830 
831    pipeline->prolog = radv_create_rt_prolog(device);
832 
833    /* create combined config */
834    struct ac_shader_config *config = &pipeline->prolog->config;
835    for (unsigned i = 0; i < pipeline->stage_count; i++)
836       if (pipeline->stages[i].shader)
837          combine_config(config, &pipeline->stages[i].shader->config);
838 
839    if (pipeline->base.base.shaders[MESA_SHADER_INTERSECTION])
840       combine_config(config, &pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]->config);
841 
842    postprocess_rt_config(config, pdev->info.gfx_level, pdev->rt_wave_size);
843 
844    pipeline->prolog->max_waves = radv_get_max_waves(device, config, &pipeline->prolog->info);
845 }
846 
847 void
radv_ray_tracing_pipeline_hash(const struct radv_device * device,const VkRayTracingPipelineCreateInfoKHR * pCreateInfo,const struct radv_ray_tracing_state_key * rt_state,unsigned char * hash)848 radv_ray_tracing_pipeline_hash(const struct radv_device *device, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
849                                const struct radv_ray_tracing_state_key *rt_state, unsigned char *hash)
850 {
851    VK_FROM_HANDLE(radv_pipeline_layout, layout, pCreateInfo->layout);
852    struct mesa_sha1 ctx;
853 
854    _mesa_sha1_init(&ctx);
855    radv_pipeline_hash(device, layout, &ctx);
856 
857    for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
858       _mesa_sha1_update(&ctx, rt_state->stages[i].sha1, sizeof(rt_state->stages[i].sha1));
859    }
860 
861    for (uint32_t i = 0; i < pCreateInfo->groupCount; i++) {
862       _mesa_sha1_update(&ctx, &pCreateInfo->pGroups[i].type, sizeof(pCreateInfo->pGroups[i].type));
863       _mesa_sha1_update(&ctx, &pCreateInfo->pGroups[i].generalShader, sizeof(pCreateInfo->pGroups[i].generalShader));
864       _mesa_sha1_update(&ctx, &pCreateInfo->pGroups[i].anyHitShader, sizeof(pCreateInfo->pGroups[i].anyHitShader));
865       _mesa_sha1_update(&ctx, &pCreateInfo->pGroups[i].closestHitShader,
866                         sizeof(pCreateInfo->pGroups[i].closestHitShader));
867       _mesa_sha1_update(&ctx, &pCreateInfo->pGroups[i].intersectionShader,
868                         sizeof(pCreateInfo->pGroups[i].intersectionShader));
869       _mesa_sha1_update(&ctx, &rt_state->groups[i].handle, sizeof(struct radv_pipeline_group_handle));
870    }
871 
872    if (pCreateInfo->pLibraryInfo) {
873       for (uint32_t i = 0; i < pCreateInfo->pLibraryInfo->libraryCount; ++i) {
874          VK_FROM_HANDLE(radv_pipeline, lib_pipeline, pCreateInfo->pLibraryInfo->pLibraries[i]);
875          struct radv_ray_tracing_pipeline *lib = radv_pipeline_to_ray_tracing(lib_pipeline);
876          _mesa_sha1_update(&ctx, lib->base.base.sha1, SHA1_DIGEST_LENGTH);
877       }
878    }
879 
880    const uint64_t pipeline_flags =
881       vk_rt_pipeline_create_flags(pCreateInfo) &
882       (VK_PIPELINE_CREATE_2_RAY_TRACING_SKIP_TRIANGLES_BIT_KHR | VK_PIPELINE_CREATE_2_RAY_TRACING_SKIP_AABBS_BIT_KHR |
883        VK_PIPELINE_CREATE_2_RAY_TRACING_NO_NULL_ANY_HIT_SHADERS_BIT_KHR |
884        VK_PIPELINE_CREATE_2_RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR |
885        VK_PIPELINE_CREATE_2_RAY_TRACING_NO_NULL_MISS_SHADERS_BIT_KHR |
886        VK_PIPELINE_CREATE_2_RAY_TRACING_NO_NULL_INTERSECTION_SHADERS_BIT_KHR | VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR);
887    _mesa_sha1_update(&ctx, &pipeline_flags, sizeof(pipeline_flags));
888 
889    _mesa_sha1_final(&ctx, hash);
890 }
891 
892 static VkResult
radv_rt_pipeline_compile(struct radv_device * device,const VkRayTracingPipelineCreateInfoKHR * pCreateInfo,struct radv_ray_tracing_pipeline * pipeline,struct vk_pipeline_cache * cache,const struct radv_ray_tracing_state_key * rt_state,struct radv_serialized_shader_arena_block * capture_replay_blocks,const VkPipelineCreationFeedbackCreateInfo * creation_feedback)893 radv_rt_pipeline_compile(struct radv_device *device, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
894                          struct radv_ray_tracing_pipeline *pipeline, struct vk_pipeline_cache *cache,
895                          const struct radv_ray_tracing_state_key *rt_state,
896                          struct radv_serialized_shader_arena_block *capture_replay_blocks,
897                          const VkPipelineCreationFeedbackCreateInfo *creation_feedback)
898 {
899    const bool keep_executable_info = radv_pipeline_capture_shaders(device, pipeline->base.base.create_flags);
900    const bool emit_ray_history = !!device->rra_trace.ray_history_buffer;
901    VkPipelineCreationFeedback pipeline_feedback = {
902       .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
903    };
904    bool skip_shaders_cache = false;
905    VkResult result = VK_SUCCESS;
906 
907    int64_t pipeline_start = os_time_get_nano();
908 
909    radv_ray_tracing_pipeline_hash(device, pCreateInfo, rt_state, pipeline->base.base.sha1);
910    pipeline->base.base.pipeline_hash = *(uint64_t *)pipeline->base.base.sha1;
911 
912    /* Skip the shaders cache when any of the below are true:
913     * - shaders are captured because it's for debugging purposes
914     * - binaries are captured for later uses
915     * - ray history is enabled
916     * - group handles are saved and reused on a subsequent run (ie. capture/replay)
917     */
918    if (keep_executable_info || emit_ray_history ||
919        (pipeline->base.base.create_flags &
920         (VK_PIPELINE_CREATE_2_CAPTURE_DATA_BIT_KHR |
921          VK_PIPELINE_CREATE_2_RAY_TRACING_SHADER_GROUP_HANDLE_CAPTURE_REPLAY_BIT_KHR))) {
922       skip_shaders_cache = true;
923    }
924 
925    bool found_in_application_cache = true;
926    if (!skip_shaders_cache &&
927        radv_ray_tracing_pipeline_cache_search(device, cache, pipeline, &found_in_application_cache)) {
928       if (found_in_application_cache)
929          pipeline_feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
930       result = VK_SUCCESS;
931       goto done;
932    }
933 
934    result = radv_rt_compile_shaders(device, cache, pCreateInfo, creation_feedback, rt_state->stage_keys, pipeline,
935                                     capture_replay_blocks);
936 
937    if (result != VK_SUCCESS)
938       return result;
939 
940    if (!skip_shaders_cache)
941       radv_ray_tracing_pipeline_cache_insert(device, cache, pipeline, pCreateInfo->stageCount);
942 
943 done:
944    pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
945 
946    if (creation_feedback)
947       *creation_feedback->pPipelineCreationFeedback = pipeline_feedback;
948 
949    return result;
950 }
951 
952 void
radv_ray_tracing_state_key_finish(struct radv_ray_tracing_state_key * rt_state)953 radv_ray_tracing_state_key_finish(struct radv_ray_tracing_state_key *rt_state)
954 {
955    free(rt_state->stages);
956    free(rt_state->groups);
957 }
958 
959 VkResult
radv_generate_ray_tracing_state_key(struct radv_device * device,const VkRayTracingPipelineCreateInfoKHR * pCreateInfo,struct radv_ray_tracing_state_key * rt_state)960 radv_generate_ray_tracing_state_key(struct radv_device *device, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
961                                     struct radv_ray_tracing_state_key *rt_state)
962 {
963    VkResult result;
964 
965    memset(rt_state, 0, sizeof(*rt_state));
966 
967    /* Count the total number of stages/groups. */
968    rt_state->stage_count = pCreateInfo->stageCount;
969    rt_state->group_count = pCreateInfo->groupCount;
970 
971    if (pCreateInfo->pLibraryInfo) {
972       for (unsigned i = 0; i < pCreateInfo->pLibraryInfo->libraryCount; ++i) {
973          VK_FROM_HANDLE(radv_pipeline, pipeline, pCreateInfo->pLibraryInfo->pLibraries[i]);
974          struct radv_ray_tracing_pipeline *library_pipeline = radv_pipeline_to_ray_tracing(pipeline);
975 
976          rt_state->stage_count += library_pipeline->stage_count;
977          rt_state->group_count += library_pipeline->group_count;
978       }
979    }
980 
981    rt_state->stages = calloc(rt_state->stage_count, sizeof(*rt_state->stages));
982    if (!rt_state->stages)
983       return VK_ERROR_OUT_OF_HOST_MEMORY;
984 
985    rt_state->groups = calloc(rt_state->group_count, sizeof(*rt_state->groups));
986    if (!rt_state->groups) {
987       result = VK_ERROR_OUT_OF_HOST_MEMORY;
988       goto fail;
989    }
990 
991    /* Initialize stages/stage_keys/groups info. */
992    radv_rt_fill_stage_info(pCreateInfo, rt_state->stages);
993 
994    radv_generate_rt_shaders_key(device, pCreateInfo, rt_state->stage_keys);
995 
996    VkPipelineCreateFlags2KHR create_flags = vk_rt_pipeline_create_flags(pCreateInfo);
997    radv_init_rt_stage_hashes(device, create_flags, pCreateInfo, rt_state->stages, rt_state->stage_keys);
998 
999    result = radv_rt_fill_group_info(device, pCreateInfo, rt_state->stages, rt_state->groups);
1000    if (result != VK_SUCCESS)
1001       goto fail;
1002 
1003    return VK_SUCCESS;
1004 
1005 fail:
1006    radv_ray_tracing_state_key_finish(rt_state);
1007    return result;
1008 }
1009 
1010 static VkResult
radv_ray_tracing_pipeline_import_binary(struct radv_device * device,struct radv_ray_tracing_pipeline * pipeline,const VkPipelineBinaryInfoKHR * binary_info)1011 radv_ray_tracing_pipeline_import_binary(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline,
1012                                         const VkPipelineBinaryInfoKHR *binary_info)
1013 {
1014    blake3_hash pipeline_hash;
1015    struct mesa_blake3 ctx;
1016 
1017    _mesa_blake3_init(&ctx);
1018 
1019    for (uint32_t i = 0; i < binary_info->binaryCount; i++) {
1020       VK_FROM_HANDLE(radv_pipeline_binary, pipeline_binary, binary_info->pPipelineBinaries[i]);
1021       struct radv_shader *shader;
1022       struct blob_reader blob;
1023 
1024       blob_reader_init(&blob, pipeline_binary->data, pipeline_binary->size);
1025 
1026       const struct radv_ray_tracing_binary_header *header =
1027          (const struct radv_ray_tracing_binary_header *)blob_read_bytes(&blob, sizeof(*header));
1028 
1029       if (header->is_traversal_shader) {
1030          shader = radv_shader_deserialize(device, pipeline_binary->key, sizeof(pipeline_binary->key), &blob);
1031          if (!shader)
1032             return VK_ERROR_OUT_OF_DEVICE_MEMORY;
1033 
1034          pipeline->base.base.shaders[MESA_SHADER_INTERSECTION] = shader;
1035 
1036          _mesa_blake3_update(&ctx, pipeline_binary->key, sizeof(pipeline_binary->key));
1037          continue;
1038       }
1039 
1040       memcpy(&pipeline->stages[i].info, &header->stage_info, sizeof(pipeline->stages[i].info));
1041       pipeline->stages[i].stack_size = header->stack_size;
1042 
1043       if (header->has_shader) {
1044          shader = radv_shader_deserialize(device, pipeline_binary->key, sizeof(pipeline_binary->key), &blob);
1045          if (!shader)
1046             return VK_ERROR_OUT_OF_DEVICE_MEMORY;
1047 
1048          pipeline->stages[i].shader = shader;
1049 
1050          _mesa_blake3_update(&ctx, pipeline_binary->key, sizeof(pipeline_binary->key));
1051       }
1052 
1053       if (header->has_nir) {
1054          nir_shader *nir = nir_deserialize(NULL, NULL, &blob);
1055 
1056          pipeline->stages[i].nir = radv_pipeline_cache_nir_to_handle(device, NULL, nir, header->stage_sha1, false);
1057          ralloc_free(nir);
1058 
1059          if (!pipeline->stages[i].nir)
1060             return VK_ERROR_OUT_OF_HOST_MEMORY;
1061       }
1062    }
1063 
1064    _mesa_blake3_final(&ctx, pipeline_hash);
1065 
1066    pipeline->base.base.pipeline_hash = *(uint64_t *)pipeline_hash;
1067 
1068    return VK_SUCCESS;
1069 }
1070 
1071 static VkResult
radv_rt_pipeline_create(VkDevice _device,VkPipelineCache _cache,const VkRayTracingPipelineCreateInfoKHR * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkPipeline * pPipeline)1072 radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
1073                         const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline)
1074 {
1075    VK_FROM_HANDLE(radv_device, device, _device);
1076    VK_FROM_HANDLE(vk_pipeline_cache, cache, _cache);
1077    VK_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, pCreateInfo->layout);
1078    struct radv_ray_tracing_state_key rt_state;
1079    VkResult result;
1080    const VkPipelineCreationFeedbackCreateInfo *creation_feedback =
1081       vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
1082 
1083    result = radv_generate_ray_tracing_state_key(device, pCreateInfo, &rt_state);
1084    if (result != VK_SUCCESS)
1085       return result;
1086 
1087    VK_MULTIALLOC(ma);
1088    VK_MULTIALLOC_DECL(&ma, struct radv_ray_tracing_pipeline, pipeline, 1);
1089    VK_MULTIALLOC_DECL(&ma, struct radv_ray_tracing_stage, stages, rt_state.stage_count);
1090    VK_MULTIALLOC_DECL(&ma, struct radv_ray_tracing_group, groups, rt_state.group_count);
1091    VK_MULTIALLOC_DECL(&ma, struct radv_serialized_shader_arena_block, capture_replay_blocks, pCreateInfo->stageCount);
1092    if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, pAllocator, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT)) {
1093       radv_ray_tracing_state_key_finish(&rt_state);
1094       return VK_ERROR_OUT_OF_HOST_MEMORY;
1095    }
1096 
1097    radv_pipeline_init(device, &pipeline->base.base, RADV_PIPELINE_RAY_TRACING);
1098    pipeline->base.base.create_flags = vk_rt_pipeline_create_flags(pCreateInfo);
1099    pipeline->stage_count = rt_state.stage_count;
1100    pipeline->non_imported_stage_count = pCreateInfo->stageCount;
1101    pipeline->group_count = rt_state.group_count;
1102    pipeline->stages = stages;
1103    pipeline->groups = groups;
1104 
1105    memcpy(pipeline->stages, rt_state.stages, rt_state.stage_count * sizeof(struct radv_ray_tracing_stage));
1106    memcpy(pipeline->groups, rt_state.groups, rt_state.group_count * sizeof(struct radv_ray_tracing_group));
1107 
1108    /* cache robustness state for making merged shaders */
1109    if (rt_state.stage_keys[MESA_SHADER_INTERSECTION].storage_robustness2)
1110       pipeline->traversal_storage_robustness2 = true;
1111 
1112    if (rt_state.stage_keys[MESA_SHADER_INTERSECTION].uniform_robustness2)
1113       pipeline->traversal_uniform_robustness2 = true;
1114 
1115    result = radv_rt_init_capture_replay(device, pCreateInfo, stages, pipeline->groups, capture_replay_blocks);
1116    if (result != VK_SUCCESS)
1117       goto fail;
1118 
1119    const VkPipelineBinaryInfoKHR *binary_info = vk_find_struct_const(pCreateInfo->pNext, PIPELINE_BINARY_INFO_KHR);
1120 
1121    if (binary_info && binary_info->binaryCount > 0) {
1122       result = radv_ray_tracing_pipeline_import_binary(device, pipeline, binary_info);
1123    } else {
1124       result = radv_rt_pipeline_compile(device, pCreateInfo, pipeline, cache, &rt_state, capture_replay_blocks,
1125                                         creation_feedback);
1126       if (result != VK_SUCCESS)
1127          goto fail;
1128    }
1129 
1130    if (!(pipeline->base.base.create_flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR)) {
1131       compute_rt_stack_size(pCreateInfo, pipeline);
1132       compile_rt_prolog(device, pipeline);
1133 
1134       radv_compute_pipeline_init(&pipeline->base, pipeline_layout, pipeline->prolog);
1135    }
1136 
1137    /* write shader VAs into group handles */
1138    for (unsigned i = 0; i < pipeline->group_count; i++) {
1139       if (pipeline->groups[i].recursive_shader != VK_SHADER_UNUSED_KHR) {
1140          struct radv_shader *shader = pipeline->stages[pipeline->groups[i].recursive_shader].shader;
1141          if (shader)
1142             pipeline->groups[i].handle.recursive_shader_ptr = shader->va | radv_get_rt_priority(shader->info.stage);
1143       }
1144    }
1145 
1146    *pPipeline = radv_pipeline_to_handle(&pipeline->base.base);
1147    radv_rmv_log_rt_pipeline_create(device, pipeline);
1148 
1149    radv_ray_tracing_state_key_finish(&rt_state);
1150    return result;
1151 
1152 fail:
1153    radv_ray_tracing_state_key_finish(&rt_state);
1154    radv_pipeline_destroy(device, &pipeline->base.base, pAllocator);
1155    return result;
1156 }
1157 
1158 void
radv_destroy_ray_tracing_pipeline(struct radv_device * device,struct radv_ray_tracing_pipeline * pipeline)1159 radv_destroy_ray_tracing_pipeline(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline)
1160 {
1161    for (unsigned i = 0; i < pipeline->stage_count; i++) {
1162       if (pipeline->stages[i].nir)
1163          vk_pipeline_cache_object_unref(&device->vk, pipeline->stages[i].nir);
1164       if (pipeline->stages[i].shader)
1165          radv_shader_unref(device, pipeline->stages[i].shader);
1166    }
1167 
1168    if (pipeline->prolog)
1169       radv_shader_unref(device, pipeline->prolog);
1170    if (pipeline->base.base.shaders[MESA_SHADER_INTERSECTION])
1171       radv_shader_unref(device, pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]);
1172 }
1173 
1174 VKAPI_ATTR VkResult VKAPI_CALL
radv_CreateRayTracingPipelinesKHR(VkDevice _device,VkDeferredOperationKHR deferredOperation,VkPipelineCache pipelineCache,uint32_t count,const VkRayTracingPipelineCreateInfoKHR * pCreateInfos,const VkAllocationCallbacks * pAllocator,VkPipeline * pPipelines)1175 radv_CreateRayTracingPipelinesKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation,
1176                                   VkPipelineCache pipelineCache, uint32_t count,
1177                                   const VkRayTracingPipelineCreateInfoKHR *pCreateInfos,
1178                                   const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)
1179 {
1180    VkResult result = VK_SUCCESS;
1181 
1182    unsigned i = 0;
1183    for (; i < count; i++) {
1184       VkResult r;
1185       r = radv_rt_pipeline_create(_device, pipelineCache, &pCreateInfos[i], pAllocator, &pPipelines[i]);
1186       if (r != VK_SUCCESS) {
1187          result = r;
1188          pPipelines[i] = VK_NULL_HANDLE;
1189 
1190          const VkPipelineCreateFlagBits2KHR create_flags = vk_rt_pipeline_create_flags(&pCreateInfos[i]);
1191          if (create_flags & VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT_KHR)
1192             break;
1193       }
1194    }
1195 
1196    for (; i < count; ++i)
1197       pPipelines[i] = VK_NULL_HANDLE;
1198 
1199    if (result != VK_SUCCESS)
1200       return result;
1201 
1202    /* Work around Portal RTX not handling VK_OPERATION_NOT_DEFERRED_KHR correctly. */
1203    if (deferredOperation != VK_NULL_HANDLE)
1204       return VK_OPERATION_DEFERRED_KHR;
1205 
1206    return result;
1207 }
1208 
1209 VKAPI_ATTR VkResult VKAPI_CALL
radv_GetRayTracingShaderGroupHandlesKHR(VkDevice device,VkPipeline _pipeline,uint32_t firstGroup,uint32_t groupCount,size_t dataSize,void * pData)1210 radv_GetRayTracingShaderGroupHandlesKHR(VkDevice device, VkPipeline _pipeline, uint32_t firstGroup, uint32_t groupCount,
1211                                         size_t dataSize, void *pData)
1212 {
1213    VK_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
1214    struct radv_ray_tracing_group *groups = radv_pipeline_to_ray_tracing(pipeline)->groups;
1215    char *data = pData;
1216 
1217    STATIC_ASSERT(sizeof(struct radv_pipeline_group_handle) <= RADV_RT_HANDLE_SIZE);
1218 
1219    memset(data, 0, groupCount * RADV_RT_HANDLE_SIZE);
1220 
1221    for (uint32_t i = 0; i < groupCount; ++i) {
1222       memcpy(data + i * RADV_RT_HANDLE_SIZE, &groups[firstGroup + i].handle, sizeof(struct radv_pipeline_group_handle));
1223    }
1224 
1225    return VK_SUCCESS;
1226 }
1227 
1228 VKAPI_ATTR VkDeviceSize VKAPI_CALL
radv_GetRayTracingShaderGroupStackSizeKHR(VkDevice device,VkPipeline _pipeline,uint32_t group,VkShaderGroupShaderKHR groupShader)1229 radv_GetRayTracingShaderGroupStackSizeKHR(VkDevice device, VkPipeline _pipeline, uint32_t group,
1230                                           VkShaderGroupShaderKHR groupShader)
1231 {
1232    VK_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
1233    struct radv_ray_tracing_pipeline *rt_pipeline = radv_pipeline_to_ray_tracing(pipeline);
1234    struct radv_ray_tracing_group *rt_group = &rt_pipeline->groups[group];
1235    switch (groupShader) {
1236    case VK_SHADER_GROUP_SHADER_GENERAL_KHR:
1237    case VK_SHADER_GROUP_SHADER_CLOSEST_HIT_KHR:
1238       return rt_pipeline->stages[rt_group->recursive_shader].stack_size;
1239    case VK_SHADER_GROUP_SHADER_ANY_HIT_KHR:
1240       return rt_pipeline->stages[rt_group->any_hit_shader].stack_size;
1241    case VK_SHADER_GROUP_SHADER_INTERSECTION_KHR:
1242       return rt_pipeline->stages[rt_group->intersection_shader].stack_size;
1243    default:
1244       return 0;
1245    }
1246 }
1247 
1248 VKAPI_ATTR VkResult VKAPI_CALL
radv_GetRayTracingCaptureReplayShaderGroupHandlesKHR(VkDevice device,VkPipeline _pipeline,uint32_t firstGroup,uint32_t groupCount,size_t dataSize,void * pData)1249 radv_GetRayTracingCaptureReplayShaderGroupHandlesKHR(VkDevice device, VkPipeline _pipeline, uint32_t firstGroup,
1250                                                      uint32_t groupCount, size_t dataSize, void *pData)
1251 {
1252    VK_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
1253    struct radv_ray_tracing_pipeline *rt_pipeline = radv_pipeline_to_ray_tracing(pipeline);
1254    struct radv_rt_capture_replay_handle *data = pData;
1255 
1256    memset(data, 0, groupCount * sizeof(struct radv_rt_capture_replay_handle));
1257 
1258    for (uint32_t i = 0; i < groupCount; ++i) {
1259       uint32_t recursive_shader = rt_pipeline->groups[firstGroup + i].recursive_shader;
1260       if (recursive_shader != VK_SHADER_UNUSED_KHR) {
1261          struct radv_shader *shader = rt_pipeline->stages[recursive_shader].shader;
1262          if (shader) {
1263             data[i].recursive_shader_alloc.offset = shader->alloc->offset;
1264             data[i].recursive_shader_alloc.size = shader->alloc->size;
1265             data[i].recursive_shader_alloc.arena_va = shader->alloc->arena->bo->va;
1266             data[i].recursive_shader_alloc.arena_size = shader->alloc->arena->size;
1267          }
1268       }
1269       data[i].non_recursive_idx = rt_pipeline->groups[firstGroup + i].handle.any_hit_index;
1270    }
1271 
1272    return VK_SUCCESS;
1273 }
1274