xref: /aosp_15_r20/external/mesa3d/src/amd/vulkan/radv_acceleration_structure.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2021 Bas Nieuwenhuizen
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "radv_sqtt.h"
8 
9 #include "meta/radv_meta.h"
10 #include "nir_builder.h"
11 #include "radv_cs.h"
12 #include "radv_entrypoints.h"
13 
14 #include "radix_sort/common/vk/barrier.h"
15 #include "radix_sort/radv_radix_sort.h"
16 #include "radix_sort/shaders/push.h"
17 
18 #include "bvh/build_interface.h"
19 #include "bvh/bvh.h"
20 
21 #include "vk_acceleration_structure.h"
22 #include "vk_common_entrypoints.h"
23 
24 static const uint32_t leaf_spv[] = {
25 #include "bvh/leaf.spv.h"
26 };
27 
28 static const uint32_t leaf_always_active_spv[] = {
29 #include "bvh/leaf_always_active.spv.h"
30 };
31 
32 static const uint32_t morton_spv[] = {
33 #include "bvh/morton.spv.h"
34 };
35 
36 static const uint32_t lbvh_main_spv[] = {
37 #include "bvh/lbvh_main.spv.h"
38 };
39 
40 static const uint32_t lbvh_generate_ir_spv[] = {
41 #include "bvh/lbvh_generate_ir.spv.h"
42 };
43 
44 static const uint32_t ploc_spv[] = {
45 #include "bvh/ploc_internal.spv.h"
46 };
47 
48 static const uint32_t copy_spv[] = {
49 #include "bvh/copy.spv.h"
50 };
51 
52 static const uint32_t encode_spv[] = {
53 #include "bvh/encode.spv.h"
54 };
55 
56 static const uint32_t encode_compact_spv[] = {
57 #include "bvh/encode_compact.spv.h"
58 };
59 
60 static const uint32_t header_spv[] = {
61 #include "bvh/header.spv.h"
62 };
63 
64 static const uint32_t update_spv[] = {
65 #include "bvh/update.spv.h"
66 };
67 
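/* Each radix-sort keyval is an 8-byte (Morton code, node id) pair; at most 24 bits
 * of the Morton code are significant, so only those bits need sort passes.
 */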
68 #define KEY_ID_PAIR_SIZE 8
69 #define MORTON_BIT_SIZE  24
70 
71 enum internal_build_type {
72    INTERNAL_BUILD_TYPE_LBVH,
73    INTERNAL_BUILD_TYPE_PLOC,
74    INTERNAL_BUILD_TYPE_UPDATE,
75 };
76 
77 struct build_config {
78    enum internal_build_type internal_type;
79    bool compact;
80    bool updateable;
81 };
82 
83 struct acceleration_structure_layout {
84    uint32_t geometry_info_offset;
85    uint32_t bvh_offset;
86    uint32_t leaf_nodes_offset;
87    uint32_t internal_nodes_offset;
88    uint32_t size;
89 };
90 
91 struct scratch_layout {
92    uint32_t size;
93    uint32_t update_size;
94 
95    uint32_t header_offset;
96 
97    /* Used for UPDATE only. */
98 
99    uint32_t internal_ready_count_offset;
100 
101    /* Used for BUILD only. */
102 
103    uint32_t sort_buffer_offset[2];
104    uint32_t sort_internal_offset;
105 
106    uint32_t ploc_prefix_sum_partition_offset;
107    uint32_t lbvh_node_offset;
108 
109    uint32_t ir_offset;
110    uint32_t internal_node_offset;
111 };
112 
113 static struct build_config
114 build_config(uint32_t leaf_count, const VkAccelerationStructureBuildGeometryInfoKHR *build_info)
115 {
116    struct build_config config = {0};
117 
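   /* PLOC produces better BVHs but is slower to build than LBVH. Use LBVH for
    * trivially small inputs and for BLASes that prefer fast builds or allow
    * updates; use PLOC for everything else (including all larger TLASes).
    */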
118    if (leaf_count <= 4)
119       config.internal_type = INTERNAL_BUILD_TYPE_LBVH;
120    else if (build_info->type == VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR)
121       config.internal_type = INTERNAL_BUILD_TYPE_PLOC;
122    else if (!(build_info->flags & VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_BUILD_BIT_KHR) &&
123             !(build_info->flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR))
124       config.internal_type = INTERNAL_BUILD_TYPE_PLOC;
125    else
126       config.internal_type = INTERNAL_BUILD_TYPE_LBVH;
127 
128    if (build_info->mode == VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR &&
129        build_info->type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR)
130       config.internal_type = INTERNAL_BUILD_TYPE_UPDATE;
131 
132    if ((build_info->flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR) &&
133        build_info->type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR)
134       config.updateable = true;
135 
136    if (build_info->flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR)
137       config.compact = true;
138 
139    return config;
140 }
141 
142 static void
143 get_build_layout(struct radv_device *device, uint32_t leaf_count,
144                  const VkAccelerationStructureBuildGeometryInfoKHR *build_info,
145                  struct acceleration_structure_layout *accel_struct, struct scratch_layout *scratch)
146 {
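   /* A binary BVH with N leaves needs at most N - 1 internal nodes; clamp the leaf
    * count to 2 so degenerate builds still get a root node.
    */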
147    uint32_t internal_count = MAX2(leaf_count, 2) - 1;
148 
149    VkGeometryTypeKHR geometry_type = VK_GEOMETRY_TYPE_TRIANGLES_KHR;
150 
151    if (build_info->geometryCount) {
152       if (build_info->pGeometries)
153          geometry_type = build_info->pGeometries[0].geometryType;
154       else
155          geometry_type = build_info->ppGeometries[0]->geometryType;
156    }
157 
158    uint32_t bvh_leaf_size;
159    switch (geometry_type) {
160    case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
161       bvh_leaf_size = sizeof(struct radv_bvh_triangle_node);
162       break;
163    case VK_GEOMETRY_TYPE_AABBS_KHR:
164       bvh_leaf_size = sizeof(struct radv_bvh_aabb_node);
165       break;
166    case VK_GEOMETRY_TYPE_INSTANCES_KHR:
167       bvh_leaf_size = sizeof(struct radv_bvh_instance_node);
168       break;
169    default:
170       unreachable("Unknown VkGeometryTypeKHR");
171    }
172 
173    if (accel_struct) {
174       uint64_t bvh_size = bvh_leaf_size * leaf_count + sizeof(struct radv_bvh_box32_node) * internal_count;
175       uint32_t offset = 0;
176       offset += sizeof(struct radv_accel_struct_header);
177 
178       if (device->rra_trace.accel_structs) {
179          accel_struct->geometry_info_offset = offset;
180          offset += sizeof(struct radv_accel_struct_geometry_info) * build_info->geometryCount;
181       }
182       /* Parent links have to go directly before bvh_offset because we index them using
183        * negative offsets from there (4 bytes of link storage per 64 bytes of BVH data). */
184       offset += bvh_size / 64 * 4;
185 
186       /* The BVH, and hence bvh_offset, needs 64-byte alignment for RT nodes. */
187       offset = ALIGN(offset, 64);
188       accel_struct->bvh_offset = offset;
189 
190       /* root node */
191       offset += sizeof(struct radv_bvh_box32_node);
192 
193       accel_struct->leaf_nodes_offset = offset;
194       offset += bvh_leaf_size * leaf_count;
195 
196       accel_struct->internal_nodes_offset = offset;
197       /* Factor out the root node. */
198       offset += sizeof(struct radv_bvh_box32_node) * (internal_count - 1);
199 
200       accel_struct->size = offset;
201    }
202 
203    if (scratch) {
204       radix_sort_vk_memory_requirements_t requirements = {
205          0,
206       };
207       if (radv_device_init_accel_struct_build_state(device) == VK_SUCCESS)
208          radix_sort_vk_get_memory_requirements(device->meta_state.accel_struct_build.radix_sort, leaf_count,
209                                                &requirements);
210 
211       uint32_t offset = 0;
212 
213       uint32_t ploc_scratch_space = 0;
214       uint32_t lbvh_node_space = 0;
215 
216       struct build_config config = build_config(leaf_count, build_info);
217 
218       if (config.internal_type == INTERNAL_BUILD_TYPE_PLOC)
219          ploc_scratch_space = DIV_ROUND_UP(leaf_count, PLOC_WORKGROUP_SIZE) * sizeof(struct ploc_prefix_scan_partition);
220       else
221          lbvh_node_space = sizeof(struct lbvh_node_info) * internal_count;
222 
223       scratch->header_offset = offset;
224       offset += sizeof(struct radv_ir_header);
225 
226       scratch->sort_buffer_offset[0] = offset;
227       offset += requirements.keyvals_size;
228 
229       scratch->sort_buffer_offset[1] = offset;
230       offset += requirements.keyvals_size;
231 
232       scratch->sort_internal_offset = offset;
233       /* The internal sorting data is no longer needed once PLOC/LBVH run,
234        * so save space by aliasing these allocations. */
235       scratch->ploc_prefix_sum_partition_offset = offset;
236       scratch->lbvh_node_offset = offset;
237       offset += MAX3(requirements.internal_size, ploc_scratch_space, lbvh_node_space);
238 
239       scratch->ir_offset = offset;
240       offset += sizeof(struct radv_ir_node) * leaf_count;
241 
242       scratch->internal_node_offset = offset;
243       offset += sizeof(struct radv_ir_box_node) * internal_count;
244 
245       scratch->size = offset;
246 
247       if (build_info->type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR) {
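         /* Update scratch holds one AABB per leaf node, followed by one ready
          * count per internal node.
          */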
248          uint32_t update_offset = 0;
249 
250          update_offset += sizeof(radv_aabb) * leaf_count;
251          scratch->internal_ready_count_offset = update_offset;
252 
253          update_offset += sizeof(uint32_t) * internal_count;
254          scratch->update_size = update_offset;
255       } else {
256          scratch->update_size = offset;
257       }
258    }
259 }
260 
261 VKAPI_ATTR void VKAPI_CALL
262 radv_GetAccelerationStructureBuildSizesKHR(VkDevice _device, VkAccelerationStructureBuildTypeKHR buildType,
263                                            const VkAccelerationStructureBuildGeometryInfoKHR *pBuildInfo,
264                                            const uint32_t *pMaxPrimitiveCounts,
265                                            VkAccelerationStructureBuildSizesInfoKHR *pSizeInfo)
266 {
267    VK_FROM_HANDLE(radv_device, device, _device);
268 
269    STATIC_ASSERT(sizeof(struct radv_bvh_triangle_node) == 64);
270    STATIC_ASSERT(sizeof(struct radv_bvh_aabb_node) == 64);
271    STATIC_ASSERT(sizeof(struct radv_bvh_instance_node) == 128);
272    STATIC_ASSERT(sizeof(struct radv_bvh_box16_node) == 64);
273    STATIC_ASSERT(sizeof(struct radv_bvh_box32_node) == 128);
274 
275    uint32_t leaf_count = 0;
276    for (uint32_t i = 0; i < pBuildInfo->geometryCount; i++)
277       leaf_count += pMaxPrimitiveCounts[i];
278 
279    struct acceleration_structure_layout accel_struct;
280    struct scratch_layout scratch;
281    get_build_layout(device, leaf_count, pBuildInfo, &accel_struct, &scratch);
282 
283    pSizeInfo->accelerationStructureSize = accel_struct.size;
284    pSizeInfo->updateScratchSize = scratch.update_size;
285    pSizeInfo->buildScratchSize = scratch.size;
286 }
287 
288 VKAPI_ATTR VkResult VKAPI_CALL
289 radv_WriteAccelerationStructuresPropertiesKHR(VkDevice _device, uint32_t accelerationStructureCount,
290                                               const VkAccelerationStructureKHR *pAccelerationStructures,
291                                               VkQueryType queryType, size_t dataSize, void *pData, size_t stride)
292 {
293    unreachable("Unimplemented");
294    return VK_ERROR_FEATURE_NOT_PRESENT;
295 }
296 
297 VKAPI_ATTR VkResult VKAPI_CALL
298 radv_BuildAccelerationStructuresKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation, uint32_t infoCount,
299                                     const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
300                                     const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos)
301 {
302    unreachable("Unimplemented");
303    return VK_ERROR_FEATURE_NOT_PRESENT;
304 }
305 
306 VKAPI_ATTR VkResult VKAPI_CALL
307 radv_CopyAccelerationStructureKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation,
308                                   const VkCopyAccelerationStructureInfoKHR *pInfo)
309 {
310    unreachable("Unimplemented");
311    return VK_ERROR_FEATURE_NOT_PRESENT;
312 }
313 
314 void
315 radv_device_finish_accel_struct_build_state(struct radv_device *device)
316 {
317    VkDevice _device = radv_device_to_handle(device);
318    struct radv_meta_state *state = &device->meta_state;
319    struct vk_device_dispatch_table *dispatch = &device->vk.dispatch_table;
320 
321    dispatch->DestroyPipeline(_device, state->accel_struct_build.copy_pipeline, &state->alloc);
322    dispatch->DestroyPipeline(_device, state->accel_struct_build.ploc_pipeline, &state->alloc);
323    dispatch->DestroyPipeline(_device, state->accel_struct_build.lbvh_generate_ir_pipeline, &state->alloc);
324    dispatch->DestroyPipeline(_device, state->accel_struct_build.lbvh_main_pipeline, &state->alloc);
325    dispatch->DestroyPipeline(_device, state->accel_struct_build.leaf_pipeline, &state->alloc);
326    dispatch->DestroyPipeline(_device, state->accel_struct_build.leaf_updateable_pipeline, &state->alloc);
327    dispatch->DestroyPipeline(_device, state->accel_struct_build.encode_pipeline, &state->alloc);
328    dispatch->DestroyPipeline(_device, state->accel_struct_build.encode_compact_pipeline, &state->alloc);
329    dispatch->DestroyPipeline(_device, state->accel_struct_build.header_pipeline, &state->alloc);
330    dispatch->DestroyPipeline(_device, state->accel_struct_build.morton_pipeline, &state->alloc);
331    dispatch->DestroyPipeline(_device, state->accel_struct_build.update_pipeline, &state->alloc);
332    radv_DestroyPipelineLayout(_device, state->accel_struct_build.copy_p_layout, &state->alloc);
333    radv_DestroyPipelineLayout(_device, state->accel_struct_build.ploc_p_layout, &state->alloc);
334    radv_DestroyPipelineLayout(_device, state->accel_struct_build.lbvh_generate_ir_p_layout, &state->alloc);
335    radv_DestroyPipelineLayout(_device, state->accel_struct_build.lbvh_main_p_layout, &state->alloc);
336    radv_DestroyPipelineLayout(_device, state->accel_struct_build.leaf_p_layout, &state->alloc);
337    radv_DestroyPipelineLayout(_device, state->accel_struct_build.encode_p_layout, &state->alloc);
338    radv_DestroyPipelineLayout(_device, state->accel_struct_build.header_p_layout, &state->alloc);
339    radv_DestroyPipelineLayout(_device, state->accel_struct_build.morton_p_layout, &state->alloc);
340    radv_DestroyPipelineLayout(_device, state->accel_struct_build.update_p_layout, &state->alloc);
341 
342    if (state->accel_struct_build.radix_sort)
343       radix_sort_vk_destroy(state->accel_struct_build.radix_sort, _device, &state->alloc);
344 
345    radv_DestroyBuffer(_device, state->accel_struct_build.null.buffer, &state->alloc);
346    radv_FreeMemory(_device, state->accel_struct_build.null.memory, &state->alloc);
347    vk_common_DestroyAccelerationStructureKHR(_device, state->accel_struct_build.null.accel_struct, &state->alloc);
348 }
349 
350 static VkResult
351 create_build_pipeline_spv(struct radv_device *device, const uint32_t *spv, uint32_t spv_size,
352                           unsigned push_constant_size, VkPipeline *pipeline, VkPipelineLayout *layout)
353 {
354    if (*pipeline)
355       return VK_SUCCESS;
356 
357    VkDevice _device = radv_device_to_handle(device);
358 
359    const VkPipelineLayoutCreateInfo pl_create_info = {
360       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
361       .setLayoutCount = 0,
362       .pushConstantRangeCount = 1,
363       .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constant_size},
364    };
365 
366    VkShaderModuleCreateInfo module_info = {
367       .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
368       .pNext = NULL,
369       .flags = 0,
370       .codeSize = spv_size,
371       .pCode = spv,
372    };
373 
374    VkShaderModule module;
375    VkResult result =
376       device->vk.dispatch_table.CreateShaderModule(_device, &module_info, &device->meta_state.alloc, &module);
377    if (result != VK_SUCCESS)
378       return result;
379 
380    if (!*layout) {
381       result = radv_CreatePipelineLayout(_device, &pl_create_info, &device->meta_state.alloc, layout);
382       if (result != VK_SUCCESS)
383          goto cleanup;
384    }
385 
386    VkPipelineShaderStageCreateInfo shader_stage = {
387       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
388       .stage = VK_SHADER_STAGE_COMPUTE_BIT,
389       .module = module,
390       .pName = "main",
391       .pSpecializationInfo = NULL,
392    };
393 
394    VkComputePipelineCreateInfo pipeline_info = {
395       .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
396       .stage = shader_stage,
397       .flags = 0,
398       .layout = *layout,
399    };
400 
401    result = device->vk.dispatch_table.CreateComputePipelines(_device, device->meta_state.cache, 1, &pipeline_info,
402                                                              &device->meta_state.alloc, pipeline);
403 
404 cleanup:
405    device->vk.dispatch_table.DestroyShaderModule(_device, module, &device->meta_state.alloc);
406    return result;
407 }
408 
409 VkResult
410 radv_device_init_null_accel_struct(struct radv_device *device)
411 {
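   /* Create a minimal "null" acceleration structure: a TLAS whose root box node has
    * four invalid children with NaN bounds, so traversing it always misses.
    */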
412    const struct radv_physical_device *pdev = radv_device_physical(device);
413 
414    if (pdev->memory_properties.memoryTypeCount == 0)
415       return VK_SUCCESS; /* Exit in the case of null winsys. */
416 
417    VkDevice _device = radv_device_to_handle(device);
418 
419    uint32_t bvh_offset = ALIGN(sizeof(struct radv_accel_struct_header), 64);
420    uint32_t size = bvh_offset + sizeof(struct radv_bvh_box32_node);
421 
422    VkResult result;
423 
424    VkBuffer buffer = VK_NULL_HANDLE;
425    VkDeviceMemory memory = VK_NULL_HANDLE;
426    VkAccelerationStructureKHR accel_struct = VK_NULL_HANDLE;
427 
428    VkBufferCreateInfo buffer_create_info = {
429       .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
430       .pNext =
431          &(VkBufferUsageFlags2CreateInfoKHR){
432             .sType = VK_STRUCTURE_TYPE_BUFFER_USAGE_FLAGS_2_CREATE_INFO_KHR,
433             .usage = VK_BUFFER_USAGE_2_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR,
434          },
435       .size = size,
436       .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
437    };
438 
439    result = radv_CreateBuffer(_device, &buffer_create_info, &device->meta_state.alloc, &buffer);
440    if (result != VK_SUCCESS)
441       return result;
442 
443    VkBufferMemoryRequirementsInfo2 info = {
444       .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
445       .buffer = buffer,
446    };
447    VkMemoryRequirements2 mem_req = {
448       .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
449    };
450    vk_common_GetBufferMemoryRequirements2(_device, &info, &mem_req);
451 
452    VkMemoryAllocateInfo alloc_info = {
453       .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
454       .allocationSize = mem_req.memoryRequirements.size,
455       .memoryTypeIndex =
456          radv_find_memory_index(pdev, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
457                                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
458    };
459 
460    result = radv_AllocateMemory(_device, &alloc_info, &device->meta_state.alloc, &memory);
461    if (result != VK_SUCCESS)
462       return result;
463 
464    VkBindBufferMemoryInfo bind_info = {
465       .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
466       .buffer = buffer,
467       .memory = memory,
468    };
469 
470    result = radv_BindBufferMemory2(_device, 1, &bind_info);
471    if (result != VK_SUCCESS)
472       return result;
473 
474    void *data;
475    result = vk_common_MapMemory(_device, memory, 0, size, 0, &data);
476    if (result != VK_SUCCESS)
477       return result;
478 
479    struct radv_accel_struct_header header = {
480       .bvh_offset = bvh_offset,
481    };
482    memcpy(data, &header, sizeof(struct radv_accel_struct_header));
483 
484    struct radv_bvh_box32_node root = {
485       .children =
486          {
487             RADV_BVH_INVALID_NODE,
488             RADV_BVH_INVALID_NODE,
489             RADV_BVH_INVALID_NODE,
490             RADV_BVH_INVALID_NODE,
491          },
492    };
493 
494    for (uint32_t child = 0; child < 4; child++) {
495       root.coords[child] = (radv_aabb){
496          .min.x = NAN,
497          .min.y = NAN,
498          .min.z = NAN,
499          .max.x = NAN,
500          .max.y = NAN,
501          .max.z = NAN,
502       };
503    }
504 
505    memcpy((uint8_t *)data + bvh_offset, &root, sizeof(struct radv_bvh_box32_node));
506 
507    vk_common_UnmapMemory(_device, memory);
508 
509    VkAccelerationStructureCreateInfoKHR create_info = {
510       .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR,
511       .buffer = buffer,
512       .size = size,
513       .type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR,
514    };
515 
516    result = vk_common_CreateAccelerationStructureKHR(_device, &create_info, &device->meta_state.alloc, &accel_struct);
517    if (result != VK_SUCCESS)
518       return result;
519 
520    device->meta_state.accel_struct_build.null.buffer = buffer;
521    device->meta_state.accel_struct_build.null.memory = memory;
522    device->meta_state.accel_struct_build.null.accel_struct = accel_struct;
523 
524    return VK_SUCCESS;
525 }
526 
527 VkResult
528 radv_device_init_accel_struct_build_state(struct radv_device *device)
529 {
530    VkResult result = VK_SUCCESS;
531    mtx_lock(&device->meta_state.mtx);
532 
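   /* The radix sort instance is created last, so its presence means all build
    * pipelines are already initialized.
    */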
533    if (device->meta_state.accel_struct_build.radix_sort)
534       goto exit;
535 
536    result = create_build_pipeline_spv(device, leaf_always_active_spv, sizeof(leaf_always_active_spv),
537                                       sizeof(struct leaf_args),
538                                       &device->meta_state.accel_struct_build.leaf_updateable_pipeline,
539                                       &device->meta_state.accel_struct_build.leaf_p_layout);
540    if (result != VK_SUCCESS)
541       goto exit;
542 
543    result = create_build_pipeline_spv(device, leaf_spv, sizeof(leaf_spv), sizeof(struct leaf_args),
544                                       &device->meta_state.accel_struct_build.leaf_pipeline,
545                                       &device->meta_state.accel_struct_build.leaf_p_layout);
546    if (result != VK_SUCCESS)
547       goto exit;
548 
549    result = create_build_pipeline_spv(device, lbvh_main_spv, sizeof(lbvh_main_spv), sizeof(struct lbvh_main_args),
550                                       &device->meta_state.accel_struct_build.lbvh_main_pipeline,
551                                       &device->meta_state.accel_struct_build.lbvh_main_p_layout);
552    if (result != VK_SUCCESS)
553       goto exit;
554 
555    result = create_build_pipeline_spv(device, lbvh_generate_ir_spv, sizeof(lbvh_generate_ir_spv),
556                                       sizeof(struct lbvh_generate_ir_args),
557                                       &device->meta_state.accel_struct_build.lbvh_generate_ir_pipeline,
558                                       &device->meta_state.accel_struct_build.lbvh_generate_ir_p_layout);
559    if (result != VK_SUCCESS)
560       goto exit;
561 
562    result = create_build_pipeline_spv(device, ploc_spv, sizeof(ploc_spv), sizeof(struct ploc_args),
563                                       &device->meta_state.accel_struct_build.ploc_pipeline,
564                                       &device->meta_state.accel_struct_build.ploc_p_layout);
565    if (result != VK_SUCCESS)
566       goto exit;
567 
568    result = create_build_pipeline_spv(device, encode_spv, sizeof(encode_spv), sizeof(struct encode_args),
569                                       &device->meta_state.accel_struct_build.encode_pipeline,
570                                       &device->meta_state.accel_struct_build.encode_p_layout);
571    if (result != VK_SUCCESS)
572       goto exit;
573 
574    result =
575       create_build_pipeline_spv(device, encode_compact_spv, sizeof(encode_compact_spv), sizeof(struct encode_args),
576                                 &device->meta_state.accel_struct_build.encode_compact_pipeline,
577                                 &device->meta_state.accel_struct_build.encode_p_layout);
578    if (result != VK_SUCCESS)
579       goto exit;
580 
581    result = create_build_pipeline_spv(device, header_spv, sizeof(header_spv), sizeof(struct header_args),
582                                       &device->meta_state.accel_struct_build.header_pipeline,
583                                       &device->meta_state.accel_struct_build.header_p_layout);
584    if (result != VK_SUCCESS)
585       goto exit;
586 
587    result = create_build_pipeline_spv(device, morton_spv, sizeof(morton_spv), sizeof(struct morton_args),
588                                       &device->meta_state.accel_struct_build.morton_pipeline,
589                                       &device->meta_state.accel_struct_build.morton_p_layout);
590    if (result != VK_SUCCESS)
591       goto exit;
592 
593    result = create_build_pipeline_spv(device, update_spv, sizeof(update_spv), sizeof(struct update_args),
594                                       &device->meta_state.accel_struct_build.update_pipeline,
595                                       &device->meta_state.accel_struct_build.update_p_layout);
596    if (result != VK_SUCCESS)
597       goto exit;
598 
599    device->meta_state.accel_struct_build.radix_sort =
600       radv_create_radix_sort_u64(radv_device_to_handle(device), &device->meta_state.alloc, device->meta_state.cache);
601 exit:
602    mtx_unlock(&device->meta_state.mtx);
603    return result;
604 }
605 
606 static VkResult
607 radv_device_init_accel_struct_copy_state(struct radv_device *device)
608 {
609    mtx_lock(&device->meta_state.mtx);
610 
611    VkResult result = create_build_pipeline_spv(device, copy_spv, sizeof(copy_spv), sizeof(struct copy_args),
612                                                &device->meta_state.accel_struct_build.copy_pipeline,
613                                                &device->meta_state.accel_struct_build.copy_p_layout);
614 
615    mtx_unlock(&device->meta_state.mtx);
616    return result;
617 }
618 
619 struct bvh_state {
620    uint32_t node_count;
621    uint32_t scratch_offset;
622 
623    uint32_t leaf_node_count;
624    uint32_t internal_node_count;
625    uint32_t leaf_node_size;
626 
627    struct acceleration_structure_layout accel_struct;
628    struct scratch_layout scratch;
629    struct build_config config;
630 
631    /* Radix sort state */
632    uint32_t scatter_blocks;
633    uint32_t count_ru_scatter;
634    uint32_t histo_blocks;
635    uint32_t count_ru_histo;
636    struct rs_push_scatter push_scatter;
637 };
638 
639 struct radv_bvh_batch_state {
640    bool any_compact;
641    bool any_non_compact;
642    bool any_ploc;
643    bool any_lbvh;
644    bool any_updateable;
645    bool any_non_updateable;
646    bool any_update;
647 };
648 
649 static uint32_t
650 pack_geometry_id_and_flags(uint32_t geometry_id, uint32_t flags)
651 {
652    uint32_t geometry_id_and_flags = geometry_id;
653    if (flags & VK_GEOMETRY_OPAQUE_BIT_KHR)
654       geometry_id_and_flags |= RADV_GEOMETRY_OPAQUE;
655 
656    return geometry_id_and_flags;
657 }
658 
659 static struct radv_bvh_geometry_data
660 fill_geometry_data(VkAccelerationStructureTypeKHR type, struct bvh_state *bvh_state, uint32_t geom_index,
661                    const VkAccelerationStructureGeometryKHR *geometry,
662                    const VkAccelerationStructureBuildRangeInfoKHR *build_range_info)
663 {
664    struct radv_bvh_geometry_data data = {
665       .first_id = bvh_state->node_count,
666       .geometry_id = pack_geometry_id_and_flags(geom_index, geometry->flags),
667       .geometry_type = geometry->geometryType,
668    };
669 
670    switch (geometry->geometryType) {
671    case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
672       assert(type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR);
673 
674       data.data = geometry->geometry.triangles.vertexData.deviceAddress +
675                   build_range_info->firstVertex * geometry->geometry.triangles.vertexStride;
676       data.indices = geometry->geometry.triangles.indexData.deviceAddress;
677 
678       if (geometry->geometry.triangles.indexType == VK_INDEX_TYPE_NONE_KHR)
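      /* Per the spec, primitiveOffset is applied to the vertex data for non-indexed
       * geometry and to the index data otherwise.
       */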
679          data.data += build_range_info->primitiveOffset;
680       else
681          data.indices += build_range_info->primitiveOffset;
682 
683       data.transform = geometry->geometry.triangles.transformData.deviceAddress;
684       if (data.transform)
685          data.transform += build_range_info->transformOffset;
686 
687       data.stride = geometry->geometry.triangles.vertexStride;
688       data.vertex_format = geometry->geometry.triangles.vertexFormat;
689       data.index_format = geometry->geometry.triangles.indexType;
690       break;
691    case VK_GEOMETRY_TYPE_AABBS_KHR:
692       assert(type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR);
693 
694       data.data = geometry->geometry.aabbs.data.deviceAddress + build_range_info->primitiveOffset;
695       data.stride = geometry->geometry.aabbs.stride;
696       break;
697    case VK_GEOMETRY_TYPE_INSTANCES_KHR:
698       assert(type == VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR);
699 
700       data.data = geometry->geometry.instances.data.deviceAddress + build_range_info->primitiveOffset;
701 
702       if (geometry->geometry.instances.arrayOfPointers)
703          data.stride = 8;
704       else
705          data.stride = sizeof(VkAccelerationStructureInstanceKHR);
706       break;
707    default:
708       unreachable("Unknown geometryType");
709    }
710 
711    return data;
712 }
713 
714 static void
715 build_leaves(VkCommandBuffer commandBuffer, uint32_t infoCount,
716              const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
717              const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos, struct bvh_state *bvh_states,
718              bool updateable)
719 {
720    VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
721    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
722 
723    radv_write_user_event_marker(cmd_buffer, UserEventPush, "leaves");
724 
725    device->vk.dispatch_table.CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
726                                              updateable ? device->meta_state.accel_struct_build.leaf_updateable_pipeline
727                                                         : device->meta_state.accel_struct_build.leaf_pipeline);
728 
729    for (uint32_t i = 0; i < infoCount; ++i) {
730       if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
731          continue;
732       if (bvh_states[i].config.updateable != updateable)
733          continue;
734 
735       VK_FROM_HANDLE(vk_acceleration_structure, accel_struct, pInfos[i].dstAccelerationStructure);
736 
737       struct leaf_args leaf_consts = {
738          .ir = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
739          .bvh = vk_acceleration_structure_get_va(accel_struct) + bvh_states[i].accel_struct.leaf_nodes_offset,
740          .header = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
741          .ids = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0],
742       };
743 
744       for (unsigned j = 0; j < pInfos[i].geometryCount; ++j) {
745          const VkAccelerationStructureGeometryKHR *geom =
746             pInfos[i].pGeometries ? &pInfos[i].pGeometries[j] : pInfos[i].ppGeometries[j];
747 
748          const VkAccelerationStructureBuildRangeInfoKHR *build_range_info = &ppBuildRangeInfos[i][j];
749 
750          leaf_consts.geom_data = fill_geometry_data(pInfos[i].type, &bvh_states[i], j, geom, build_range_info);
751 
752          vk_common_CmdPushConstants(commandBuffer, device->meta_state.accel_struct_build.leaf_p_layout,
753                                     VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(leaf_consts), &leaf_consts);
754          radv_unaligned_dispatch(cmd_buffer, build_range_info->primitiveCount, 1, 1);
755 
756          bvh_states[i].leaf_node_count += build_range_info->primitiveCount;
757          bvh_states[i].node_count += build_range_info->primitiveCount;
758       }
759    }
760 
761    radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
762 }
763 
764 static void
765 morton_generate(VkCommandBuffer commandBuffer, uint32_t infoCount,
766                 const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states,
767                 enum radv_cmd_flush_bits flush_bits)
768 {
769    VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
770    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
771 
772    radv_write_user_event_marker(cmd_buffer, UserEventPush, "morton");
773 
774    device->vk.dispatch_table.CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
775                                              device->meta_state.accel_struct_build.morton_pipeline);
776 
777    for (uint32_t i = 0; i < infoCount; ++i) {
778       if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
779          continue;
780       const struct morton_args consts = {
781          .bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
782          .header = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
783          .ids = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0],
784       };
785 
786       vk_common_CmdPushConstants(commandBuffer, device->meta_state.accel_struct_build.morton_p_layout,
787                                  VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);
788       radv_unaligned_dispatch(cmd_buffer, bvh_states[i].node_count, 1, 1);
789    }
790 
791    radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
792 
793    cmd_buffer->state.flush_bits |= flush_bits;
794 }
795 
796 static void
797 morton_sort(VkCommandBuffer commandBuffer, uint32_t infoCount,
798             const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states,
799             enum radv_cmd_flush_bits flush_bits)
800 {
801    /* Copyright 2019 The Fuchsia Authors. */
802    VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
803    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
804 
805    radv_write_user_event_marker(cmd_buffer, UserEventPush, "sort");
806 
807    radix_sort_vk_t *rs = device->meta_state.accel_struct_build.radix_sort;
808 
809    /*
810     * OVERVIEW
811     *
812     *   1. Pad the keyvals in `scatter_even`.
813     *   2. Zero the `histograms` and `partitions`.
814     *      --- BARRIER ---
815     *   3. HISTOGRAM is dispatched before PREFIX.
816     *      --- BARRIER ---
817     *   4. PREFIX is dispatched before the first SCATTER.
818     *      --- BARRIER ---
819     *   5. One or more SCATTER dispatches.
820     *
821     * Note that the `partitions` buffer can be zeroed anytime before the first
822     * scatter.
823     */
824 
825    /* How many passes? */
826    uint32_t keyval_bytes = rs->config.keyval_dwords * (uint32_t)sizeof(uint32_t);
827    uint32_t keyval_bits = keyval_bytes * 8;
828    uint32_t key_bits = MIN2(MORTON_BIT_SIZE, keyval_bits);
829    uint32_t passes = (key_bits + RS_RADIX_LOG2 - 1) / RS_RADIX_LOG2;
830 
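   /* Scatter passes ping-pong between the even and odd keyval buffers, starting in
    * the even buffer, so after `passes` passes the sorted ids end up in
    * sort_buffer_offset[passes & 1]. Empty builds are never sorted and keep their
    * data in buffer 0.
    */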
831    for (uint32_t i = 0; i < infoCount; ++i) {
832       if (bvh_states[i].node_count)
833          bvh_states[i].scratch_offset = bvh_states[i].scratch.sort_buffer_offset[passes & 1];
834       else
835          bvh_states[i].scratch_offset = bvh_states[i].scratch.sort_buffer_offset[0];
836    }
837 
838    /*
839     * PAD KEYVALS AND ZERO HISTOGRAM/PARTITIONS
840     *
841     * Pad fractional blocks with max-valued keyvals.
842     *
843     * Zero the histograms and partitions buffer.
844     *
845     * This assumes the partitions follow the histograms.
846     */
847 
848    /* FIXME(allanmac): Consider precomputing some of these values and hanging them off `rs`. */
849 
850    /* How many scatter blocks? */
851    uint32_t scatter_wg_size = 1 << rs->config.scatter.workgroup_size_log2;
852    uint32_t scatter_block_kvs = scatter_wg_size * rs->config.scatter.block_rows;
853 
854    /*
855     * How many histogram blocks?
856     *
857     * Note that it's OK to have more max-valued digits counted by the histogram
858     * than sorted by the scatters because the sort is stable.
859     */
860    uint32_t histo_wg_size = 1 << rs->config.histogram.workgroup_size_log2;
861    uint32_t histo_block_kvs = histo_wg_size * rs->config.histogram.block_rows;
862 
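   /* The Morton keys occupy the most significant bytes of each keyval, so skip the
    * low-order bytes and only run `passes` scatter passes over the key bytes.
    */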
863    uint32_t pass_idx = (keyval_bytes - passes);
864 
865    for (uint32_t i = 0; i < infoCount; ++i) {
866       if (!bvh_states[i].node_count)
867          continue;
868       if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
869          continue;
870 
871       uint64_t keyvals_even_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0];
872       uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;
873 
874       bvh_states[i].scatter_blocks = (bvh_states[i].node_count + scatter_block_kvs - 1) / scatter_block_kvs;
875       bvh_states[i].count_ru_scatter = bvh_states[i].scatter_blocks * scatter_block_kvs;
876 
877       bvh_states[i].histo_blocks = (bvh_states[i].count_ru_scatter + histo_block_kvs - 1) / histo_block_kvs;
878       bvh_states[i].count_ru_histo = bvh_states[i].histo_blocks * histo_block_kvs;
879 
880       /* Fill with max values */
881       if (bvh_states[i].count_ru_histo > bvh_states[i].node_count) {
882          radv_fill_buffer(cmd_buffer, NULL, NULL, keyvals_even_addr + bvh_states[i].node_count * keyval_bytes,
883                           (bvh_states[i].count_ru_histo - bvh_states[i].node_count) * keyval_bytes, 0xFFFFFFFF);
884       }
885 
886       /*
887        * Zero histograms and invalidate partitions.
888        *
889        * Note that the partition invalidation only needs to be performed once
890        * because the even/odd scatter dispatches rely on the previous pass to
891        * leave the partitions in an invalid state.
892        *
893        * Note that the last workgroup doesn't read/write a partition so it doesn't
894        * need to be initialized.
895        */
896       uint32_t histo_partition_count = passes + bvh_states[i].scatter_blocks - 1;
897 
898       uint32_t fill_base = pass_idx * (RS_RADIX_SIZE * sizeof(uint32_t));
899 
900       radv_fill_buffer(cmd_buffer, NULL, NULL, internal_addr + rs->internal.histograms.offset + fill_base,
901                        histo_partition_count * (RS_RADIX_SIZE * sizeof(uint32_t)), 0);
902    }
903 
904    /*
905     * Pipeline: HISTOGRAM
906     *
907     * TODO(allanmac): All subgroups should try to process approximately the same
908     * number of blocks in order to minimize tail effects.  This was implemented
909     * and reverted but should be reimplemented and benchmarked later.
910     */
911    vk_barrier_transfer_w_to_compute_r(commandBuffer);
912 
913    device->vk.dispatch_table.CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
914                                              rs->pipelines.named.histogram);
915 
916    for (uint32_t i = 0; i < infoCount; ++i) {
917       if (!bvh_states[i].node_count)
918          continue;
919       if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
920          continue;
921 
922       uint64_t keyvals_even_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0];
923       uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;
924 
925       /* Dispatch histogram */
926       struct rs_push_histogram push_histogram = {
927          .devaddr_histograms = internal_addr + rs->internal.histograms.offset,
928          .devaddr_keyvals = keyvals_even_addr,
929          .passes = passes,
930       };
931 
932       vk_common_CmdPushConstants(commandBuffer, rs->pipeline_layouts.named.histogram, VK_SHADER_STAGE_COMPUTE_BIT, 0,
933                                  sizeof(push_histogram), &push_histogram);
934 
935       vk_common_CmdDispatch(commandBuffer, bvh_states[i].histo_blocks, 1, 1);
936    }
937 
938    /*
939     * Pipeline: PREFIX
940     *
941     * Launch one workgroup per pass.
942     */
943    vk_barrier_compute_w_to_compute_r(commandBuffer);
944 
945    device->vk.dispatch_table.CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, rs->pipelines.named.prefix);
946 
947    for (uint32_t i = 0; i < infoCount; ++i) {
948       if (!bvh_states[i].node_count)
949          continue;
950       if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
951          continue;
952 
953       uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;
954 
955       struct rs_push_prefix push_prefix = {
956          .devaddr_histograms = internal_addr + rs->internal.histograms.offset,
957       };
958 
959       vk_common_CmdPushConstants(commandBuffer, rs->pipeline_layouts.named.prefix, VK_SHADER_STAGE_COMPUTE_BIT, 0,
960                                  sizeof(push_prefix), &push_prefix);
961 
962       vk_common_CmdDispatch(commandBuffer, passes, 1, 1);
963    }
964 
965    /* Pipeline: SCATTER */
966    vk_barrier_compute_w_to_compute_r(commandBuffer);
967 
968    uint32_t histogram_offset = pass_idx * (RS_RADIX_SIZE * sizeof(uint32_t));
969 
970    for (uint32_t i = 0; i < infoCount; i++) {
971       uint64_t keyvals_even_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0];
972       uint64_t keyvals_odd_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[1];
973       uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;
974 
975       bvh_states[i].push_scatter = (struct rs_push_scatter){
976          .devaddr_keyvals_even = keyvals_even_addr,
977          .devaddr_keyvals_odd = keyvals_odd_addr,
978          .devaddr_partitions = internal_addr + rs->internal.partitions.offset,
979          .devaddr_histograms = internal_addr + rs->internal.histograms.offset + histogram_offset,
980       };
981    }
982 
983    bool is_even = true;
984 
985    while (true) {
986       uint32_t pass_dword = pass_idx / 4;
987 
988       /* Bind new pipeline */
989       VkPipeline p =
990          is_even ? rs->pipelines.named.scatter[pass_dword].even : rs->pipelines.named.scatter[pass_dword].odd;
991       device->vk.dispatch_table.CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, p);
992 
993       /* Update push constants that changed */
994       VkPipelineLayout pl = is_even ? rs->pipeline_layouts.named.scatter[pass_dword].even
995                                     : rs->pipeline_layouts.named.scatter[pass_dword].odd;
996 
997       for (uint32_t i = 0; i < infoCount; i++) {
998          if (!bvh_states[i].node_count)
999             continue;
1000          if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
1001             continue;
1002 
1003          bvh_states[i].push_scatter.pass_offset = (pass_idx & 3) * RS_RADIX_LOG2;
1004 
1005          vk_common_CmdPushConstants(commandBuffer, pl, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(struct rs_push_scatter),
1006                                     &bvh_states[i].push_scatter);
1007 
1008          vk_common_CmdDispatch(commandBuffer, bvh_states[i].scatter_blocks, 1, 1);
1009 
1010          bvh_states[i].push_scatter.devaddr_histograms += (RS_RADIX_SIZE * sizeof(uint32_t));
1011       }
1012 
1013       /* Continue? */
1014       if (++pass_idx >= keyval_bytes)
1015          break;
1016 
1017       vk_barrier_compute_w_to_compute_r(commandBuffer);
1018 
1019       is_even ^= true;
1020    }
1021 
1022    radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
1023 
1024    cmd_buffer->state.flush_bits |= flush_bits;
1025 }
1026 
1027 static void
1028 lbvh_build_internal(VkCommandBuffer commandBuffer, uint32_t infoCount,
1029                     const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states,
1030                     enum radv_cmd_flush_bits flush_bits)
1031 {
1032    VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
1033    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
1034 
1035    radv_write_user_event_marker(cmd_buffer, UserEventPush, "lbvh");
1036 
1037    device->vk.dispatch_table.CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
1038                                              device->meta_state.accel_struct_build.lbvh_main_pipeline);
1039 
1040    for (uint32_t i = 0; i < infoCount; ++i) {
1041       if (bvh_states[i].config.internal_type != INTERNAL_BUILD_TYPE_LBVH)
1042          continue;
1043 
1044       uint32_t src_scratch_offset = bvh_states[i].scratch_offset;
1045       uint32_t internal_node_count = MAX2(bvh_states[i].node_count, 2) - 1;
1046 
1047       const struct lbvh_main_args consts = {
1048          .bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
1049          .src_ids = pInfos[i].scratchData.deviceAddress + src_scratch_offset,
1050          .node_info = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.lbvh_node_offset,
1051          .id_count = bvh_states[i].node_count,
1052          .internal_node_base = bvh_states[i].scratch.internal_node_offset - bvh_states[i].scratch.ir_offset,
1053       };
1054 
1055       vk_common_CmdPushConstants(commandBuffer, device->meta_state.accel_struct_build.lbvh_main_p_layout,
1056                                  VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);
1057       radv_unaligned_dispatch(cmd_buffer, internal_node_count, 1, 1);
1058       bvh_states[i].node_count = internal_node_count;
1059       bvh_states[i].internal_node_count = internal_node_count;
1060    }
1061 
1062    cmd_buffer->state.flush_bits |= flush_bits;
1063 
1064    device->vk.dispatch_table.CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
1065                                              device->meta_state.accel_struct_build.lbvh_generate_ir_pipeline);
1066 
1067    for (uint32_t i = 0; i < infoCount; ++i) {
1068       if (bvh_states[i].config.internal_type != INTERNAL_BUILD_TYPE_LBVH)
1069          continue;
1070 
1071       const struct lbvh_generate_ir_args consts = {
1072          .bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
1073          .node_info = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.lbvh_node_offset,
1074          .header = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
1075          .internal_node_base = bvh_states[i].scratch.internal_node_offset - bvh_states[i].scratch.ir_offset,
1076       };
1077 
1078       vk_common_CmdPushConstants(commandBuffer, device->meta_state.accel_struct_build.lbvh_generate_ir_p_layout,
1079                                  VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);
1080       radv_unaligned_dispatch(cmd_buffer, bvh_states[i].internal_node_count, 1, 1);
1081    }
1082 
1083    radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
1084 }
1085 
1086 static void
1087 ploc_build_internal(VkCommandBuffer commandBuffer, uint32_t infoCount,
1088                     const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states)
1089 {
1090    VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
1091    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
1092 
1093    radv_write_user_event_marker(cmd_buffer, UserEventPush, "ploc");
1094 
1095    device->vk.dispatch_table.CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
1096                                              device->meta_state.accel_struct_build.ploc_pipeline);
1097 
1098    for (uint32_t i = 0; i < infoCount; ++i) {
1099       if (bvh_states[i].config.internal_type != INTERNAL_BUILD_TYPE_PLOC)
1100          continue;
1101 
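      /* The radix sort may have finished in either keyval buffer; PLOC ping-pongs
       * node ids between the two, so the sort's output buffer becomes the source
       * and the other one the destination.
       */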
1102       uint32_t src_scratch_offset = bvh_states[i].scratch_offset;
1103       uint32_t dst_scratch_offset = (src_scratch_offset == bvh_states[i].scratch.sort_buffer_offset[0])
1104                                        ? bvh_states[i].scratch.sort_buffer_offset[1]
1105                                        : bvh_states[i].scratch.sort_buffer_offset[0];
1106 
1107       const struct ploc_args consts = {
1108          .bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
1109          .header = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
1110          .ids_0 = pInfos[i].scratchData.deviceAddress + src_scratch_offset,
1111          .ids_1 = pInfos[i].scratchData.deviceAddress + dst_scratch_offset,
1112          .prefix_scan_partitions =
1113             pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ploc_prefix_sum_partition_offset,
1114          .internal_node_offset = bvh_states[i].scratch.internal_node_offset - bvh_states[i].scratch.ir_offset,
1115       };
1116 
1117       vk_common_CmdPushConstants(commandBuffer, device->meta_state.accel_struct_build.ploc_p_layout,
1118                                  VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);
1119       vk_common_CmdDispatch(commandBuffer, MAX2(DIV_ROUND_UP(bvh_states[i].node_count, PLOC_WORKGROUP_SIZE), 1), 1, 1);
1120    }
1121 
1122    radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
1123 }
1124 
1125 static void
1126 encode_nodes(VkCommandBuffer commandBuffer, uint32_t infoCount,
1127              const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states, bool compact)
1128 {
1129    VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
1130    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
1131 
1132    radv_write_user_event_marker(cmd_buffer, UserEventPush, "encode");
1133 
1134    device->vk.dispatch_table.CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
1135                                              compact ? device->meta_state.accel_struct_build.encode_compact_pipeline
1136                                                      : device->meta_state.accel_struct_build.encode_pipeline);
1137 
1138    for (uint32_t i = 0; i < infoCount; ++i) {
1139       if (compact != bvh_states[i].config.compact)
1140          continue;
1141       if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
1142          continue;
1143 
1144       VK_FROM_HANDLE(vk_acceleration_structure, accel_struct, pInfos[i].dstAccelerationStructure);
1145 
1146       VkGeometryTypeKHR geometry_type = VK_GEOMETRY_TYPE_TRIANGLES_KHR;
1147 
1148       /* If the geometry count is 0, then the size does not matter
1149        * because it will be multiplied by 0.
1150        */
1151       if (pInfos[i].geometryCount)
1152          geometry_type =
1153             pInfos[i].pGeometries ? pInfos[i].pGeometries[0].geometryType : pInfos[i].ppGeometries[0]->geometryType;
1154 
1155       if (bvh_states[i].config.compact) {
1156          uint32_t dst_offset = bvh_states[i].accel_struct.internal_nodes_offset - bvh_states[i].accel_struct.bvh_offset;
1157          radv_update_buffer_cp(cmd_buffer,
1158                                pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset +
1159                                   offsetof(struct radv_ir_header, dst_node_offset),
1160                                &dst_offset, sizeof(uint32_t));
1161       }
1162 
1163       const struct encode_args args = {
1164          .intermediate_bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
1165          .output_bvh = vk_acceleration_structure_get_va(accel_struct) + bvh_states[i].accel_struct.bvh_offset,
1166          .header = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
1167          .output_bvh_offset = bvh_states[i].accel_struct.bvh_offset,
1168          .leaf_node_count = bvh_states[i].leaf_node_count,
1169          .geometry_type = geometry_type,
1170       };
1171       vk_common_CmdPushConstants(commandBuffer, device->meta_state.accel_struct_build.encode_p_layout,
1172                                  VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args), &args);
1173 
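      /* The final IR internal node count is only known on the GPU, so dispatch
       * indirectly off ir_internal_node_count in the IR header.
       */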
1174       struct radv_dispatch_info dispatch = {
1175          .unaligned = true,
1176          .ordered = true,
1177          .va = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset +
1178                offsetof(struct radv_ir_header, ir_internal_node_count),
1179       };
1180 
1181       radv_compute_dispatch(cmd_buffer, &dispatch);
1182    }
1183    /* This is the final access to the leaf nodes, so there is no need to flush. */
1184 
1185    radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
1186 }
1187 
1188 static void
1189 init_header(VkCommandBuffer commandBuffer, uint32_t infoCount,
1190             const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states,
1191             struct radv_bvh_batch_state *batch_state)
1192 {
1193    VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
1194    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
1195 
1196    if (batch_state->any_compact) {
1197       radv_write_user_event_marker(cmd_buffer, UserEventPush, "header");
1198 
1199       device->vk.dispatch_table.CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
1200                                                 device->meta_state.accel_struct_build.header_pipeline);
1201    }
1202 
1203    for (uint32_t i = 0; i < infoCount; ++i) {
1204       if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
1205          continue;
1206       VK_FROM_HANDLE(vk_acceleration_structure, accel_struct, pInfos[i].dstAccelerationStructure);
1207       size_t base = offsetof(struct radv_accel_struct_header, compacted_size);
1208 
1209       uint64_t instance_count =
1210          pInfos[i].type == VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR ? bvh_states[i].leaf_node_count : 0;
1211 
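           /* For compacted builds the final size is only known on the GPU, so the size-related
            * header fields are presumably written by the header shader dispatched below; the CP
            * write at the end of this loop then starts at geometry_count instead of compacted_size.
            */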
1212       if (bvh_states[i].config.compact) {
1213          base = offsetof(struct radv_accel_struct_header, geometry_count);
1214 
1215          struct header_args args = {
1216             .src = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
1217             .dst = vk_acceleration_structure_get_va(accel_struct),
1218             .bvh_offset = bvh_states[i].accel_struct.bvh_offset,
1219             .instance_count = instance_count,
1220          };
1221 
1222          vk_common_CmdPushConstants(commandBuffer, device->meta_state.accel_struct_build.header_p_layout,
1223                                     VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args), &args);
1224 
1225          radv_unaligned_dispatch(cmd_buffer, 1, 1, 1);
1226       }
1227 
1228       struct radv_accel_struct_header header;
1229 
1230       header.instance_offset = bvh_states[i].accel_struct.bvh_offset + sizeof(struct radv_bvh_box32_node);
1231       header.instance_count = instance_count;
1232       header.compacted_size = bvh_states[i].accel_struct.size;
1233 
1234       header.copy_dispatch_size[0] = DIV_ROUND_UP(header.compacted_size, 16 * 64);
1235       header.copy_dispatch_size[1] = 1;
1236       header.copy_dispatch_size[2] = 1;
1237 
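           /* Serialized layout: the serialization header plus one 64-bit handle per instance
            * (TLAS only), aligned to 128 bytes, followed by the compacted BVH data.
            */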
1238       header.serialization_size =
1239          header.compacted_size +
1240          align(sizeof(struct radv_accel_struct_serialization_header) + sizeof(uint64_t) * header.instance_count, 128);
1241 
1242       header.size = header.serialization_size - sizeof(struct radv_accel_struct_serialization_header) -
1243                     sizeof(uint64_t) * header.instance_count;
1244 
1245       header.build_flags = pInfos[i].flags;
1246       header.geometry_count = pInfos[i].geometryCount;
1247 
1248       radv_update_buffer_cp(cmd_buffer, vk_acceleration_structure_get_va(accel_struct) + base,
1249                             (const char *)&header + base, sizeof(header) - base);
1250    }
1251 
1252    if (batch_state->any_compact)
1253       radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
1254 }
1255 
1256 static void
1257 init_geometry_infos(VkCommandBuffer commandBuffer, uint32_t infoCount,
1258                     const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states,
1259                     const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos)
1260 {
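        /* Per-geometry metadata stored alongside the acceleration structure; only needed when
         * RRA (Radeon Raytracing Analyzer) captures are enabled, see the rra_trace check at the
         * call site.
         */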
1261    for (uint32_t i = 0; i < infoCount; ++i) {
1262       if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
1263          continue;
1264       VK_FROM_HANDLE(vk_acceleration_structure, accel_struct, pInfos[i].dstAccelerationStructure);
1265 
1266       uint64_t geometry_infos_size = pInfos[i].geometryCount * sizeof(struct radv_accel_struct_geometry_info);
1267 
1268       struct radv_accel_struct_geometry_info *geometry_infos = malloc(geometry_infos_size);
1269       if (!geometry_infos)
1270          continue;
1271 
1272       for (uint32_t j = 0; j < pInfos[i].geometryCount; ++j) {
1273          const VkAccelerationStructureGeometryKHR *geometry =
1274             pInfos[i].pGeometries ? pInfos[i].pGeometries + j : pInfos[i].ppGeometries[j];
1275          geometry_infos[j].type = geometry->geometryType;
1276          geometry_infos[j].flags = geometry->flags;
1277          geometry_infos[j].primitive_count = ppBuildRangeInfos[i][j].primitiveCount;
1278       }
1279 
1280       radv_CmdUpdateBuffer(commandBuffer, accel_struct->buffer,
1281                            accel_struct->offset + bvh_states[i].accel_struct.geometry_info_offset, geometry_infos_size,
1282                            geometry_infos);
1283 
1284       free(geometry_infos);
1285    }
1286 }
1287 
1288 static void
1289 update(VkCommandBuffer commandBuffer, uint32_t infoCount, const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
1290        const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos, struct bvh_state *bvh_states)
1291 {
1292    VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
1293    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
1294 
1295    radv_write_user_event_marker(cmd_buffer, UserEventPush, "update");
1296 
1297    device->vk.dispatch_table.CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
1298                                              device->meta_state.accel_struct_build.update_pipeline);
1299 
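        /* Updates refit the existing BVH: each leaf is re-written with its new bounds and the
         * internal nodes are presumably refit bottom-up, gated by the internal_ready_count
         * counters that were cleared during setup.
         */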
1300    for (uint32_t i = 0; i < infoCount; ++i) {
1301       if (bvh_states[i].config.internal_type != INTERNAL_BUILD_TYPE_UPDATE)
1302          continue;
1303 
1304       uint32_t leaf_node_count = 0;
1305       for (uint32_t j = 0; j < pInfos[i].geometryCount; ++j) {
1306          leaf_node_count += ppBuildRangeInfos[i][j].primitiveCount;
1307       }
1308 
1309       VK_FROM_HANDLE(vk_acceleration_structure, src_bvh, pInfos[i].srcAccelerationStructure);
1310       VK_FROM_HANDLE(vk_acceleration_structure, dst_bvh, pInfos[i].dstAccelerationStructure);
1311       struct update_args update_consts = {
1312          .src = vk_acceleration_structure_get_va(src_bvh),
1313          .dst = vk_acceleration_structure_get_va(dst_bvh),
1314          .leaf_bounds = pInfos[i].scratchData.deviceAddress,
1315          .internal_ready_count =
1316             pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.internal_ready_count_offset,
1317          .leaf_node_count = leaf_node_count,
1318       };
1319 
1320       for (unsigned j = 0; j < pInfos[i].geometryCount; ++j) {
1321          const VkAccelerationStructureGeometryKHR *geom =
1322             pInfos[i].pGeometries ? &pInfos[i].pGeometries[j] : pInfos[i].ppGeometries[j];
1323 
1324          const VkAccelerationStructureBuildRangeInfoKHR *build_range_info = &ppBuildRangeInfos[i][j];
1325 
1326          update_consts.geom_data = fill_geometry_data(pInfos[i].type, &bvh_states[i], j, geom, build_range_info);
1327 
1328          vk_common_CmdPushConstants(commandBuffer, device->meta_state.accel_struct_build.update_p_layout,
1329                                     VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(update_consts), &update_consts);
1330          radv_unaligned_dispatch(cmd_buffer, build_range_info->primitiveCount, 1, 1);
1331 
1332          bvh_states[i].leaf_node_count += build_range_info->primitiveCount;
1333          bvh_states[i].node_count += build_range_info->primitiveCount;
1334       }
1335    }
1336 
1337    radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
1338 }
1339 
1340 VKAPI_ATTR void VKAPI_CALL
1341 radv_CmdBuildAccelerationStructuresKHR(VkCommandBuffer commandBuffer, uint32_t infoCount,
1342                                        const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
1343                                        const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos)
1344 {
1345    VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
1346    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
1347    struct radv_meta_saved_state saved_state;
1348 
1349    VkResult result = radv_device_init_accel_struct_build_state(device);
1350    if (result != VK_SUCCESS) {
1351       vk_command_buffer_set_error(&cmd_buffer->vk, result);
1352       return;
1353    }
1354 
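        /* Barrier used between the dependent compute passes of the batched build: wait for
         * compute and make shader writes visible to subsequent shader reads.
         */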
1355    enum radv_cmd_flush_bits flush_bits =
1356       RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
1357       radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_WRITE_BIT, NULL) |
1358       radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_READ_BIT, NULL);
1359 
1360    radv_meta_save(&saved_state, cmd_buffer,
1361                   RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);
1362    struct bvh_state *bvh_states = calloc(infoCount, sizeof(struct bvh_state));
1363 
1364    radv_describe_begin_accel_struct_build(cmd_buffer, infoCount);
1365 
1366    struct radv_bvh_batch_state batch_state = {0};
1367 
1368    for (uint32_t i = 0; i < infoCount; ++i) {
1369       uint32_t leaf_node_count = 0;
1370       for (uint32_t j = 0; j < pInfos[i].geometryCount; ++j) {
1371          leaf_node_count += ppBuildRangeInfos[i][j].primitiveCount;
1372       }
1373 
1374       get_build_layout(device, leaf_node_count, pInfos + i, &bvh_states[i].accel_struct, &bvh_states[i].scratch);
1375 
1376       struct build_config config = build_config(leaf_node_count, pInfos + i);
1377       bvh_states[i].config = config;
1378 
1379       if (config.compact)
1380          batch_state.any_compact = true;
1381       else
1382          batch_state.any_non_compact = true;
1383 
1384       if (config.updateable)
1385          batch_state.any_updateable = true;
1386       else
1387          batch_state.any_non_updateable = true;
1388 
1389       if (config.internal_type == INTERNAL_BUILD_TYPE_PLOC) {
1390          batch_state.any_ploc = true;
1391       } else if (config.internal_type == INTERNAL_BUILD_TYPE_LBVH) {
1392          batch_state.any_lbvh = true;
1393       } else if (config.internal_type == INTERNAL_BUILD_TYPE_UPDATE) {
1394          batch_state.any_update = true;
1395       } else {
1396          unreachable("Unknown internal_build_type");
1397       }
1398 
1399       if (bvh_states[i].config.internal_type != INTERNAL_BUILD_TYPE_UPDATE) {
1400          /* The internal node count is updated in lbvh_build_internal for LBVH
1401           * and from the PLOC shader for PLOC. */
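              /* min_bounds/max_bounds start at the INT32_MAX / INT32_MIN sentinels; the build
               * shaders presumably shrink/grow them with atomic integer min/max on an
               * order-preserving encoding of the float bounds.
               */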
1402          struct radv_ir_header header = {
1403             .min_bounds = {0x7fffffff, 0x7fffffff, 0x7fffffff},
1404             .max_bounds = {0x80000000, 0x80000000, 0x80000000},
1405             .dispatch_size_y = 1,
1406             .dispatch_size_z = 1,
1407             .sync_data =
1408                {
1409                   .current_phase_end_counter = TASK_INDEX_INVALID,
1410                   /* Will be updated by the first PLOC shader invocation */
1411                   .task_counts = {TASK_INDEX_INVALID, TASK_INDEX_INVALID},
1412                },
1413          };
1414 
1415          radv_update_buffer_cp(cmd_buffer, pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
1416                                &header, sizeof(header));
1417       } else {
1418          /* Prepare ready counts for internal nodes */
1419          radv_fill_buffer(cmd_buffer, NULL, NULL,
1420                           pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.internal_ready_count_offset,
1421                           bvh_states[i].scratch.update_size - bvh_states[i].scratch.internal_ready_count_offset, 0x0);
1422          if (pInfos[i].srcAccelerationStructure != pInfos[i].dstAccelerationStructure) {
1423             VK_FROM_HANDLE(vk_acceleration_structure, src_as, pInfos[i].srcAccelerationStructure);
1424             VK_FROM_HANDLE(vk_acceleration_structure, dst_as, pInfos[i].dstAccelerationStructure);
1425 
1426             VK_FROM_HANDLE(radv_buffer, src_as_buffer, src_as->buffer);
1427             VK_FROM_HANDLE(radv_buffer, dst_as_buffer, dst_as->buffer);
1428 
1429             /* Copy header/metadata */
1430             radv_copy_buffer(cmd_buffer, src_as_buffer->bo, dst_as_buffer->bo, src_as_buffer->offset + src_as->offset,
1431                              dst_as_buffer->offset + dst_as->offset, bvh_states[i].accel_struct.bvh_offset);
1432          }
1433       }
1434    }
1435 
1436    cmd_buffer->state.current_event_type = EventInternalUnknown;
1437 
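        /* Batched build phases: leaf nodes -> Morton codes -> sort -> LBVH/PLOC internal nodes
         * -> encode -> header, with compute flushes between dependent passes.
         */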
1438    if (batch_state.any_lbvh || batch_state.any_ploc) {
1439       if (batch_state.any_non_updateable)
1440          build_leaves(commandBuffer, infoCount, pInfos, ppBuildRangeInfos, bvh_states, false);
1441       if (batch_state.any_updateable)
1442          build_leaves(commandBuffer, infoCount, pInfos, ppBuildRangeInfos, bvh_states, true);
1443 
1444       cmd_buffer->state.flush_bits |= flush_bits;
1445 
1446       morton_generate(commandBuffer, infoCount, pInfos, bvh_states, flush_bits);
1447 
1448       morton_sort(commandBuffer, infoCount, pInfos, bvh_states, flush_bits);
1449 
1450       cmd_buffer->state.flush_bits |= flush_bits;
1451 
1452       if (batch_state.any_lbvh)
1453          lbvh_build_internal(commandBuffer, infoCount, pInfos, bvh_states, flush_bits);
1454 
1455       if (batch_state.any_ploc)
1456          ploc_build_internal(commandBuffer, infoCount, pInfos, bvh_states);
1457 
1458       cmd_buffer->state.flush_bits |= flush_bits;
1459 
1460       if (batch_state.any_non_compact)
1461          encode_nodes(commandBuffer, infoCount, pInfos, bvh_states, false);
1462 
1463       if (batch_state.any_compact)
1464          encode_nodes(commandBuffer, infoCount, pInfos, bvh_states, true);
1465 
1466       cmd_buffer->state.flush_bits |= flush_bits;
1467    }
1468 
1469    init_header(commandBuffer, infoCount, pInfos, bvh_states, &batch_state);
1470 
1471    if (device->rra_trace.accel_structs)
1472       init_geometry_infos(commandBuffer, infoCount, pInfos, bvh_states, ppBuildRangeInfos);
1473 
1474    if (batch_state.any_update)
1475       update(commandBuffer, infoCount, pInfos, ppBuildRangeInfos, bvh_states);
1476 
1477    radv_describe_end_accel_struct_build(cmd_buffer);
1478 
1479    free(bvh_states);
1480    radv_meta_restore(&saved_state, cmd_buffer);
1481 }
1482 
1483 VKAPI_ATTR void VKAPI_CALL
1484 radv_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, const VkCopyAccelerationStructureInfoKHR *pInfo)
1485 {
1486    VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
1487    VK_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src);
1488    VK_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst);
1489    VK_FROM_HANDLE(radv_buffer, src_buffer, src->buffer);
1490    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
1491    struct radv_meta_saved_state saved_state;
1492 
1493    VkResult result = radv_device_init_accel_struct_copy_state(device);
1494    if (result != VK_SUCCESS) {
1495       vk_command_buffer_set_error(&cmd_buffer->vk, result);
1496       return;
1497    }
1498 
1499    radv_meta_save(&saved_state, cmd_buffer,
1500                   RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);
1501 
1502    radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
1503                         device->meta_state.accel_struct_build.copy_pipeline);
1504 
1505    struct copy_args consts = {
1506       .src_addr = vk_acceleration_structure_get_va(src),
1507       .dst_addr = vk_acceleration_structure_get_va(dst),
1508       .mode = RADV_COPY_MODE_COPY,
1509    };
1510 
1511    vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1512                               device->meta_state.accel_struct_build.copy_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
1513                               sizeof(consts), &consts);
1514 
1515    cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT,
1516                                                          VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT, NULL);
1517 
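        /* The workgroup count (copy_dispatch_size) lives in the source acceleration structure's
         * header, so the copy is dispatched indirectly.
         */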
1518    radv_indirect_dispatch(
1519       cmd_buffer, src_buffer->bo,
1520       vk_acceleration_structure_get_va(src) + offsetof(struct radv_accel_struct_header, copy_dispatch_size));
1521    radv_meta_restore(&saved_state, cmd_buffer);
1522 }
1523 
1524 VKAPI_ATTR void VKAPI_CALL
1525 radv_GetDeviceAccelerationStructureCompatibilityKHR(VkDevice _device,
1526                                                     const VkAccelerationStructureVersionInfoKHR *pVersionInfo,
1527                                                     VkAccelerationStructureCompatibilityKHR *pCompatibility)
1528 {
1529    VK_FROM_HANDLE(radv_device, device, _device);
1530    const struct radv_physical_device *pdev = radv_device_physical(device);
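        /* Serialized acceleration structures embed the driver and cache UUIDs (see
         * radv_CmdCopyAccelerationStructureToMemoryKHR); only data produced by the exact same
         * driver build is reported as compatible.
         */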
1531    bool compat = memcmp(pVersionInfo->pVersionData, pdev->driver_uuid, VK_UUID_SIZE) == 0 &&
1532                  memcmp(pVersionInfo->pVersionData + VK_UUID_SIZE, pdev->cache_uuid, VK_UUID_SIZE) == 0;
1533    *pCompatibility = compat ? VK_ACCELERATION_STRUCTURE_COMPATIBILITY_COMPATIBLE_KHR
1534                             : VK_ACCELERATION_STRUCTURE_COMPATIBILITY_INCOMPATIBLE_KHR;
1535 }
1536 
1537 VKAPI_ATTR VkResult VKAPI_CALL
1538 radv_CopyMemoryToAccelerationStructureKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation,
1539                                           const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo)
1540 {
1541    unreachable("Unimplemented");
1542    return VK_ERROR_FEATURE_NOT_PRESENT;
1543 }
1544 
1545 VKAPI_ATTR VkResult VKAPI_CALL
1546 radv_CopyAccelerationStructureToMemoryKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation,
1547                                           const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo)
1548 {
1549    unreachable("Unimplemented");
1550    return VK_ERROR_FEATURE_NOT_PRESENT;
1551 }
1552 
1553 VKAPI_ATTR void VKAPI_CALL
1554 radv_CmdCopyMemoryToAccelerationStructureKHR(VkCommandBuffer commandBuffer,
1555                                              const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo)
1556 {
1557    VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
1558    VK_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst);
1559    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
1560    struct radv_meta_saved_state saved_state;
1561 
1562    VkResult result = radv_device_init_accel_struct_copy_state(device);
1563    if (result != VK_SUCCESS) {
1564       vk_command_buffer_set_error(&cmd_buffer->vk, result);
1565       return;
1566    }
1567 
1568    radv_meta_save(&saved_state, cmd_buffer,
1569                   RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);
1570 
1571    radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
1572                         device->meta_state.accel_struct_build.copy_pipeline);
1573 
1574    const struct copy_args consts = {
1575       .src_addr = pInfo->src.deviceAddress,
1576       .dst_addr = vk_acceleration_structure_get_va(dst),
1577       .mode = RADV_COPY_MODE_DESERIALIZE,
1578    };
1579 
1580    vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1581                               device->meta_state.accel_struct_build.copy_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
1582                               sizeof(consts), &consts);
1583 
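        /* No size information for the serialized data is available on the CPU here, so a fixed
         * 512-workgroup dispatch is used; the copy shader presumably strides over the data.
         */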
1584    vk_common_CmdDispatch(commandBuffer, 512, 1, 1);
1585    radv_meta_restore(&saved_state, cmd_buffer);
1586 }
1587 
1588 VKAPI_ATTR void VKAPI_CALL
1589 radv_CmdCopyAccelerationStructureToMemoryKHR(VkCommandBuffer commandBuffer,
1590                                              const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo)
1591 {
1592    VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
1593    VK_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src);
1594    VK_FROM_HANDLE(radv_buffer, src_buffer, src->buffer);
1595    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
1596    const struct radv_physical_device *pdev = radv_device_physical(device);
1597    struct radv_meta_saved_state saved_state;
1598 
1599    VkResult result = radv_device_init_accel_struct_copy_state(device);
1600    if (result != VK_SUCCESS) {
1601       vk_command_buffer_set_error(&cmd_buffer->vk, result);
1602       return;
1603    }
1604 
1605    radv_meta_save(&saved_state, cmd_buffer,
1606                   RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);
1607 
1608    radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
1609                         device->meta_state.accel_struct_build.copy_pipeline);
1610 
1611    const struct copy_args consts = {
1612       .src_addr = vk_acceleration_structure_get_va(src),
1613       .dst_addr = pInfo->dst.deviceAddress,
1614       .mode = RADV_COPY_MODE_SERIALIZE,
1615    };
1616 
1617    vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1618                               device->meta_state.accel_struct_build.copy_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
1619                               sizeof(consts), &consts);
1620 
1621    cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT,
1622                                                          VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT, NULL);
1623 
1624    radv_indirect_dispatch(
1625       cmd_buffer, src_buffer->bo,
1626       vk_acceleration_structure_get_va(src) + offsetof(struct radv_accel_struct_header, copy_dispatch_size));
1627    radv_meta_restore(&saved_state, cmd_buffer);
1628 
1629    /* Set the header of the serialized data. */
1630    uint8_t header_data[2 * VK_UUID_SIZE];
1631    memcpy(header_data, pdev->driver_uuid, VK_UUID_SIZE);
1632    memcpy(header_data + VK_UUID_SIZE, pdev->cache_uuid, VK_UUID_SIZE);
1633 
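        /* These UUIDs are what radv_GetDeviceAccelerationStructureCompatibilityKHR checks when
         * deciding whether serialized data can be consumed.
         */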
1634    radv_update_buffer_cp(cmd_buffer, pInfo->dst.deviceAddress, header_data, sizeof(header_data));
1635 }
1636 
1637 VKAPI_ATTR void VKAPI_CALL
1638 radv_CmdBuildAccelerationStructuresIndirectKHR(VkCommandBuffer commandBuffer, uint32_t infoCount,
1639                                                const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
1640                                                const VkDeviceAddress *pIndirectDeviceAddresses,
1641                                                const uint32_t *pIndirectStrides,
1642                                                const uint32_t *const *ppMaxPrimitiveCounts)
1643 {
1644    unreachable("Unimplemented");
1645 }
1646