1 /*
2 * Copyright © 2021 Bas Nieuwenhuizen
3 *
4 * SPDX-License-Identifier: MIT
5 */
6
7 #include "radv_sqtt.h"
8
9 #include "meta/radv_meta.h"
10 #include "nir_builder.h"
11 #include "radv_cs.h"
12 #include "radv_entrypoints.h"
13
14 #include "radix_sort/common/vk/barrier.h"
15 #include "radix_sort/radv_radix_sort.h"
16 #include "radix_sort/shaders/push.h"
17
18 #include "bvh/build_interface.h"
19 #include "bvh/bvh.h"
20
21 #include "vk_acceleration_structure.h"
22 #include "vk_common_entrypoints.h"
23
24 static const uint32_t leaf_spv[] = {
25 #include "bvh/leaf.spv.h"
26 };
27
28 static const uint32_t leaf_always_active_spv[] = {
29 #include "bvh/leaf_always_active.spv.h"
30 };
31
32 static const uint32_t morton_spv[] = {
33 #include "bvh/morton.spv.h"
34 };
35
36 static const uint32_t lbvh_main_spv[] = {
37 #include "bvh/lbvh_main.spv.h"
38 };
39
40 static const uint32_t lbvh_generate_ir_spv[] = {
41 #include "bvh/lbvh_generate_ir.spv.h"
42 };
43
44 static const uint32_t ploc_spv[] = {
45 #include "bvh/ploc_internal.spv.h"
46 };
47
48 static const uint32_t copy_spv[] = {
49 #include "bvh/copy.spv.h"
50 };
51
52 static const uint32_t encode_spv[] = {
53 #include "bvh/encode.spv.h"
54 };
55
56 static const uint32_t encode_compact_spv[] = {
57 #include "bvh/encode_compact.spv.h"
58 };
59
60 static const uint32_t header_spv[] = {
61 #include "bvh/header.spv.h"
62 };
63
64 static const uint32_t update_spv[] = {
65 #include "bvh/update.spv.h"
66 };
67
68 #define KEY_ID_PAIR_SIZE 8
69 #define MORTON_BIT_SIZE 24
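/* KEY_ID_PAIR_SIZE is the size in bytes of one sort entry (a Morton key paired with
 * a leaf node id, matching the 64-bit keyvals of the radix sort). MORTON_BIT_SIZE is
 * the number of key bits that are significant, which caps the number of radix-sort
 * passes performed in morton_sort(). */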
70
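/* How the internal nodes are produced: LBVH is the fast builder driven directly by
 * the sorted Morton codes, PLOC is the higher-quality (slower) builder used for
 * TLAS and for BLAS that neither prefer fast builds nor allow updates, and UPDATE
 * refits an existing BLAS instead of rebuilding it. */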
71 enum internal_build_type {
72 INTERNAL_BUILD_TYPE_LBVH,
73 INTERNAL_BUILD_TYPE_PLOC,
74 INTERNAL_BUILD_TYPE_UPDATE,
75 };
76
77 struct build_config {
78 enum internal_build_type internal_type;
79 bool compact;
80 bool updateable;
81 };
82
83 struct acceleration_structure_layout {
84 uint32_t geometry_info_offset;
85 uint32_t bvh_offset;
86 uint32_t leaf_nodes_offset;
87 uint32_t internal_nodes_offset;
88 uint32_t size;
89 };
90
91 struct scratch_layout {
92 uint32_t size;
93 uint32_t update_size;
94
95 uint32_t header_offset;
96
97 /* Used for UPDATE only. */
98
99 uint32_t internal_ready_count_offset;
100
101 /* Used for BUILD only. */
102
103 uint32_t sort_buffer_offset[2];
104 uint32_t sort_internal_offset;
105
106 uint32_t ploc_prefix_sum_partition_offset;
107 uint32_t lbvh_node_offset;
108
109 uint32_t ir_offset;
110 uint32_t internal_node_offset;
111 };
112
113 static struct build_config
114 build_config(uint32_t leaf_count, const VkAccelerationStructureBuildGeometryInfoKHR *build_info)
115 {
116 struct build_config config = {0};
117
118 if (leaf_count <= 4)
119 config.internal_type = INTERNAL_BUILD_TYPE_LBVH;
120 else if (build_info->type == VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR)
121 config.internal_type = INTERNAL_BUILD_TYPE_PLOC;
122 else if (!(build_info->flags & VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_BUILD_BIT_KHR) &&
123 !(build_info->flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR))
124 config.internal_type = INTERNAL_BUILD_TYPE_PLOC;
125 else
126 config.internal_type = INTERNAL_BUILD_TYPE_LBVH;
127
128 if (build_info->mode == VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR &&
129 build_info->type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR)
130 config.internal_type = INTERNAL_BUILD_TYPE_UPDATE;
131
132 if ((build_info->flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR) &&
133 build_info->type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR)
134 config.updateable = true;
135
136 if (build_info->flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR)
137 config.compact = true;
138
139 return config;
140 }
141
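/* Layout of the result and scratch buffers (computed by get_build_layout below):
 * the result holds the radv_accel_struct_header, optional per-geometry info when
 * RRA tracing is enabled, the parent-link array, and then the 64-byte-aligned BVH
 * (root box32 node, leaf nodes, remaining internal nodes). The build scratch holds
 * the IR header, the two ping-pong sort buffers, the aliased sort-internal/PLOC/
 * LBVH scratch, and the IR leaf and internal nodes. */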
142 static void
143 get_build_layout(struct radv_device *device, uint32_t leaf_count,
144 const VkAccelerationStructureBuildGeometryInfoKHR *build_info,
145 struct acceleration_structure_layout *accel_struct, struct scratch_layout *scratch)
146 {
147 uint32_t internal_count = MAX2(leaf_count, 2) - 1;
148
149 VkGeometryTypeKHR geometry_type = VK_GEOMETRY_TYPE_TRIANGLES_KHR;
150
151 if (build_info->geometryCount) {
152 if (build_info->pGeometries)
153 geometry_type = build_info->pGeometries[0].geometryType;
154 else
155 geometry_type = build_info->ppGeometries[0]->geometryType;
156 }
157
158 uint32_t bvh_leaf_size;
159 switch (geometry_type) {
160 case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
161 bvh_leaf_size = sizeof(struct radv_bvh_triangle_node);
162 break;
163 case VK_GEOMETRY_TYPE_AABBS_KHR:
164 bvh_leaf_size = sizeof(struct radv_bvh_aabb_node);
165 break;
166 case VK_GEOMETRY_TYPE_INSTANCES_KHR:
167 bvh_leaf_size = sizeof(struct radv_bvh_instance_node);
168 break;
169 default:
170 unreachable("Unknown VkGeometryTypeKHR");
171 }
172
173 if (accel_struct) {
174 uint64_t bvh_size = bvh_leaf_size * leaf_count + sizeof(struct radv_bvh_box32_node) * internal_count;
175 uint32_t offset = 0;
176 offset += sizeof(struct radv_accel_struct_header);
177
178 if (device->rra_trace.accel_structs) {
179 accel_struct->geometry_info_offset = offset;
180 offset += sizeof(struct radv_accel_struct_geometry_info) * build_info->geometryCount;
181 }
182 /* Parent links have to go directly before bvh_offset because we index them using negative
183 * offsets from there. */
184 offset += bvh_size / 64 * 4;
185
186 /* The BVH, and hence bvh_offset, needs 64-byte alignment for RT nodes. */
187 offset = ALIGN(offset, 64);
188 accel_struct->bvh_offset = offset;
189
190 /* root node */
191 offset += sizeof(struct radv_bvh_box32_node);
192
193 accel_struct->leaf_nodes_offset = offset;
194 offset += bvh_leaf_size * leaf_count;
195
196 accel_struct->internal_nodes_offset = offset;
197 /* Factor out the root node. */
198 offset += sizeof(struct radv_bvh_box32_node) * (internal_count - 1);
199
200 accel_struct->size = offset;
201 }
202
203 if (scratch) {
204 radix_sort_vk_memory_requirements_t requirements = {
205 0,
206 };
207 if (radv_device_init_accel_struct_build_state(device) == VK_SUCCESS)
208 radix_sort_vk_get_memory_requirements(device->meta_state.accel_struct_build.radix_sort, leaf_count,
209 &requirements);
210
211 uint32_t offset = 0;
212
213 uint32_t ploc_scratch_space = 0;
214 uint32_t lbvh_node_space = 0;
215
216 struct build_config config = build_config(leaf_count, build_info);
217
218 if (config.internal_type == INTERNAL_BUILD_TYPE_PLOC)
219 ploc_scratch_space = DIV_ROUND_UP(leaf_count, PLOC_WORKGROUP_SIZE) * sizeof(struct ploc_prefix_scan_partition);
220 else
221 lbvh_node_space = sizeof(struct lbvh_node_info) * internal_count;
222
223 scratch->header_offset = offset;
224 offset += sizeof(struct radv_ir_header);
225
226 scratch->sort_buffer_offset[0] = offset;
227 offset += requirements.keyvals_size;
228
229 scratch->sort_buffer_offset[1] = offset;
230 offset += requirements.keyvals_size;
231
232 scratch->sort_internal_offset = offset;
233 /* The internal sort data is no longer needed once PLOC/LBVH run, so save space
234 * by aliasing their scratch space with it. */
235 scratch->ploc_prefix_sum_partition_offset = offset;
236 scratch->lbvh_node_offset = offset;
237 offset += MAX3(requirements.internal_size, ploc_scratch_space, lbvh_node_space);
238
239 scratch->ir_offset = offset;
240 offset += sizeof(struct radv_ir_node) * leaf_count;
241
242 scratch->internal_node_offset = offset;
243 offset += sizeof(struct radv_ir_box_node) * internal_count;
244
245 scratch->size = offset;
246
247 if (build_info->type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR) {
248 uint32_t update_offset = 0;
249
250 update_offset += sizeof(radv_aabb) * leaf_count;
251 scratch->internal_ready_count_offset = update_offset;
252
253 update_offset += sizeof(uint32_t) * internal_count;
254 scratch->update_size = update_offset;
255 } else {
256 scratch->update_size = offset;
257 }
258 }
259 }
260
261 VKAPI_ATTR void VKAPI_CALL
262 radv_GetAccelerationStructureBuildSizesKHR(VkDevice _device, VkAccelerationStructureBuildTypeKHR buildType,
263 const VkAccelerationStructureBuildGeometryInfoKHR *pBuildInfo,
264 const uint32_t *pMaxPrimitiveCounts,
265 VkAccelerationStructureBuildSizesInfoKHR *pSizeInfo)
266 {
267 VK_FROM_HANDLE(radv_device, device, _device);
268
269 STATIC_ASSERT(sizeof(struct radv_bvh_triangle_node) == 64);
270 STATIC_ASSERT(sizeof(struct radv_bvh_aabb_node) == 64);
271 STATIC_ASSERT(sizeof(struct radv_bvh_instance_node) == 128);
272 STATIC_ASSERT(sizeof(struct radv_bvh_box16_node) == 64);
273 STATIC_ASSERT(sizeof(struct radv_bvh_box32_node) == 128);
274
275 uint32_t leaf_count = 0;
276 for (uint32_t i = 0; i < pBuildInfo->geometryCount; i++)
277 leaf_count += pMaxPrimitiveCounts[i];
278
279 struct acceleration_structure_layout accel_struct;
280 struct scratch_layout scratch;
281 get_build_layout(device, leaf_count, pBuildInfo, &accel_struct, &scratch);
282
283 pSizeInfo->accelerationStructureSize = accel_struct.size;
284 pSizeInfo->updateScratchSize = scratch.update_size;
285 pSizeInfo->buildScratchSize = scratch.size;
286 }
287
288 VKAPI_ATTR VkResult VKAPI_CALL
289 radv_WriteAccelerationStructuresPropertiesKHR(VkDevice _device, uint32_t accelerationStructureCount,
290 const VkAccelerationStructureKHR *pAccelerationStructures,
291 VkQueryType queryType, size_t dataSize, void *pData, size_t stride)
292 {
293 unreachable("Unimplemented");
294 return VK_ERROR_FEATURE_NOT_PRESENT;
295 }
296
297 VKAPI_ATTR VkResult VKAPI_CALL
298 radv_BuildAccelerationStructuresKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation, uint32_t infoCount,
299 const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
300 const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos)
301 {
302 unreachable("Unimplemented");
303 return VK_ERROR_FEATURE_NOT_PRESENT;
304 }
305
306 VKAPI_ATTR VkResult VKAPI_CALL
307 radv_CopyAccelerationStructureKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation,
308 const VkCopyAccelerationStructureInfoKHR *pInfo)
309 {
310 unreachable("Unimplemented");
311 return VK_ERROR_FEATURE_NOT_PRESENT;
312 }
313
314 void
315 radv_device_finish_accel_struct_build_state(struct radv_device *device)
316 {
317 VkDevice _device = radv_device_to_handle(device);
318 struct radv_meta_state *state = &device->meta_state;
319 struct vk_device_dispatch_table *dispatch = &device->vk.dispatch_table;
320
321 dispatch->DestroyPipeline(_device, state->accel_struct_build.copy_pipeline, &state->alloc);
322 dispatch->DestroyPipeline(_device, state->accel_struct_build.ploc_pipeline, &state->alloc);
323 dispatch->DestroyPipeline(_device, state->accel_struct_build.lbvh_generate_ir_pipeline, &state->alloc);
324 dispatch->DestroyPipeline(_device, state->accel_struct_build.lbvh_main_pipeline, &state->alloc);
325 dispatch->DestroyPipeline(_device, state->accel_struct_build.leaf_pipeline, &state->alloc);
326 dispatch->DestroyPipeline(_device, state->accel_struct_build.leaf_updateable_pipeline, &state->alloc);
327 dispatch->DestroyPipeline(_device, state->accel_struct_build.encode_pipeline, &state->alloc);
328 dispatch->DestroyPipeline(_device, state->accel_struct_build.encode_compact_pipeline, &state->alloc);
329 dispatch->DestroyPipeline(_device, state->accel_struct_build.header_pipeline, &state->alloc);
330 dispatch->DestroyPipeline(_device, state->accel_struct_build.morton_pipeline, &state->alloc);
331 dispatch->DestroyPipeline(_device, state->accel_struct_build.update_pipeline, &state->alloc);
332 radv_DestroyPipelineLayout(_device, state->accel_struct_build.copy_p_layout, &state->alloc);
333 radv_DestroyPipelineLayout(_device, state->accel_struct_build.ploc_p_layout, &state->alloc);
334 radv_DestroyPipelineLayout(_device, state->accel_struct_build.lbvh_generate_ir_p_layout, &state->alloc);
335 radv_DestroyPipelineLayout(_device, state->accel_struct_build.lbvh_main_p_layout, &state->alloc);
336 radv_DestroyPipelineLayout(_device, state->accel_struct_build.leaf_p_layout, &state->alloc);
337 radv_DestroyPipelineLayout(_device, state->accel_struct_build.encode_p_layout, &state->alloc);
338 radv_DestroyPipelineLayout(_device, state->accel_struct_build.header_p_layout, &state->alloc);
339 radv_DestroyPipelineLayout(_device, state->accel_struct_build.morton_p_layout, &state->alloc);
340 radv_DestroyPipelineLayout(_device, state->accel_struct_build.update_p_layout, &state->alloc);
341
342 if (state->accel_struct_build.radix_sort)
343 radix_sort_vk_destroy(state->accel_struct_build.radix_sort, _device, &state->alloc);
344
345 radv_DestroyBuffer(_device, state->accel_struct_build.null.buffer, &state->alloc);
346 radv_FreeMemory(_device, state->accel_struct_build.null.memory, &state->alloc);
347 vk_common_DestroyAccelerationStructureKHR(_device, state->accel_struct_build.null.accel_struct, &state->alloc);
348 }
349
350 static VkResult
351 create_build_pipeline_spv(struct radv_device *device, const uint32_t *spv, uint32_t spv_size,
352 unsigned push_constant_size, VkPipeline *pipeline, VkPipelineLayout *layout)
353 {
354 if (*pipeline)
355 return VK_SUCCESS;
356
357 VkDevice _device = radv_device_to_handle(device);
358
359 const VkPipelineLayoutCreateInfo pl_create_info = {
360 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
361 .setLayoutCount = 0,
362 .pushConstantRangeCount = 1,
363 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constant_size},
364 };
365
366 VkShaderModuleCreateInfo module_info = {
367 .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
368 .pNext = NULL,
369 .flags = 0,
370 .codeSize = spv_size,
371 .pCode = spv,
372 };
373
374 VkShaderModule module;
375 VkResult result =
376 device->vk.dispatch_table.CreateShaderModule(_device, &module_info, &device->meta_state.alloc, &module);
377 if (result != VK_SUCCESS)
378 return result;
379
380 if (!*layout) {
381 result = radv_CreatePipelineLayout(_device, &pl_create_info, &device->meta_state.alloc, layout);
382 if (result != VK_SUCCESS)
383 goto cleanup;
384 }
385
386 VkPipelineShaderStageCreateInfo shader_stage = {
387 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
388 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
389 .module = module,
390 .pName = "main",
391 .pSpecializationInfo = NULL,
392 };
393
394 VkComputePipelineCreateInfo pipeline_info = {
395 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
396 .stage = shader_stage,
397 .flags = 0,
398 .layout = *layout,
399 };
400
401 result = device->vk.dispatch_table.CreateComputePipelines(_device, device->meta_state.cache, 1, &pipeline_info,
402 &device->meta_state.alloc, pipeline);
403
404 cleanup:
405 device->vk.dispatch_table.DestroyShaderModule(_device, module, &device->meta_state.alloc);
406 return result;
407 }
408
409 VkResult
410 radv_device_init_null_accel_struct(struct radv_device *device)
411 {
412 const struct radv_physical_device *pdev = radv_device_physical(device);
413
414 if (pdev->memory_properties.memoryTypeCount == 0)
415 return VK_SUCCESS; /* Exit in the case of null winsys. */
416
417 VkDevice _device = radv_device_to_handle(device);
418
419 uint32_t bvh_offset = ALIGN(sizeof(struct radv_accel_struct_header), 64);
420 uint32_t size = bvh_offset + sizeof(struct radv_bvh_box32_node);
421
422 VkResult result;
423
424 VkBuffer buffer = VK_NULL_HANDLE;
425 VkDeviceMemory memory = VK_NULL_HANDLE;
426 VkAccelerationStructureKHR accel_struct = VK_NULL_HANDLE;
427
428 VkBufferCreateInfo buffer_create_info = {
429 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
430 .pNext =
431 &(VkBufferUsageFlags2CreateInfoKHR){
432 .sType = VK_STRUCTURE_TYPE_BUFFER_USAGE_FLAGS_2_CREATE_INFO_KHR,
433 .usage = VK_BUFFER_USAGE_2_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR,
434 },
435 .size = size,
436 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
437 };
438
439 result = radv_CreateBuffer(_device, &buffer_create_info, &device->meta_state.alloc, &buffer);
440 if (result != VK_SUCCESS)
441 return result;
442
443 VkBufferMemoryRequirementsInfo2 info = {
444 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
445 .buffer = buffer,
446 };
447 VkMemoryRequirements2 mem_req = {
448 .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
449 };
450 vk_common_GetBufferMemoryRequirements2(_device, &info, &mem_req);
451
452 VkMemoryAllocateInfo alloc_info = {
453 .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
454 .allocationSize = mem_req.memoryRequirements.size,
455 .memoryTypeIndex =
456 radv_find_memory_index(pdev, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
457 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
458 };
459
460 result = radv_AllocateMemory(_device, &alloc_info, &device->meta_state.alloc, &memory);
461 if (result != VK_SUCCESS)
462 return result;
463
464 VkBindBufferMemoryInfo bind_info = {
465 .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
466 .buffer = buffer,
467 .memory = memory,
468 };
469
470 result = radv_BindBufferMemory2(_device, 1, &bind_info);
471 if (result != VK_SUCCESS)
472 return result;
473
474 void *data;
475 result = vk_common_MapMemory(_device, memory, 0, size, 0, &data);
476 if (result != VK_SUCCESS)
477 return result;
478
479 struct radv_accel_struct_header header = {
480 .bvh_offset = bvh_offset,
481 };
482 memcpy(data, &header, sizeof(struct radv_accel_struct_header));
483
484 struct radv_bvh_box32_node root = {
485 .children =
486 {
487 RADV_BVH_INVALID_NODE,
488 RADV_BVH_INVALID_NODE,
489 RADV_BVH_INVALID_NODE,
490 RADV_BVH_INVALID_NODE,
491 },
492 };
493
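   /* NaN bounds fail every ray-box comparison, so traversal of the null
    * acceleration structure can never report an intersection. */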
494 for (uint32_t child = 0; child < 4; child++) {
495 root.coords[child] = (radv_aabb){
496 .min.x = NAN,
497 .min.y = NAN,
498 .min.z = NAN,
499 .max.x = NAN,
500 .max.y = NAN,
501 .max.z = NAN,
502 };
503 }
504
505 memcpy((uint8_t *)data + bvh_offset, &root, sizeof(struct radv_bvh_box32_node));
506
507 vk_common_UnmapMemory(_device, memory);
508
509 VkAccelerationStructureCreateInfoKHR create_info = {
510 .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR,
511 .buffer = buffer,
512 .size = size,
513 .type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR,
514 };
515
516 result = vk_common_CreateAccelerationStructureKHR(_device, &create_info, &device->meta_state.alloc, &accel_struct);
517 if (result != VK_SUCCESS)
518 return result;
519
520 device->meta_state.accel_struct_build.null.buffer = buffer;
521 device->meta_state.accel_struct_build.null.memory = memory;
522 device->meta_state.accel_struct_build.null.accel_struct = accel_struct;
523
524 return VK_SUCCESS;
525 }
526
527 VkResult
528 radv_device_init_accel_struct_build_state(struct radv_device *device)
529 {
530 VkResult result = VK_SUCCESS;
531 mtx_lock(&device->meta_state.mtx);
532
533 if (device->meta_state.accel_struct_build.radix_sort)
534 goto exit;
535
536 result = create_build_pipeline_spv(device, leaf_always_active_spv, sizeof(leaf_always_active_spv),
537 sizeof(struct leaf_args),
538 &device->meta_state.accel_struct_build.leaf_updateable_pipeline,
539 &device->meta_state.accel_struct_build.leaf_p_layout);
540 if (result != VK_SUCCESS)
541 goto exit;
542
543 result = create_build_pipeline_spv(device, leaf_spv, sizeof(leaf_spv), sizeof(struct leaf_args),
544 &device->meta_state.accel_struct_build.leaf_pipeline,
545 &device->meta_state.accel_struct_build.leaf_p_layout);
546 if (result != VK_SUCCESS)
547 goto exit;
548
549 result = create_build_pipeline_spv(device, lbvh_main_spv, sizeof(lbvh_main_spv), sizeof(struct lbvh_main_args),
550 &device->meta_state.accel_struct_build.lbvh_main_pipeline,
551 &device->meta_state.accel_struct_build.lbvh_main_p_layout);
552 if (result != VK_SUCCESS)
553 goto exit;
554
555 result = create_build_pipeline_spv(device, lbvh_generate_ir_spv, sizeof(lbvh_generate_ir_spv),
556 sizeof(struct lbvh_generate_ir_args),
557 &device->meta_state.accel_struct_build.lbvh_generate_ir_pipeline,
558 &device->meta_state.accel_struct_build.lbvh_generate_ir_p_layout);
559 if (result != VK_SUCCESS)
560 goto exit;
561
562 result = create_build_pipeline_spv(device, ploc_spv, sizeof(ploc_spv), sizeof(struct ploc_args),
563 &device->meta_state.accel_struct_build.ploc_pipeline,
564 &device->meta_state.accel_struct_build.ploc_p_layout);
565 if (result != VK_SUCCESS)
566 goto exit;
567
568 result = create_build_pipeline_spv(device, encode_spv, sizeof(encode_spv), sizeof(struct encode_args),
569 &device->meta_state.accel_struct_build.encode_pipeline,
570 &device->meta_state.accel_struct_build.encode_p_layout);
571 if (result != VK_SUCCESS)
572 goto exit;
573
574 result =
575 create_build_pipeline_spv(device, encode_compact_spv, sizeof(encode_compact_spv), sizeof(struct encode_args),
576 &device->meta_state.accel_struct_build.encode_compact_pipeline,
577 &device->meta_state.accel_struct_build.encode_p_layout);
578 if (result != VK_SUCCESS)
579 goto exit;
580
581 result = create_build_pipeline_spv(device, header_spv, sizeof(header_spv), sizeof(struct header_args),
582 &device->meta_state.accel_struct_build.header_pipeline,
583 &device->meta_state.accel_struct_build.header_p_layout);
584 if (result != VK_SUCCESS)
585 goto exit;
586
587 result = create_build_pipeline_spv(device, morton_spv, sizeof(morton_spv), sizeof(struct morton_args),
588 &device->meta_state.accel_struct_build.morton_pipeline,
589 &device->meta_state.accel_struct_build.morton_p_layout);
590 if (result != VK_SUCCESS)
591 goto exit;
592
593 result = create_build_pipeline_spv(device, update_spv, sizeof(update_spv), sizeof(struct update_args),
594 &device->meta_state.accel_struct_build.update_pipeline,
595 &device->meta_state.accel_struct_build.update_p_layout);
596 if (result != VK_SUCCESS)
597 goto exit;
598
599 device->meta_state.accel_struct_build.radix_sort =
600 radv_create_radix_sort_u64(radv_device_to_handle(device), &device->meta_state.alloc, device->meta_state.cache);
601 exit:
602 mtx_unlock(&device->meta_state.mtx);
603 return result;
604 }
605
606 static VkResult
607 radv_device_init_accel_struct_copy_state(struct radv_device *device)
608 {
609 mtx_lock(&device->meta_state.mtx);
610
611 VkResult result = create_build_pipeline_spv(device, copy_spv, sizeof(copy_spv), sizeof(struct copy_args),
612 &device->meta_state.accel_struct_build.copy_pipeline,
613 &device->meta_state.accel_struct_build.copy_p_layout);
614
615 mtx_unlock(&device->meta_state.mtx);
616 return result;
617 }
618
619 struct bvh_state {
620 uint32_t node_count;
621 uint32_t scratch_offset;
622
623 uint32_t leaf_node_count;
624 uint32_t internal_node_count;
625 uint32_t leaf_node_size;
626
627 struct acceleration_structure_layout accel_struct;
628 struct scratch_layout scratch;
629 struct build_config config;
630
631 /* Radix sort state */
632 uint32_t scatter_blocks;
633 uint32_t count_ru_scatter;
634 uint32_t histo_blocks;
635 uint32_t count_ru_histo;
636 struct rs_push_scatter push_scatter;
637 };
638
639 struct radv_bvh_batch_state {
640 bool any_compact;
641 bool any_non_compact;
642 bool any_ploc;
643 bool any_lbvh;
644 bool any_updateable;
645 bool any_non_updateable;
646 bool any_update;
647 };
648
649 static uint32_t
650 pack_geometry_id_and_flags(uint32_t geometry_id, uint32_t flags)
651 {
652 uint32_t geometry_id_and_flags = geometry_id;
653 if (flags & VK_GEOMETRY_OPAQUE_BIT_KHR)
654 geometry_id_and_flags |= RADV_GEOMETRY_OPAQUE;
655
656 return geometry_id_and_flags;
657 }
658
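/* Translate one VkAccelerationStructureGeometryKHR plus its build range into the
 * packed radv_bvh_geometry_data consumed by the leaf and update shaders. Note that
 * for triangle geometry, primitiveOffset applies to the index buffer when indices
 * are present and to the vertex data otherwise. */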
659 static struct radv_bvh_geometry_data
660 fill_geometry_data(VkAccelerationStructureTypeKHR type, struct bvh_state *bvh_state, uint32_t geom_index,
661 const VkAccelerationStructureGeometryKHR *geometry,
662 const VkAccelerationStructureBuildRangeInfoKHR *build_range_info)
663 {
664 struct radv_bvh_geometry_data data = {
665 .first_id = bvh_state->node_count,
666 .geometry_id = pack_geometry_id_and_flags(geom_index, geometry->flags),
667 .geometry_type = geometry->geometryType,
668 };
669
670 switch (geometry->geometryType) {
671 case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
672 assert(type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR);
673
674 data.data = geometry->geometry.triangles.vertexData.deviceAddress +
675 build_range_info->firstVertex * geometry->geometry.triangles.vertexStride;
676 data.indices = geometry->geometry.triangles.indexData.deviceAddress;
677
678 if (geometry->geometry.triangles.indexType == VK_INDEX_TYPE_NONE_KHR)
679 data.data += build_range_info->primitiveOffset;
680 else
681 data.indices += build_range_info->primitiveOffset;
682
683 data.transform = geometry->geometry.triangles.transformData.deviceAddress;
684 if (data.transform)
685 data.transform += build_range_info->transformOffset;
686
687 data.stride = geometry->geometry.triangles.vertexStride;
688 data.vertex_format = geometry->geometry.triangles.vertexFormat;
689 data.index_format = geometry->geometry.triangles.indexType;
690 break;
691 case VK_GEOMETRY_TYPE_AABBS_KHR:
692 assert(type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR);
693
694 data.data = geometry->geometry.aabbs.data.deviceAddress + build_range_info->primitiveOffset;
695 data.stride = geometry->geometry.aabbs.stride;
696 break;
697 case VK_GEOMETRY_TYPE_INSTANCES_KHR:
698 assert(type == VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR);
699
700 data.data = geometry->geometry.instances.data.deviceAddress + build_range_info->primitiveOffset;
701
702 if (geometry->geometry.instances.arrayOfPointers)
703 data.stride = 8;
704 else
705 data.stride = sizeof(VkAccelerationStructureInstanceKHR);
706 break;
707 default:
708 unreachable("Unknown geometryType");
709 }
710
711 return data;
712 }
713
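/* Leaf stage: one unaligned dispatch per geometry turns the application primitives
 * into leaf nodes in the destination BVH plus IR leaves and key/id entries in
 * scratch. Updateable builds use the "always active" leaf variant, presumably so
 * that inactive primitives keep their nodes and the topology stays stable across
 * updates. */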
714 static void
715 build_leaves(VkCommandBuffer commandBuffer, uint32_t infoCount,
716 const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
717 const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos, struct bvh_state *bvh_states,
718 bool updateable)
719 {
720 VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
721 struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
722
723 radv_write_user_event_marker(cmd_buffer, UserEventPush, "leaves");
724
725 device->vk.dispatch_table.CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
726 updateable ? device->meta_state.accel_struct_build.leaf_updateable_pipeline
727 : device->meta_state.accel_struct_build.leaf_pipeline);
728
729 for (uint32_t i = 0; i < infoCount; ++i) {
730 if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
731 continue;
732 if (bvh_states[i].config.updateable != updateable)
733 continue;
734
735 VK_FROM_HANDLE(vk_acceleration_structure, accel_struct, pInfos[i].dstAccelerationStructure);
736
737 struct leaf_args leaf_consts = {
738 .ir = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
739 .bvh = vk_acceleration_structure_get_va(accel_struct) + bvh_states[i].accel_struct.leaf_nodes_offset,
740 .header = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
741 .ids = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0],
742 };
743
744 for (unsigned j = 0; j < pInfos[i].geometryCount; ++j) {
745 const VkAccelerationStructureGeometryKHR *geom =
746 pInfos[i].pGeometries ? &pInfos[i].pGeometries[j] : pInfos[i].ppGeometries[j];
747
748 const VkAccelerationStructureBuildRangeInfoKHR *build_range_info = &ppBuildRangeInfos[i][j];
749
750 leaf_consts.geom_data = fill_geometry_data(pInfos[i].type, &bvh_states[i], j, geom, build_range_info);
751
752 vk_common_CmdPushConstants(commandBuffer, device->meta_state.accel_struct_build.leaf_p_layout,
753 VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(leaf_consts), &leaf_consts);
754 radv_unaligned_dispatch(cmd_buffer, build_range_info->primitiveCount, 1, 1);
755
756 bvh_states[i].leaf_node_count += build_range_info->primitiveCount;
757 bvh_states[i].node_count += build_range_info->primitiveCount;
758 }
759 }
760
761 radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
762 }
763
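/* Assign each leaf a Morton code in the sort key/id buffer so that spatially close
 * leaves become adjacent after sorting. */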
764 static void
765 morton_generate(VkCommandBuffer commandBuffer, uint32_t infoCount,
766 const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states,
767 enum radv_cmd_flush_bits flush_bits)
768 {
769 VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
770 struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
771
772 radv_write_user_event_marker(cmd_buffer, UserEventPush, "morton");
773
774 device->vk.dispatch_table.CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
775 device->meta_state.accel_struct_build.morton_pipeline);
776
777 for (uint32_t i = 0; i < infoCount; ++i) {
778 if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
779 continue;
780 const struct morton_args consts = {
781 .bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
782 .header = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
783 .ids = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0],
784 };
785
786 vk_common_CmdPushConstants(commandBuffer, device->meta_state.accel_struct_build.morton_p_layout,
787 VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);
788 radv_unaligned_dispatch(cmd_buffer, bvh_states[i].node_count, 1, 1);
789 }
790
791 radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
792
793 cmd_buffer->state.flush_bits |= flush_bits;
794 }
795
796 static void
797 morton_sort(VkCommandBuffer commandBuffer, uint32_t infoCount,
798 const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states,
799 enum radv_cmd_flush_bits flush_bits)
800 {
801 /* Copyright 2019 The Fuchsia Authors. */
802 VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
803 struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
804
805 radv_write_user_event_marker(cmd_buffer, UserEventPush, "sort");
806
807 radix_sort_vk_t *rs = device->meta_state.accel_struct_build.radix_sort;
808
809 /*
810 * OVERVIEW
811 *
812 * 1. Pad the keyvals in `scatter_even`.
813 * 2. Zero the `histograms` and `partitions`.
814 * --- BARRIER ---
815 * 3. HISTOGRAM is dispatched before PREFIX.
816 * --- BARRIER ---
817 * 4. PREFIX is dispatched before the first SCATTER.
818 * --- BARRIER ---
819 * 5. One or more SCATTER dispatches.
820 *
821 * Note that the `partitions` buffer can be zeroed anytime before the first
822 * scatter.
823 */
824
825 /* How many passes? */
826 uint32_t keyval_bytes = rs->config.keyval_dwords * (uint32_t)sizeof(uint32_t);
827 uint32_t keyval_bits = keyval_bytes * 8;
828 uint32_t key_bits = MIN2(MORTON_BIT_SIZE, keyval_bits);
829 uint32_t passes = (key_bits + RS_RADIX_LOG2 - 1) / RS_RADIX_LOG2;
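   /* Worked example, assuming the 64-bit keyval configuration from
    * radv_create_radix_sort_u64 and RS_RADIX_LOG2 == 8: keyval_bytes == 8,
    * key_bits == MIN2(24, 64) == 24, so passes == 3 and only the top three key
    * bytes are ever scattered. */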
830
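   /* After an odd number of passes the sorted keyvals end up in the second buffer,
    * so record which of the two ping-pong buffers later stages should read from. */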
831 for (uint32_t i = 0; i < infoCount; ++i) {
832 if (bvh_states[i].node_count)
833 bvh_states[i].scratch_offset = bvh_states[i].scratch.sort_buffer_offset[passes & 1];
834 else
835 bvh_states[i].scratch_offset = bvh_states[i].scratch.sort_buffer_offset[0];
836 }
837
838 /*
839 * PAD KEYVALS AND ZERO HISTOGRAM/PARTITIONS
840 *
841 * Pad fractional blocks with max-valued keyvals.
842 *
843 * Zero the histograms and partitions buffer.
844 *
845 * This assumes the partitions follow the histograms.
846 */
847
848 /* FIXME(allanmac): Consider precomputing some of these values and hang them off `rs`. */
849
850 /* How many scatter blocks? */
851 uint32_t scatter_wg_size = 1 << rs->config.scatter.workgroup_size_log2;
852 uint32_t scatter_block_kvs = scatter_wg_size * rs->config.scatter.block_rows;
853
854 /*
855 * How many histogram blocks?
856 *
857 * Note that it's OK to have more max-valued digits counted by the histogram
858 * than sorted by the scatters because the sort is stable.
859 */
860 uint32_t histo_wg_size = 1 << rs->config.histogram.workgroup_size_log2;
861 uint32_t histo_block_kvs = histo_wg_size * rs->config.histogram.block_rows;
862
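   /* Only the top `passes` bytes of each keyval are sorted, so start at the first
    * byte that will actually be scattered; histogram space for the skipped low
    * bytes is never touched. */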
863 uint32_t pass_idx = (keyval_bytes - passes);
864
865 for (uint32_t i = 0; i < infoCount; ++i) {
866 if (!bvh_states[i].node_count)
867 continue;
868 if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
869 continue;
870
871 uint64_t keyvals_even_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0];
872 uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;
873
874 bvh_states[i].scatter_blocks = (bvh_states[i].node_count + scatter_block_kvs - 1) / scatter_block_kvs;
875 bvh_states[i].count_ru_scatter = bvh_states[i].scatter_blocks * scatter_block_kvs;
876
877 bvh_states[i].histo_blocks = (bvh_states[i].count_ru_scatter + histo_block_kvs - 1) / histo_block_kvs;
878 bvh_states[i].count_ru_histo = bvh_states[i].histo_blocks * histo_block_kvs;
879
880 /* Fill with max values */
881 if (bvh_states[i].count_ru_histo > bvh_states[i].node_count) {
882 radv_fill_buffer(cmd_buffer, NULL, NULL, keyvals_even_addr + bvh_states[i].node_count * keyval_bytes,
883 (bvh_states[i].count_ru_histo - bvh_states[i].node_count) * keyval_bytes, 0xFFFFFFFF);
884 }
885
886 /*
887 * Zero histograms and invalidate partitions.
888 *
889 * Note that the partition invalidation only needs to be performed once
890 * because the even/odd scatter dispatches rely on the previous pass to
891 * leave the partitions in an invalid state.
892 *
893 * Note that the last workgroup doesn't read/write a partition so it doesn't
894 * need to be initialized.
895 */
896 uint32_t histo_partition_count = passes + bvh_states[i].scatter_blocks - 1;
897
898 uint32_t fill_base = pass_idx * (RS_RADIX_SIZE * sizeof(uint32_t));
899
900 radv_fill_buffer(cmd_buffer, NULL, NULL, internal_addr + rs->internal.histograms.offset + fill_base,
901 histo_partition_count * (RS_RADIX_SIZE * sizeof(uint32_t)), 0);
902 }
903
904 /*
905 * Pipeline: HISTOGRAM
906 *
907 * TODO(allanmac): All subgroups should try to process approximately the same
908 * number of blocks in order to minimize tail effects. This was implemented
909 * and reverted but should be reimplemented and benchmarked later.
910 */
911 vk_barrier_transfer_w_to_compute_r(commandBuffer);
912
913 device->vk.dispatch_table.CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
914 rs->pipelines.named.histogram);
915
916 for (uint32_t i = 0; i < infoCount; ++i) {
917 if (!bvh_states[i].node_count)
918 continue;
919 if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
920 continue;
921
922 uint64_t keyvals_even_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0];
923 uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;
924
925 /* Dispatch histogram */
926 struct rs_push_histogram push_histogram = {
927 .devaddr_histograms = internal_addr + rs->internal.histograms.offset,
928 .devaddr_keyvals = keyvals_even_addr,
929 .passes = passes,
930 };
931
932 vk_common_CmdPushConstants(commandBuffer, rs->pipeline_layouts.named.histogram, VK_SHADER_STAGE_COMPUTE_BIT, 0,
933 sizeof(push_histogram), &push_histogram);
934
935 vk_common_CmdDispatch(commandBuffer, bvh_states[i].histo_blocks, 1, 1);
936 }
937
938 /*
939 * Pipeline: PREFIX
940 *
941 * Launch one workgroup per pass.
942 */
943 vk_barrier_compute_w_to_compute_r(commandBuffer);
944
945 device->vk.dispatch_table.CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, rs->pipelines.named.prefix);
946
947 for (uint32_t i = 0; i < infoCount; ++i) {
948 if (!bvh_states[i].node_count)
949 continue;
950 if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
951 continue;
952
953 uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;
954
955 struct rs_push_prefix push_prefix = {
956 .devaddr_histograms = internal_addr + rs->internal.histograms.offset,
957 };
958
959 vk_common_CmdPushConstants(commandBuffer, rs->pipeline_layouts.named.prefix, VK_SHADER_STAGE_COMPUTE_BIT, 0,
960 sizeof(push_prefix), &push_prefix);
961
962 vk_common_CmdDispatch(commandBuffer, passes, 1, 1);
963 }
964
965 /* Pipeline: SCATTER */
966 vk_barrier_compute_w_to_compute_r(commandBuffer);
967
968 uint32_t histogram_offset = pass_idx * (RS_RADIX_SIZE * sizeof(uint32_t));
969
970 for (uint32_t i = 0; i < infoCount; i++) {
971 uint64_t keyvals_even_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0];
972 uint64_t keyvals_odd_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[1];
973 uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;
974
975 bvh_states[i].push_scatter = (struct rs_push_scatter){
976 .devaddr_keyvals_even = keyvals_even_addr,
977 .devaddr_keyvals_odd = keyvals_odd_addr,
978 .devaddr_partitions = internal_addr + rs->internal.partitions.offset,
979 .devaddr_histograms = internal_addr + rs->internal.histograms.offset + histogram_offset,
980 };
981 }
982
983 bool is_even = true;
984
985 while (true) {
986 uint32_t pass_dword = pass_idx / 4;
987
988 /* Bind new pipeline */
989 VkPipeline p =
990 is_even ? rs->pipelines.named.scatter[pass_dword].even : rs->pipelines.named.scatter[pass_dword].odd;
991 device->vk.dispatch_table.CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, p);
992
993 /* Update push constants that changed */
994 VkPipelineLayout pl = is_even ? rs->pipeline_layouts.named.scatter[pass_dword].even
995 : rs->pipeline_layouts.named.scatter[pass_dword].odd;
996
997 for (uint32_t i = 0; i < infoCount; i++) {
998 if (!bvh_states[i].node_count)
999 continue;
1000 if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
1001 continue;
1002
1003 bvh_states[i].push_scatter.pass_offset = (pass_idx & 3) * RS_RADIX_LOG2;
1004
1005 vk_common_CmdPushConstants(commandBuffer, pl, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(struct rs_push_scatter),
1006 &bvh_states[i].push_scatter);
1007
1008 vk_common_CmdDispatch(commandBuffer, bvh_states[i].scatter_blocks, 1, 1);
1009
1010 bvh_states[i].push_scatter.devaddr_histograms += (RS_RADIX_SIZE * sizeof(uint32_t));
1011 }
1012
1013 /* Continue? */
1014 if (++pass_idx >= keyval_bytes)
1015 break;
1016
1017 vk_barrier_compute_w_to_compute_r(commandBuffer);
1018
1019 is_even ^= true;
1020 }
1021
1022 radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
1023
1024 cmd_buffer->state.flush_bits |= flush_bits;
1025 }
1026
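/* LBVH internal-node construction in two phases: lbvh_main derives the node
 * hierarchy from the Morton-sorted ids, then lbvh_generate_ir turns that hierarchy
 * into IR box nodes with bounds propagated up from the leaves. */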
1027 static void
1028 lbvh_build_internal(VkCommandBuffer commandBuffer, uint32_t infoCount,
1029 const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states,
1030 enum radv_cmd_flush_bits flush_bits)
1031 {
1032 VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
1033 struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
1034
1035 radv_write_user_event_marker(cmd_buffer, UserEventPush, "lbvh");
1036
1037 device->vk.dispatch_table.CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
1038 device->meta_state.accel_struct_build.lbvh_main_pipeline);
1039
1040 for (uint32_t i = 0; i < infoCount; ++i) {
1041 if (bvh_states[i].config.internal_type != INTERNAL_BUILD_TYPE_LBVH)
1042 continue;
1043
1044 uint32_t src_scratch_offset = bvh_states[i].scratch_offset;
1045 uint32_t internal_node_count = MAX2(bvh_states[i].node_count, 2) - 1;
1046
1047 const struct lbvh_main_args consts = {
1048 .bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
1049 .src_ids = pInfos[i].scratchData.deviceAddress + src_scratch_offset,
1050 .node_info = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.lbvh_node_offset,
1051 .id_count = bvh_states[i].node_count,
1052 .internal_node_base = bvh_states[i].scratch.internal_node_offset - bvh_states[i].scratch.ir_offset,
1053 };
1054
1055 vk_common_CmdPushConstants(commandBuffer, device->meta_state.accel_struct_build.lbvh_main_p_layout,
1056 VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);
1057 radv_unaligned_dispatch(cmd_buffer, internal_node_count, 1, 1);
1058 bvh_states[i].node_count = internal_node_count;
1059 bvh_states[i].internal_node_count = internal_node_count;
1060 }
1061
1062 cmd_buffer->state.flush_bits |= flush_bits;
1063
1064 device->vk.dispatch_table.CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
1065 device->meta_state.accel_struct_build.lbvh_generate_ir_pipeline);
1066
1067 for (uint32_t i = 0; i < infoCount; ++i) {
1068 if (bvh_states[i].config.internal_type != INTERNAL_BUILD_TYPE_LBVH)
1069 continue;
1070
1071 const struct lbvh_generate_ir_args consts = {
1072 .bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
1073 .node_info = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.lbvh_node_offset,
1074 .header = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
1075 .internal_node_base = bvh_states[i].scratch.internal_node_offset - bvh_states[i].scratch.ir_offset,
1076 };
1077
1078 vk_common_CmdPushConstants(commandBuffer, device->meta_state.accel_struct_build.lbvh_generate_ir_p_layout,
1079 VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);
1080 radv_unaligned_dispatch(cmd_buffer, bvh_states[i].internal_node_count, 1, 1);
1081 }
1082
1083 radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
1084 }
1085
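/* PLOC internal-node construction: a single dispatch repeatedly clusters the sorted
 * leaves into internal nodes, ping-ponging between the two id buffers and using the
 * prefix-scan partition scratch for cross-workgroup coordination; the task counters
 * in the IR header drive the phase synchronization. */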
1086 static void
1087 ploc_build_internal(VkCommandBuffer commandBuffer, uint32_t infoCount,
1088 const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states)
1089 {
1090 VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
1091 struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
1092
1093 radv_write_user_event_marker(cmd_buffer, UserEventPush, "ploc");
1094
1095 device->vk.dispatch_table.CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
1096 device->meta_state.accel_struct_build.ploc_pipeline);
1097
1098 for (uint32_t i = 0; i < infoCount; ++i) {
1099 if (bvh_states[i].config.internal_type != INTERNAL_BUILD_TYPE_PLOC)
1100 continue;
1101
1102 uint32_t src_scratch_offset = bvh_states[i].scratch_offset;
1103 uint32_t dst_scratch_offset = (src_scratch_offset == bvh_states[i].scratch.sort_buffer_offset[0])
1104 ? bvh_states[i].scratch.sort_buffer_offset[1]
1105 : bvh_states[i].scratch.sort_buffer_offset[0];
1106
1107 const struct ploc_args consts = {
1108 .bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
1109 .header = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
1110 .ids_0 = pInfos[i].scratchData.deviceAddress + src_scratch_offset,
1111 .ids_1 = pInfos[i].scratchData.deviceAddress + dst_scratch_offset,
1112 .prefix_scan_partitions =
1113 pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ploc_prefix_sum_partition_offset,
1114 .internal_node_offset = bvh_states[i].scratch.internal_node_offset - bvh_states[i].scratch.ir_offset,
1115 };
1116
1117 vk_common_CmdPushConstants(commandBuffer, device->meta_state.accel_struct_build.ploc_p_layout,
1118 VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);
1119 vk_common_CmdDispatch(commandBuffer, MAX2(DIV_ROUND_UP(bvh_states[i].node_count, PLOC_WORKGROUP_SIZE), 1), 1, 1);
1120 }
1121
1122 radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
1123 }
1124
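/* Convert the intermediate IR BVH in scratch into the final hardware node layout at
 * the destination. The dispatch size is read indirectly from ir_internal_node_count
 * in the IR header; for compacting builds, dst_node_offset is seeded with the start
 * of the internal-node area so the shader can allocate nodes from there. */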
1125 static void
1126 encode_nodes(VkCommandBuffer commandBuffer, uint32_t infoCount,
1127 const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states, bool compact)
1128 {
1129 VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
1130 struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
1131
1132 radv_write_user_event_marker(cmd_buffer, UserEventPush, "encode");
1133
1134 device->vk.dispatch_table.CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
1135 compact ? device->meta_state.accel_struct_build.encode_compact_pipeline
1136 : device->meta_state.accel_struct_build.encode_pipeline);
1137
1138 for (uint32_t i = 0; i < infoCount; ++i) {
1139 if (compact != bvh_states[i].config.compact)
1140 continue;
1141 if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
1142 continue;
1143
1144 VK_FROM_HANDLE(vk_acceleration_structure, accel_struct, pInfos[i].dstAccelerationStructure);
1145
1146 VkGeometryTypeKHR geometry_type = VK_GEOMETRY_TYPE_TRIANGLES_KHR;
1147
1148 /* If the geometry count is 0, then the size does not matter
1149 * because it will be multiplied by 0.
1150 */
1151 if (pInfos[i].geometryCount)
1152 geometry_type =
1153 pInfos[i].pGeometries ? pInfos[i].pGeometries[0].geometryType : pInfos[i].ppGeometries[0]->geometryType;
1154
1155 if (bvh_states[i].config.compact) {
1156 uint32_t dst_offset = bvh_states[i].accel_struct.internal_nodes_offset - bvh_states[i].accel_struct.bvh_offset;
1157 radv_update_buffer_cp(cmd_buffer,
1158 pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset +
1159 offsetof(struct radv_ir_header, dst_node_offset),
1160 &dst_offset, sizeof(uint32_t));
1161 }
1162
1163 const struct encode_args args = {
1164 .intermediate_bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
1165 .output_bvh = vk_acceleration_structure_get_va(accel_struct) + bvh_states[i].accel_struct.bvh_offset,
1166 .header = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
1167 .output_bvh_offset = bvh_states[i].accel_struct.bvh_offset,
1168 .leaf_node_count = bvh_states[i].leaf_node_count,
1169 .geometry_type = geometry_type,
1170 };
1171 vk_common_CmdPushConstants(commandBuffer, device->meta_state.accel_struct_build.encode_p_layout,
1172 VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args), &args);
1173
1174 struct radv_dispatch_info dispatch = {
1175 .unaligned = true,
1176 .ordered = true,
1177 .va = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset +
1178 offsetof(struct radv_ir_header, ir_internal_node_count),
1179 };
1180
1181 radv_compute_dispatch(cmd_buffer, &dispatch);
1182 }
1183 /* This is the final access to the leaf nodes, so there is no need to flush. */
1184
1185 radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
1186 }
1187
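/* Write each destination's radv_accel_struct_header. For compacting builds the
 * size-dependent fields are filled on the GPU by the header shader and only the
 * fields from geometry_count onward are written via the command processor;
 * otherwise everything from compacted_size onward is written directly. */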
1188 static void
1189 init_header(VkCommandBuffer commandBuffer, uint32_t infoCount,
1190 const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states,
1191 struct radv_bvh_batch_state *batch_state)
1192 {
1193 VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
1194 struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
1195
1196 if (batch_state->any_compact) {
1197 radv_write_user_event_marker(cmd_buffer, UserEventPush, "header");
1198
1199 device->vk.dispatch_table.CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
1200 device->meta_state.accel_struct_build.header_pipeline);
1201 }
1202
1203 for (uint32_t i = 0; i < infoCount; ++i) {
1204 if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
1205 continue;
1206 VK_FROM_HANDLE(vk_acceleration_structure, accel_struct, pInfos[i].dstAccelerationStructure);
1207 size_t base = offsetof(struct radv_accel_struct_header, compacted_size);
1208
1209 uint64_t instance_count =
1210 pInfos[i].type == VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR ? bvh_states[i].leaf_node_count : 0;
1211
1212 if (bvh_states[i].config.compact) {
1213 base = offsetof(struct radv_accel_struct_header, geometry_count);
1214
1215 struct header_args args = {
1216 .src = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
1217 .dst = vk_acceleration_structure_get_va(accel_struct),
1218 .bvh_offset = bvh_states[i].accel_struct.bvh_offset,
1219 .instance_count = instance_count,
1220 };
1221
1222 vk_common_CmdPushConstants(commandBuffer, device->meta_state.accel_struct_build.header_p_layout,
1223 VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(args), &args);
1224
1225 radv_unaligned_dispatch(cmd_buffer, 1, 1, 1);
1226 }
1227
1228 struct radv_accel_struct_header header;
1229
1230 header.instance_offset = bvh_states[i].accel_struct.bvh_offset + sizeof(struct radv_bvh_box32_node);
1231 header.instance_count = instance_count;
1232 header.compacted_size = bvh_states[i].accel_struct.size;
1233
1234 header.copy_dispatch_size[0] = DIV_ROUND_UP(header.compacted_size, 16 * 64);
1235 header.copy_dispatch_size[1] = 1;
1236 header.copy_dispatch_size[2] = 1;
1237
1238 header.serialization_size =
1239 header.compacted_size +
1240 align(sizeof(struct radv_accel_struct_serialization_header) + sizeof(uint64_t) * header.instance_count, 128);
1241
1242 header.size = header.serialization_size - sizeof(struct radv_accel_struct_serialization_header) -
1243 sizeof(uint64_t) * header.instance_count;
1244
1245 header.build_flags = pInfos[i].flags;
1246 header.geometry_count = pInfos[i].geometryCount;
1247
1248 radv_update_buffer_cp(cmd_buffer, vk_acceleration_structure_get_va(accel_struct) + base,
1249 (const char *)&header + base, sizeof(header) - base);
1250 }
1251
1252 if (batch_state->any_compact)
1253 radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
1254 }
1255
1256 static void
1257 init_geometry_infos(VkCommandBuffer commandBuffer, uint32_t infoCount,
1258 const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states,
1259 const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos)
1260 {
1261 for (uint32_t i = 0; i < infoCount; ++i) {
1262 if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
1263 continue;
1264 VK_FROM_HANDLE(vk_acceleration_structure, accel_struct, pInfos[i].dstAccelerationStructure);
1265
1266 uint64_t geometry_infos_size = pInfos[i].geometryCount * sizeof(struct radv_accel_struct_geometry_info);
1267
1268 struct radv_accel_struct_geometry_info *geometry_infos = malloc(geometry_infos_size);
1269 if (!geometry_infos)
1270 continue;
1271
1272 for (uint32_t j = 0; j < pInfos[i].geometryCount; ++j) {
1273 const VkAccelerationStructureGeometryKHR *geometry =
1274 pInfos[i].pGeometries ? pInfos[i].pGeometries + j : pInfos[i].ppGeometries[j];
1275 geometry_infos[j].type = geometry->geometryType;
1276 geometry_infos[j].flags = geometry->flags;
1277 geometry_infos[j].primitive_count = ppBuildRangeInfos[i][j].primitiveCount;
1278 }
1279
1280 radv_CmdUpdateBuffer(commandBuffer, accel_struct->buffer,
1281 accel_struct->offset + bvh_states[i].accel_struct.geometry_info_offset, geometry_infos_size,
1282 geometry_infos);
1283
1284 free(geometry_infos);
1285 }
1286 }
1287
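/* Refit path for BLAS updates: re-run the leaf stage against the new geometry data
 * and propagate bounds through the existing tree topology. The ready counters in
 * the update scratch (zeroed in radv_CmdBuildAccelerationStructuresKHR) track when
 * all children of an internal node have been refitted so the node itself can be
 * processed. */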
1288 static void
1289 update(VkCommandBuffer commandBuffer, uint32_t infoCount, const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
1290 const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos, struct bvh_state *bvh_states)
1291 {
1292 VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
1293 struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
1294
1295 radv_write_user_event_marker(cmd_buffer, UserEventPush, "update");
1296
1297 device->vk.dispatch_table.CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
1298 device->meta_state.accel_struct_build.update_pipeline);
1299
1300 for (uint32_t i = 0; i < infoCount; ++i) {
1301 if (bvh_states[i].config.internal_type != INTERNAL_BUILD_TYPE_UPDATE)
1302 continue;
1303
1304 uint32_t leaf_node_count = 0;
1305 for (uint32_t j = 0; j < pInfos[i].geometryCount; ++j) {
1306 leaf_node_count += ppBuildRangeInfos[i][j].primitiveCount;
1307 }
1308
1309 VK_FROM_HANDLE(vk_acceleration_structure, src_bvh, pInfos[i].srcAccelerationStructure);
1310 VK_FROM_HANDLE(vk_acceleration_structure, dst_bvh, pInfos[i].dstAccelerationStructure);
1311 struct update_args update_consts = {
1312 .src = vk_acceleration_structure_get_va(src_bvh),
1313 .dst = vk_acceleration_structure_get_va(dst_bvh),
1314 .leaf_bounds = pInfos[i].scratchData.deviceAddress,
1315 .internal_ready_count =
1316 pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.internal_ready_count_offset,
1317 .leaf_node_count = leaf_node_count,
1318 };
1319
1320 for (unsigned j = 0; j < pInfos[i].geometryCount; ++j) {
1321 const VkAccelerationStructureGeometryKHR *geom =
1322 pInfos[i].pGeometries ? &pInfos[i].pGeometries[j] : pInfos[i].ppGeometries[j];
1323
1324 const VkAccelerationStructureBuildRangeInfoKHR *build_range_info = &ppBuildRangeInfos[i][j];
1325
1326 update_consts.geom_data = fill_geometry_data(pInfos[i].type, &bvh_states[i], j, geom, build_range_info);
1327
1328 vk_common_CmdPushConstants(commandBuffer, device->meta_state.accel_struct_build.update_p_layout,
1329 VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(update_consts), &update_consts);
1330 radv_unaligned_dispatch(cmd_buffer, build_range_info->primitiveCount, 1, 1);
1331
1332 bvh_states[i].leaf_node_count += build_range_info->primitiveCount;
1333 bvh_states[i].node_count += build_range_info->primitiveCount;
1334 }
1335 }
1336
1337 radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
1338 }
1339
1340 VKAPI_ATTR void VKAPI_CALL
1341 radv_CmdBuildAccelerationStructuresKHR(VkCommandBuffer commandBuffer, uint32_t infoCount,
1342 const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
1343 const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos)
1344 {
1345 VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
1346 struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
1347 struct radv_meta_saved_state saved_state;
1348
1349 VkResult result = radv_device_init_accel_struct_build_state(device);
1350 if (result != VK_SUCCESS) {
1351 vk_command_buffer_set_error(&cmd_buffer->vk, result);
1352 return;
1353 }
1354
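   /* Barrier applied between the build phases below: every phase reads what
    * the previous compute dispatches wrote, so a CS partial flush plus shader
    * write/read visibility is needed each time.
    */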
1355 enum radv_cmd_flush_bits flush_bits =
1356 RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
1357 radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_WRITE_BIT, NULL) |
1358 radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_READ_BIT, NULL);
1359
1360 radv_meta_save(&saved_state, cmd_buffer,
1361 RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);
1362 struct bvh_state *bvh_states = calloc(infoCount, sizeof(struct bvh_state));
1363
1364 radv_describe_begin_accel_struct_build(cmd_buffer, infoCount);
1365
1366 struct radv_bvh_batch_state batch_state = {0};
1367
1368 for (uint32_t i = 0; i < infoCount; ++i) {
1369 uint32_t leaf_node_count = 0;
1370 for (uint32_t j = 0; j < pInfos[i].geometryCount; ++j) {
1371 leaf_node_count += ppBuildRangeInfos[i][j].primitiveCount;
1372 }
1373
1374 get_build_layout(device, leaf_node_count, pInfos + i, &bvh_states[i].accel_struct, &bvh_states[i].scratch);
1375
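      /* Decide how this BVH is built (LBVH vs. PLOC vs. update-only refit) and
       * record in batch_state which passes the batch as a whole will need.
       */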
1376 struct build_config config = build_config(leaf_node_count, pInfos + i);
1377 bvh_states[i].config = config;
1378
1379 if (config.compact)
1380 batch_state.any_compact = true;
1381 else
1382 batch_state.any_non_compact = true;
1383
1384 if (config.updateable)
1385 batch_state.any_updateable = true;
1386 else
1387 batch_state.any_non_updateable = true;
1388
1389 if (config.internal_type == INTERNAL_BUILD_TYPE_PLOC) {
1390 batch_state.any_ploc = true;
1391 } else if (config.internal_type == INTERNAL_BUILD_TYPE_LBVH) {
1392 batch_state.any_lbvh = true;
1393 } else if (config.internal_type == INTERNAL_BUILD_TYPE_UPDATE) {
1394 batch_state.any_update = true;
1395 } else {
1396 unreachable("Unknown internal_build_type");
1397 }
1398
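      /* Full builds seed the IR header in scratch: the bounds start out
       * inverted (0x7fffffff / 0x80000000 look like INT32_MAX / INT32_MIN,
       * presumably so the shaders can grow them with atomic min/max), and the
       * PLOC task counters start at TASK_INDEX_INVALID. Updates instead zero
       * the ready counts and, for non-in-place updates, copy the unchanged
       * metadata from src to dst.
       */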
1399 if (bvh_states[i].config.internal_type != INTERNAL_BUILD_TYPE_UPDATE) {
1400 /* The internal node count is updated in lbvh_build_internal for LBVH
1401 * and from the PLOC shader for PLOC. */
1402 struct radv_ir_header header = {
1403 .min_bounds = {0x7fffffff, 0x7fffffff, 0x7fffffff},
1404 .max_bounds = {0x80000000, 0x80000000, 0x80000000},
1405 .dispatch_size_y = 1,
1406 .dispatch_size_z = 1,
1407 .sync_data =
1408 {
1409 .current_phase_end_counter = TASK_INDEX_INVALID,
1410 /* Will be updated by the first PLOC shader invocation */
1411 .task_counts = {TASK_INDEX_INVALID, TASK_INDEX_INVALID},
1412 },
1413 };
1414
1415 radv_update_buffer_cp(cmd_buffer, pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
1416 &header, sizeof(header));
1417 } else {
1418 /* Prepare ready counts for internal nodes */
1419 radv_fill_buffer(cmd_buffer, NULL, NULL,
1420 pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.internal_ready_count_offset,
1421 bvh_states[i].scratch.update_size - bvh_states[i].scratch.internal_ready_count_offset, 0x0);
1422 if (pInfos[i].srcAccelerationStructure != pInfos[i].dstAccelerationStructure) {
1423 VK_FROM_HANDLE(vk_acceleration_structure, src_as, pInfos[i].srcAccelerationStructure);
1424 VK_FROM_HANDLE(vk_acceleration_structure, dst_as, pInfos[i].dstAccelerationStructure);
1425
1426 VK_FROM_HANDLE(radv_buffer, src_as_buffer, src_as->buffer);
1427 VK_FROM_HANDLE(radv_buffer, dst_as_buffer, dst_as->buffer);
1428
1429 /* Copy header/metadata */
1430 radv_copy_buffer(cmd_buffer, src_as_buffer->bo, dst_as_buffer->bo, src_as_buffer->offset + src_as->offset,
1431 dst_as_buffer->offset + dst_as->offset, bvh_states[i].accel_struct.bvh_offset);
1432 }
1433 }
1434 }
1435
1436 cmd_buffer->state.current_event_type = EventInternalUnknown;
1437
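   /* Leaf and internal node construction is only needed for full builds; pure
    * updates skip straight to the refit pass at the end.
    */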
1438 if (batch_state.any_lbvh || batch_state.any_ploc) {
1439 if (batch_state.any_non_updateable)
1440 build_leaves(commandBuffer, infoCount, pInfos, ppBuildRangeInfos, bvh_states, false);
1441 if (batch_state.any_updateable)
1442 build_leaves(commandBuffer, infoCount, pInfos, ppBuildRangeInfos, bvh_states, true);
1443
1444 cmd_buffer->state.flush_bits |= flush_bits;
1445
1446 morton_generate(commandBuffer, infoCount, pInfos, bvh_states, flush_bits);
1447
1448 morton_sort(commandBuffer, infoCount, pInfos, bvh_states, flush_bits);
1449
1450 cmd_buffer->state.flush_bits |= flush_bits;
1451
1452 if (batch_state.any_lbvh)
1453 lbvh_build_internal(commandBuffer, infoCount, pInfos, bvh_states, flush_bits);
1454
1455 if (batch_state.any_ploc)
1456 ploc_build_internal(commandBuffer, infoCount, pInfos, bvh_states);
1457
1458 cmd_buffer->state.flush_bits |= flush_bits;
1459
1460 if (batch_state.any_non_compact)
1461 encode_nodes(commandBuffer, infoCount, pInfos, bvh_states, false);
1462
1463 if (batch_state.any_compact)
1464 encode_nodes(commandBuffer, infoCount, pInfos, bvh_states, true);
1465
1466 cmd_buffer->state.flush_bits |= flush_bits;
1467 }
1468
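   /* Finalize: write each acceleration structure's header, dump per-geometry
    * info when RRA tracing is enabled, and run the refit pass for any updates.
    */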
1469 init_header(commandBuffer, infoCount, pInfos, bvh_states, &batch_state);
1470
1471 if (device->rra_trace.accel_structs)
1472 init_geometry_infos(commandBuffer, infoCount, pInfos, bvh_states, ppBuildRangeInfos);
1473
1474 if (batch_state.any_update)
1475 update(commandBuffer, infoCount, pInfos, ppBuildRangeInfos, bvh_states);
1476
1477 radv_describe_end_accel_struct_build(cmd_buffer);
1478
1479 free(bvh_states);
1480 radv_meta_restore(&saved_state, cmd_buffer);
1481 }
1482
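/* GPU-side acceleration structure copy: runs the copy shader in
 * RADV_COPY_MODE_COPY. The dispatch size is not known on the CPU, so it is read
 * indirectly from copy_dispatch_size in the source BVH's header.
 */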
1483 VKAPI_ATTR void VKAPI_CALL
1484 radv_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, const VkCopyAccelerationStructureInfoKHR *pInfo)
1485 {
1486 VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
1487 VK_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src);
1488 VK_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst);
1489 VK_FROM_HANDLE(radv_buffer, src_buffer, src->buffer);
1490 struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
1491 struct radv_meta_saved_state saved_state;
1492
1493 VkResult result = radv_device_init_accel_struct_copy_state(device);
1494 if (result != VK_SUCCESS) {
1495 vk_command_buffer_set_error(&cmd_buffer->vk, result);
1496 return;
1497 }
1498
1499 radv_meta_save(&saved_state, cmd_buffer,
1500 RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);
1501
1502 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
1503 device->meta_state.accel_struct_build.copy_pipeline);
1504
1505 struct copy_args consts = {
1506 .src_addr = vk_acceleration_structure_get_va(src),
1507 .dst_addr = vk_acceleration_structure_get_va(dst),
1508 .mode = RADV_COPY_MODE_COPY,
1509 };
1510
1511 vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1512 device->meta_state.accel_struct_build.copy_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
1513 sizeof(consts), &consts);
1514
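   /* Make earlier writes to the source header visible to the indirect dispatch
    * below, which reads copy_dispatch_size as its indirect arguments.
    */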
1515 cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT,
1516 VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT, NULL);
1517
1518 radv_indirect_dispatch(
1519 cmd_buffer, src_buffer->bo,
1520 vk_acceleration_structure_get_va(src) + offsetof(struct radv_accel_struct_header, copy_dispatch_size));
1521 radv_meta_restore(&saved_state, cmd_buffer);
1522 }
1523
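/* Serialized acceleration structure data starts with the driver UUID followed
 * by the cache UUID (see the serialization path below); compatibility is a
 * plain memcmp against both.
 */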
1524 VKAPI_ATTR void VKAPI_CALL
1525 radv_GetDeviceAccelerationStructureCompatibilityKHR(VkDevice _device,
1526 const VkAccelerationStructureVersionInfoKHR *pVersionInfo,
1527 VkAccelerationStructureCompatibilityKHR *pCompatibility)
1528 {
1529 VK_FROM_HANDLE(radv_device, device, _device);
1530 const struct radv_physical_device *pdev = radv_device_physical(device);
1531 bool compat = memcmp(pVersionInfo->pVersionData, pdev->driver_uuid, VK_UUID_SIZE) == 0 &&
1532 memcmp(pVersionInfo->pVersionData + VK_UUID_SIZE, pdev->cache_uuid, VK_UUID_SIZE) == 0;
1533 *pCompatibility = compat ? VK_ACCELERATION_STRUCTURE_COMPATIBILITY_COMPATIBLE_KHR
1534 : VK_ACCELERATION_STRUCTURE_COMPATIBILITY_INCOMPATIBLE_KHR;
1535 }
1536
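/* Host-side copy commands are not implemented; presumably they are unreachable
 * because the accelerationStructureHostCommands feature is not exposed.
 */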
1537 VKAPI_ATTR VkResult VKAPI_CALL
1538 radv_CopyMemoryToAccelerationStructureKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation,
1539 const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo)
1540 {
1541 unreachable("Unimplemented");
1542 return VK_ERROR_FEATURE_NOT_PRESENT;
1543 }
1544
1545 VKAPI_ATTR VkResult VKAPI_CALL
1546 radv_CopyAccelerationStructureToMemoryKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation,
1547 const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo)
1548 {
1549 unreachable("Unimplemented");
1550 return VK_ERROR_FEATURE_NOT_PRESENT;
1551 }
1552
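/* Deserialization: runs the copy shader in RADV_COPY_MODE_DESERIALIZE. Unlike
 * the other copy paths this uses a fixed 512-group dispatch, presumably because
 * the size of the serialized source is only known on the GPU.
 */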
1553 VKAPI_ATTR void VKAPI_CALL
1554 radv_CmdCopyMemoryToAccelerationStructureKHR(VkCommandBuffer commandBuffer,
1555 const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo)
1556 {
1557 VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
1558 VK_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst);
1559 struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
1560 struct radv_meta_saved_state saved_state;
1561
1562 VkResult result = radv_device_init_accel_struct_copy_state(device);
1563 if (result != VK_SUCCESS) {
1564 vk_command_buffer_set_error(&cmd_buffer->vk, result);
1565 return;
1566 }
1567
1568 radv_meta_save(&saved_state, cmd_buffer,
1569 RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);
1570
1571 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
1572 device->meta_state.accel_struct_build.copy_pipeline);
1573
1574 const struct copy_args consts = {
1575 .src_addr = pInfo->src.deviceAddress,
1576 .dst_addr = vk_acceleration_structure_get_va(dst),
1577 .mode = RADV_COPY_MODE_DESERIALIZE,
1578 };
1579
1580 vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1581 device->meta_state.accel_struct_build.copy_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
1582 sizeof(consts), &consts);
1583
1584 vk_common_CmdDispatch(commandBuffer, 512, 1, 1);
1585 radv_meta_restore(&saved_state, cmd_buffer);
1586 }
1587
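/* Serialization: runs the copy shader in RADV_COPY_MODE_SERIALIZE, dispatched
 * indirectly from the source header, then writes the driver/cache UUIDs via the
 * CP so radv_GetDeviceAccelerationStructureCompatibilityKHR can validate the
 * blob on import.
 */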
1588 VKAPI_ATTR void VKAPI_CALL
1589 radv_CmdCopyAccelerationStructureToMemoryKHR(VkCommandBuffer commandBuffer,
1590 const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo)
1591 {
1592 VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
1593 VK_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src);
1594 VK_FROM_HANDLE(radv_buffer, src_buffer, src->buffer);
1595 struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
1596 const struct radv_physical_device *pdev = radv_device_physical(device);
1597 struct radv_meta_saved_state saved_state;
1598
1599 VkResult result = radv_device_init_accel_struct_copy_state(device);
1600 if (result != VK_SUCCESS) {
1601 vk_command_buffer_set_error(&cmd_buffer->vk, result);
1602 return;
1603 }
1604
1605 radv_meta_save(&saved_state, cmd_buffer,
1606 RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);
1607
1608 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
1609 device->meta_state.accel_struct_build.copy_pipeline);
1610
1611 const struct copy_args consts = {
1612 .src_addr = vk_acceleration_structure_get_va(src),
1613 .dst_addr = pInfo->dst.deviceAddress,
1614 .mode = RADV_COPY_MODE_SERIALIZE,
1615 };
1616
1617 vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1618 device->meta_state.accel_struct_build.copy_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
1619 sizeof(consts), &consts);
1620
1621 cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT,
1622 VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT, NULL);
1623
1624 radv_indirect_dispatch(
1625 cmd_buffer, src_buffer->bo,
1626 vk_acceleration_structure_get_va(src) + offsetof(struct radv_accel_struct_header, copy_dispatch_size));
1627 radv_meta_restore(&saved_state, cmd_buffer);
1628
1629 /* Set the header of the serialized data. */
1630 uint8_t header_data[2 * VK_UUID_SIZE];
1631 memcpy(header_data, pdev->driver_uuid, VK_UUID_SIZE);
1632 memcpy(header_data + VK_UUID_SIZE, pdev->cache_uuid, VK_UUID_SIZE);
1633
1634 radv_update_buffer_cp(cmd_buffer, pInfo->dst.deviceAddress, header_data, sizeof(header_data));
1635 }
1636
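/* Indirect builds are not supported; presumably the corresponding
 * accelerationStructureIndirectBuild feature is not exposed, so this entry
 * point should never be reached.
 */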
1637 VKAPI_ATTR void VKAPI_CALL
1638 radv_CmdBuildAccelerationStructuresIndirectKHR(VkCommandBuffer commandBuffer, uint32_t infoCount,
1639 const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
1640 const VkDeviceAddress *pIndirectDeviceAddresses,
1641 const uint32_t *pIndirectStrides,
1642 const uint32_t *const *ppMaxPrimitiveCounts)
1643 {
1644 unreachable("Unimplemented");
1645 }
1646