/* * Copyright © 2022 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ /** * This file contains a redefinition of structures defined in the GRL library. * We need to have those structures defined to allocate & prepare data for * the OpenCL kernels building acceleration structures. Unfortunately because * of C++ & OpenCL assumptions in GRL, it's no possible to just include GRL * header files directly so we have to redefine stuff here. */ #ifndef GRL_STRUCTS_H #define GRL_STRUCTS_H #include "GRLStructs.h" #include "GRLRTASCommon.h" struct MKBuilderState { qword geomDesc_buffer; qword build_primref_buffer; qword build_globals; qword bvh_buffer; dword leaf_type; dword leaf_size; }; #define PREFIX_MK_STATE(prefix, obj) \ (struct prefix##_MKBuilderState) { \ .geomDesc_buffer = (obj).geomDesc_buffer, \ .build_primref_buffer = (obj).build_primref_buffer, \ .build_globals = (obj).build_globals, \ .bvh_buffer = (obj).bvh_buffer, \ .leaf_type = (obj).leaf_type, \ .leaf_size = (obj).leaf_size, \ } struct MKSizeEstimate { dword numTriangles; dword numProcedurals; dword numPrimitives; dword numMeshes; dword numBuildPrimitives; dword numPrimitivesToSplit; dword instance_descs_start; dword geo_meta_data_start; dword node_data_start; dword leaf_data_start; dword procedural_data_start; dword back_pointer_start; dword sizeTotal; dword updateScratchSizeTotal; dword fatleaf_table_start; dword innernode_table_start; dword max_fatleaves; size_t max_instance_leafs; size_t max_inner_nodes; size_t leaf_data_size; size_t min_primitives; size_t max_primitives; }; #define PREFIX_MK_SIZE(prefix, obj) \ (struct prefix##_MKSizeEstimate) { \ .numTriangles = (obj).numTriangles, \ .numProcedurals = (obj).numProcedurals, \ .numPrimitives = (obj).numPrimitives, \ .numMeshes = (obj).numMeshes, \ .numBuildPrimitives = (obj).numBuildPrimitives, \ .numPrimitivesToSplit = (obj).numPrimitivesToSplit, \ .instance_descs_start = (obj).instance_descs_start, \ .geo_meta_data_start = (obj).geo_meta_data_start, \ .node_data_start = (obj).node_data_start, \ .leaf_data_start = (obj).leaf_data_start, \ .procedural_data_start = (obj).procedural_data_start, \ .back_pointer_start = (obj).back_pointer_start, \ .sizeTotal = (obj).sizeTotal, \ .updateScratchSizeTotal = (obj).updateScratchSizeTotal, \ .fatleaf_table_start = (obj).fatleaf_table_start, \ .innernode_table_start = (obj).innernode_table_start, \ .max_fatleaves = (obj).max_fatleaves, \ } typedef struct AABB { float lower[4]; float upper[4]; } AABB; struct Globals { struct AABB centroidBounds; unsigned int build_record_start; unsigned int numPrimitives; unsigned int leafPrimType; unsigned int leafSize; unsigned int numSplittedPrimitives; unsigned int numBuildRecords; // spatial split sate unsigned int numOriginalPrimitives; float presplitPrioritySum; float probThreshold; // binned-sah bfs state unsigned int counter; unsigned int numBuildRecords_extended; // sync variable used for global-sync on work groups unsigned int sync; /* morton code builder state */ unsigned int shift; // used by adaptive mc-builder unsigned int shift_mask; // used by adaptive mc-builder unsigned int binary_hierarchy_root; unsigned int p0_allocated_num; unsigned int p0_created_num; unsigned int morton_sort_in_flight; unsigned int sort_iterations; gpuva_t binary_hierarchy_buffer; // pointer to the binary morton code hierarchy. Stashed here as a debug aid }; typedef struct BVHBase { // TODO: Implement the "copy-first-node" trick... duplicate root node here uint64_t rootNodeOffset; uint32_t reserved; uint32_t nodeDataCur; // nodeDataStart is sizeof(BVHBase) / 64 = BVH_ROOT_NODE_OFFSET / 64 uint32_t quadLeafStart; uint32_t quadLeafCur; uint32_t proceduralDataStart; uint32_t proceduralDataCur; uint32_t instanceLeafStart; uint32_t instanceLeafEnd; uint32_t backPointerDataStart; // uint32_t refitTreeletsDataStart; // refit structs uint32_t refitStartPointDataStart; // uint32_t BVHDataEnd; // number of bottom treelets // if 1, then the bottom treelet is also tip treelet uint32_t refitTreeletCnt; uint32_t refitTreeletCnt2; // always 0, used for atomic updates // data layout: // @backPointerDataStart // 'backpointer' - a dword per inner node. // The bits are used as follows: // 2:0 --> Used as a refit counter during BVH refitting. MBZ // 5:3 --> Number of children // 31:6 --> Index of the parent node in the internal node array // The root node has a parent index of all ones // @refitTreeletsDataStart // RefitTreelet[], the last treelet is for top treelet all previous are for bottom // @refitStartPointDataStart // for each treelet T there is [T.startpoint_offset, T.numStartpoints) interval of startpoints here in that space // @backPointerDataEnd uint32_t fatLeafCount; // number of internal nodes which are "fat-leaves" uint32_t innerCount; // number of internal nodes which are true inner nodes (all internalNode children) uint32_t fatLeafTableStart; uint32_t innerTableStart; uint32_t _pad[12]; struct RTASMetaData Meta; } BVHBase; struct BatchedInitGlobalsData { qword p_build_globals; qword p_bvh_buffer; dword numPrimitives; dword numGeometries; dword numInstances; dword instance_descs_start; dword geo_meta_data_start; dword node_data_start; dword leaf_data_start; dword procedural_data_start; dword back_pointer_start; dword sizeTotal; dword leafType; dword leafSize; dword fatleaf_table_start; dword innernode_table_start; }; #define BFS_NUM_BINS 16 #define BFS_NUM_VCONTEXTS 256 #define BFS_MAX_DEPTH 32 #define QNODE_GLOBAL_ROOT_BUFFER_MIN_ENTRIES_NUM 16384 struct BFS_Split { float sah; int dim; int pos; }; struct BFS_BinInfo { float min_max[18 * BFS_NUM_BINS]; // layout: bins[axis][num_bins][6] // The 6 are lower(xyz) and -upper(xyz) // bins use negated-max so that we can use vectorized mins instead of min/max pairs uint counts[3 * BFS_NUM_BINS]; }; struct SAHBuildGlobals { qword p_primref_index_buffers; qword p_primrefs_buffer; qword p_bvh2; qword p_globals; // TODO: deprecate this qword p_bvh_base; gpuva_t p_qnode_root_buffer; dword flags; // bit 1 is 'alloc_backpointers'. bit 2 is 'need_masks' dword num_primrefs; dword leaf_size; dword leaf_type; dword root_buffer_num_produced; dword root_buffer_num_produced_hi; dword root_buffer_num_consumed; dword root_buffer_num_consumed_hi; dword root_buffer_num_to_consume; dword root_buffer_num_to_consume_hi; }; typedef union LRBounds { struct { struct AABB3f left_centroid_bounds; struct AABB3f left_geom_bounds; struct AABB3f right_centroid_bounds; struct AABB3f right_geom_bounds; } boxes; struct { float Array[24]; } scalars; } LRBounds; struct VContext { uint dispatch_primref_begin; // range of primrefs for this task uint dispatch_primref_end; uint bvh2_root; // BVH2 root node for this task uint tree_depth; // depth of this node in the tree uint num_left; // primref counts uint num_right; uint lr_mask; // lower 8b : left mask. upper 8b : right mask uint batch_index; // pass1 global working state and output struct BFS_Split split; struct BFS_BinInfo global_bin_info; // pass2 global working state and output LRBounds lr_bounds; }; struct BFSDispatchRecord { ushort batch_index; ushort context_id; }; struct BFSDispatchQueue { uint num_dispatches; uint wg_count[BFS_NUM_VCONTEXTS]; struct BFSDispatchRecord records[BFS_NUM_VCONTEXTS]; }; struct BFS1SpillStackEntry { uint primref_begin; uint primref_end; uint bvh2_root; ushort tree_depth; ushort batch_index; }; struct BFS1SpillStack { uint size; struct BFS1SpillStackEntry entries[BFS_NUM_VCONTEXTS * BFS_MAX_DEPTH]; }; struct QNodeGlobalRootBufferEntry { uint bvh2_node; uint qnode; uint build_idx; uint _pad; }; struct QNodeGlobalRootBuffer { uint curr_entries_offset; // we use "entries" as two buffers, so offset is either 0 or QNODE_GLOBAL_ROOT_BUFFER_MIN_ENTRIES_NUM struct QNodeGlobalRootBufferEntry entries[QNODE_GLOBAL_ROOT_BUFFER_MIN_ENTRIES_NUM * 2]; }; struct DFSDispatchRecord { uint primref_base; uint bvh2_base; uint batch_index; ushort num_primrefs; ushort tree_depth; }; struct DFSDispatchQueue { struct DFSDispatchRecord records[BFS_NUM_VCONTEXTS * 2]; }; #define VCONTEXT_STATE_EXECUTING 0 #define VCONTEXT_STATE_UNALLOCATED 1 union SchedulerUnion { struct VContextScheduler { ///////////////////////////////////////////////////////////// // State data used for communication with command streamer // NOTE: This part must match definition in 'new_sah_builder.grl' ///////////////////////////////////////////////////////////// dword num_bfs_wgs; dword num_dfs_wgs; dword scheduler_postsync; dword _pad1; dword num_trivial_builds; // number of trivial builds (#primrefs < leaf_size). dword num_single_builds; // number of single-wg builds (#primrefs < threshold) dword batched_build_wg_count; // number of wgs to dispatch for initial BFS pass dword batched_build_loop_mask; // value is 0 if #builds <= #contexts. else 1 command streamer uses this as a loop condition ///////////////////////////////////////////////////////////// dword batched_build_count; // number of batched builds in the SAHBuildGlobals buffer dword batched_build_offset; // location of the first batched-build in the SAHBuildGlobals buffer dword vcontext_state[BFS_NUM_VCONTEXTS]; struct BFSDispatchQueue bfs_queue; struct DFSDispatchQueue dfs_queue; struct VContext contexts[BFS_NUM_VCONTEXTS]; struct BFS1SpillStack bfs2_spill_stack; } vContextScheduler; struct QnodeScheduler { dword num_qnode_grb_curr_entries; dword num_qnode_grb_new_entries; dword scheduler_postsync; dword _pad1; dword num_trivial_builds; // number of trivial builds (#primrefs < leaf_size). dword num_single_builds; // number of single-wg builds (#primrefs < threshold) dword batched_builds_to_process; dword num_max_qnode_global_root_buffer_entries; // number of maximum entries to global root buffer ///////////////////////////////////////////////////////////// dword batched_build_count; // number of batched builds in the SAHBuildGlobals buffer dword batched_build_offset; // location of the first batched-build in the SAHBuildGlobals buffer struct QNodeGlobalRootBuffer qnode_global_root_buffer; } qnodeScheduler; }; struct BVH2Node { struct AABB3f box; uint meta_u; // leaf: primref start. inner: offset from node to its first child uint meta_ss; //ushort meta_s; // leaf: primref count. inner: offset from first to second child, in nodes //uchar is_inner; // 1 if inner, 0 if leaf //uchar mask; }; struct BVH2 { uint num_nodes; uint _pad[7]; // align to 32B }; struct BatchedBLSDispatchEntry { ///////////////////////////////////////////////////////////// // State data used for communication with command streamer // NOTE: This part must match definition in 'msb_radix_bitonic_sort.grl' ///////////////////////////////////////////////////////////// qword p_data_buffer; qword num_elements; // number of elements in p_data_buffer }; struct SAHBuildArgsBatchable { qword p_globals_ptrs; qword p_scheduler; qword p_buffers_info; qword p_sah_globals; dword num_max_qnode_global_root_buffer_entries; dword num_builds; }; #define PREFIX_MK_SAH_BUILD_ARGS_BATCHABLE(prefix, obj) \ (struct prefix##_SAHBuildArgsBatchable) { \ .p_globals_ptrs = (obj).p_globals_ptrs, \ .p_scheduler = (obj).p_scheduler, \ .p_buffers_info = (obj).p_buffers_info, \ .p_sah_globals = (obj).p_sah_globals, \ .num_max_qnode_global_root_buffer_entries = \ (obj).num_max_qnode_global_root_buffer_entries, \ .num_builds = (obj).num_builds, \ } struct SAHBuildBuffersInfo { gpuva_t p_globals; gpuva_t p_primref_index_buffers; gpuva_t p_primrefs_buffer; gpuva_t p_bvh2; gpuva_t p_bvh_base; gpuva_t p_qnode_root_buffer; dword sah_globals_flags; dword _pad; gpuva_t _pad2; }; #endif /* GRL_STRUCTS_H */