//
// Copyright (C) 2009-2021 Intel Corporation
//
// SPDX-License-Identifier: MIT
//
//

#include "api_interface.h"
#include "common.h"
#include "instance.h"
#include "misc_shared.h"
#include "mem_utils.h"

#define DBG(x)
#define ENABLE_CHECKS 0

#define CACHELINE_SIZE 64
#define CACHELINE_PER_BLOCK 4
#define BLOCK_SIZE 256 // = CACHELINE_SIZE * CACHELINE_PER_BLOCK;

GRL_INLINE
uint32_t getGeomDescPrimitiveCountAsUint32t(global GRL_RAYTRACING_GEOMETRY_DESC *geomDesc, uint64_t index)
{
    return (uint32_t)GRL_get_primitive_count(&geomDesc[index]);
}

GRL_INLINE
uint32_t getGeomDescTypeAndFlagsAsUint32t(global GRL_RAYTRACING_GEOMETRY_DESC *geomDesc, uint64_t index)
{
    return (uint32_t)GRL_get_Type(&geomDesc[index]) |
           (((uint32_t)GRL_get_Flags(&geomDesc[index])) << 16);
}

GRL_INLINE
uint64_t getGeomDescAsUint64t(global GRL_RAYTRACING_GEOMETRY_DESC *geomDesc, uint64_t index)
{
    return (uint64_t)getGeomDescPrimitiveCountAsUint32t(geomDesc, index) |
           (((uint64_t)getGeomDescTypeAndFlagsAsUint32t(geomDesc, index)) << 32);
}

// assumed:
// dst is always 64 bytes aligned
// Each geometry's metadata is written as 8 bytes: primitive count (DW0) and type|flags (DW1).
GRL_INLINE
void copyGeoMetaData(global char* dst, global GRL_RAYTRACING_GEOMETRY_DESC *geomDesc, uint64_t size, uint numGroups)
{
    uint taskId = get_group_id(0);
    uint localId = get_sub_group_local_id();

    // size rounded down to a multiple of CACHELINE_SIZE
    uint cachelinedSize = (size) & (~(CACHELINE_SIZE - 1));

    uint reminderOffset = cachelinedSize;
    uint reminderQWSize = (size - reminderOffset) >> 3;

    uint tailCacheLines = cachelinedSize >> 6; // divide by CACHELINE_SIZE
    uint reversedTaskId = (uint)(-(((int)taskId) - ((int)numGroups - 1)));
    // one group (selected counting from the last group) writes the sub-cacheline remainder, one qword per lane
    if (reversedTaskId == tailCacheLines && localId < reminderQWSize)
    {
        uint reminderOffsetQW = reminderOffset >> 3;
        global uint64_t* dstQW = (global uint64_t*)(dst);
        dstQW[localId + reminderOffsetQW] = getGeomDescAsUint64t(geomDesc, localId + reminderOffsetQW);
    }

    // whole cachelines are written cooperatively by the subgroup, one DW per lane
    uint numCacheLines = cachelinedSize >> 6;
    while (taskId < numCacheLines)
    {
        uint byteOffset = taskId * CACHELINE_SIZE;
        uint geoIdFromOffset = (byteOffset >> 3) + (localId >> 1);

        uint32_t data = 0;
        if (localId & 1)
        {
            data = getGeomDescTypeAndFlagsAsUint32t(geomDesc, geoIdFromOffset);
        }
        else
        {
            data = getGeomDescPrimitiveCountAsUint32t(geomDesc, geoIdFromOffset);
        }
        CacheLineSubgroupWrite(dst + byteOffset, data);

        taskId += numGroups;
    }
}

GRL_INLINE
uint groupCountForInstancesCopySize(uint size)
{
    return (size >> 8) + 3;
}

GRL_INLINE
uint groupCountForGeoMetaDataCopySize(uint size)
{
    return (size >> 6) + 1;
}

GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(MAX_HW_SIMD_WIDTH, 1, 1)))
__attribute__((intel_reqd_sub_group_size(MAX_HW_SIMD_WIDTH)))
void kernel copy_instances(global char* dest, global char* instancesArray, uint64_t size)
{
    // global char *dest = (global char *)((unsigned long)bvh + bvh->Meta.instanceDescsStart);
    copyInstances(dest, instancesArray, NULL, size, groupCountForInstancesCopySize(size));
}

GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(MAX_HW_SIMD_WIDTH, 1, 1)))
__attribute__((intel_reqd_sub_group_size(MAX_HW_SIMD_WIDTH)))
void kernel copy_instances_indirect(global char* dest, global char* instancesArray, global const struct IndirectBuildRangeInfo* const indirect_data)
{
    uint64_t size = indirect_data->primitiveCount * sizeof(InstanceDesc);
    instancesArray += indirect_data->primitiveOffset;
    uint tid = get_sub_group_local_id() + get_group_id(0) * MAX_HW_SIMD_WIDTH;
    if (tid == 0)
    {
        struct BVHBase* bvh = (struct BVHBase*)dest;
        bvh->Meta.instanceCount = indirect_data->primitiveCount;
    }
    copyInstances(dest, instancesArray, NULL, size, groupCountForInstancesCopySize(size));
}

GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(MAX_HW_SIMD_WIDTH, 1, 1)))
__attribute__((intel_reqd_sub_group_size(MAX_HW_SIMD_WIDTH)))
void kernel copy_instance_ptrs(global char* dest, global uint64_t* arrayOfPtrs, uint64_t size)
{
    //global char *dest = (global char *)((unsigned long)bvh + bvh->Meta.instanceDescsStart);
    copyInstances(dest, NULL, arrayOfPtrs, size, groupCountForInstancesCopySize(size));
}

GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(MAX_HW_SIMD_WIDTH, 1, 1)))
__attribute__((intel_reqd_sub_group_size(MAX_HW_SIMD_WIDTH)))
void kernel copy_instance_ptrs_indirect(global char* dest, global uint64_t* arrayOfPtrs, global struct IndirectBuildRangeInfo const * const indirect_data)
{
    uint64_t size = indirect_data->primitiveCount * sizeof(InstanceDesc);
    arrayOfPtrs += indirect_data->primitiveOffset;
    uint tid = get_sub_group_local_id() + get_group_id(0) * MAX_HW_SIMD_WIDTH;
    if (tid == 0)
    {
        struct BVHBase* bvh = (struct BVHBase*)dest;
        bvh->Meta.instanceCount = indirect_data->primitiveCount;
    }
    copyInstances(dest, NULL, arrayOfPtrs, size, groupCountForInstancesCopySize(size));
}

GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(MAX_HW_SIMD_WIDTH, 1, 1)))
__attribute__((intel_reqd_sub_group_size(MAX_HW_SIMD_WIDTH)))
void kernel copy_instances_base_ptr(global BVHBase* bvh, global char* instancesArray, uint64_t size)
{
    global char *dest = (global char *)((unsigned long)bvh + bvh->Meta.instanceDescsStart);
    copyInstances(dest, instancesArray, NULL, size, groupCountForInstancesCopySize(size));
}

GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(MAX_HW_SIMD_WIDTH, 1, 1)))
__attribute__((intel_reqd_sub_group_size(MAX_HW_SIMD_WIDTH)))
void kernel copy_instances_base_ptr_indirect(global BVHBase* bvh, global char* instancesArray, global struct IndirectBuildRangeInfo const * const indirect_data)
{
    global char* dest = (global char*)((unsigned long)bvh + bvh->Meta.instanceDescsStart);
    uint64_t size = indirect_data->primitiveCount * sizeof(InstanceDesc);
    instancesArray += indirect_data->primitiveOffset;
    copyInstances(dest, instancesArray, NULL, size, groupCountForInstancesCopySize(size));
}

GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(MAX_HW_SIMD_WIDTH, 1, 1)))
__attribute__((intel_reqd_sub_group_size(MAX_HW_SIMD_WIDTH)))
void kernel copy_instance_ptrs_base_ptr(global BVHBase* bvh, global uint64_t* arrayOfPtrs, uint64_t size)
{
    global char *dest = (global char *)((unsigned long)bvh + bvh->Meta.instanceDescsStart);
    copyInstances(dest, NULL, arrayOfPtrs, size, groupCountForInstancesCopySize(size));
}

GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(MAX_HW_SIMD_WIDTH, 1, 1)))
__attribute__((intel_reqd_sub_group_size(MAX_HW_SIMD_WIDTH)))
void kernel copy_instance_ptrs_base_ptr_indirect(global BVHBase* bvh, global uint64_t* arrayOfPtrs, global struct IndirectBuildRangeInfo const * const indirect_data)
{
    global char* dest = (global char*)((unsigned long)bvh + bvh->Meta.instanceDescsStart);
    uint64_t size = indirect_data->primitiveCount * sizeof(InstanceDesc);
    arrayOfPtrs += indirect_data->primitiveOffset;
    copyInstances(dest, NULL, arrayOfPtrs, size, groupCountForInstancesCopySize(size));
}

GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(MAX_HW_SIMD_WIDTH, 1, 1)))
__attribute__((intel_reqd_sub_group_size(MAX_HW_SIMD_WIDTH)))
void kernel copy_geo_meta_data(global char* dest, global char* src, uint64_t size)
{
    //global char *dest = (global char *)((unsigned long)bvh + bvh->Meta.geoDescsStart);
    global GRL_RAYTRACING_GEOMETRY_DESC *geomDesc = (global GRL_RAYTRACING_GEOMETRY_DESC *)((unsigned long)src);
    copyGeoMetaData(dest, geomDesc, size, groupCountForGeoMetaDataCopySize(size));
}

GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(MAX_HW_SIMD_WIDTH, 1, 1)))
__attribute__((intel_reqd_sub_group_size(MAX_HW_SIMD_WIDTH)))
void kernel copy_geo_descs_indirect_build(global char* dest, global char* src, global struct IndirectBuildRangeInfo const * const indirect_data, uint numGeometries)
{
    uint32_t gid = get_local_id(0) + get_group_id(0) * get_local_size(0);
    if (gid < numGeometries) {
        global GRL_RAYTRACING_GEOMETRY_DESC* dstDesc = (global GRL_RAYTRACING_GEOMETRY_DESC*)(dest);
        global GRL_RAYTRACING_GEOMETRY_DESC* srcDesc = (global GRL_RAYTRACING_GEOMETRY_DESC*)(src);

        GRL_RAYTRACING_GEOMETRY_DESC geo = srcDesc[gid];

        uint primitiveCount = indirect_data[gid].primitiveCount;
        uint primitiveOffset = indirect_data[gid].primitiveOffset;
        uint firstVertex = indirect_data[gid].firstVertex;
        uint transformOffset = indirect_data[gid].transformOffset;

        if (srcDesc[gid].Type == GEOMETRY_TYPE_TRIANGLES)
        {
            if (geo.Desc.Triangles.IndexFormat == INDEX_FORMAT_NONE)
            {
                geo.Desc.Triangles.VertexCount = primitiveCount * 3;
                geo.Desc.Triangles.pVertexBuffer += primitiveOffset
                    + firstVertex * geo.Desc.Triangles.VertexBufferByteStride;
            }
            else
            {
                geo.Desc.Triangles.IndexCount = primitiveCount * 3;
                geo.Desc.Triangles.pIndexBuffer += primitiveOffset;
                geo.Desc.Triangles.pVertexBuffer += firstVertex * geo.Desc.Triangles.VertexBufferByteStride;
            }
            if (geo.Desc.Triangles.pTransformBuffer) {
                geo.Desc.Triangles.pTransformBuffer += transformOffset;
            }
        } else {
            // GEOMETRY_TYPE_PROCEDURAL
            geo.Desc.Procedural.AABBCount = primitiveCount;
            geo.Desc.Procedural.pAABBs_GPUVA += primitiveOffset;
        }

        dstDesc[gid] = geo;
    }
}

GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(1, 1, 1)))
void kernel batched_init_globals(global struct BatchedInitGlobalsData *data)
{
    uint groupID = get_group_id(0);

    struct BatchedInitGlobalsData entry = data[groupID];

    global struct Globals* globals = (global struct Globals*)entry.p_build_globals;
    global char *bvh_mem = (global char*)entry.p_bvh_buffer;
    uint numPrimitives = entry.numPrimitives;
    uint numGeometries = entry.numGeometries;
    uint numInstances = entry.numInstances;
    uint instance_descs_start = entry.instance_descs_start;
    uint geo_meta_data_start = entry.geo_meta_data_start;
    uint node_data_start = entry.node_data_start;
    uint quad_data_start = entry.leaf_data_start;
    uint instance_data_start = entry.leaf_data_start;
    uint procedural_data_start = entry.procedural_data_start;
    uint back_pointer_start = entry.back_pointer_start;
    uint build_record_start = entry.leaf_data_start;
    uint totalBytes = entry.sizeTotal;
    uint leafPrimType = entry.leafType;
    uint leafSize = entry.leafSize;

    uint root_node_offset = node_data_start;
    struct BVHBase *base = (struct BVHBase *)bvh_mem;

    base->Meta.instanceCount = numInstances;
    base->Meta.geoCount = numGeometries;
    base->Meta.instanceDescsStart = instance_descs_start;
    base->Meta.geoDescsStart = geo_meta_data_start;
    base->Meta.allocationSize = totalBytes;
    // This doesn't work correctly
    //ERROR_INFO initErr = { 0, 0, 0, 0xAAABBAAA };
    //base->Meta.errors = initErr;
    base->Meta.errors.type = 0;
    base->Meta.errors.offset_in_BVH = 0; // in 64B units
    base->Meta.errors.when = 0;
    base->Meta.errors.reserved = 0xAAABBAAA;

    base->nodeDataCur = node_data_start / 64;
    base->quadLeafStart = quad_data_start / 64;
    base->quadLeafCur = quad_data_start / 64;
    base->instanceLeafStart = instance_data_start / 64;
    base->instanceLeafEnd = instance_data_start / 64;
    base->proceduralDataStart = procedural_data_start / 64;
    base->proceduralDataCur = procedural_data_start / 64;
    base->backPointerDataStart = back_pointer_start / 64;
    base->refitTreeletsDataStart = totalBytes / 64;
    base->refitStartPointDataStart = totalBytes / 64;
    base->BVHDataEnd = totalBytes / 64;
    base->refitTreeletCnt = 0;
    base->refitTreeletCnt2 = 0;
    base->rootNodeOffset = root_node_offset;

    base->fatLeafCount = 0;
    base->fatLeafTableStart = entry.fatleaf_table_start / 64;
    base->innerCount = 0;
    base->innerTableStart = entry.innernode_table_start / 64;
    base->quadLeftoversCountNewAtomicUpdate = 0;
    base->quadTableSizeNewAtomicUpdate = 0;
    base->quadIndicesDataStart = entry.quad_indices_data_start / 64;

    if (back_pointer_start != totalBytes)
    {
        BackPointers* back_pointers = BVHBase_GetBackPointers(base);
        uint root_node_idx = root_node_offset - node_data_start;
        global uint *root_node_backpointer = (global uint *)InnerNode_GetBackPointer(back_pointers, root_node_idx);
        *root_node_backpointer = ((uint)-1) << 6;
    }

    AABB3f_init(&base->Meta.bounds);
    AABB_init(&globals->centroidBounds);

    globals->build_record_start = build_record_start;

    globals->numBuildRecords = 0;
    globals->numBuildRecords_extended = 0;
    globals->numPrimitives = numPrimitives;
    globals->numSplittedPrimitives = 0;
    globals->sync = 0;
    globals->probThreshold = 0.0f;
    globals->leafPrimType = leafPrimType;
    globals->leafSize = leafSize;
}


// This is a temporary WA for mock in DXR
GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(MAX_HW_SIMD_WIDTH, 1, 1)))
void kernel copy_mock(global char *dest,
                      global char *src,
                      uint32_t size)
{
    uint32_t globalId = get_local_id(0) + get_group_id(0) * get_local_size(0);
    uint32_t globalSize = get_num_groups(0) * get_local_size(0);
    for (uint32_t i = globalId; i < size; i += globalSize)
    {
        dest[i] = src[i];
    }
}


GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(32, 1, 1)))
void kernel mem_set(global char *dest,
                    dword byte,
                    dword size)
{
    uint32_t globalId = get_local_id(0) + get_group_id(0) * get_local_size(0);
    if (globalId < size)
    {
        dest[globalId] = (char)byte;
    }
}

GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(32, 1, 1)))
void kernel mem_set_size_ptr(global char *dest,
                             dword byte,
                             global qword* sizePtr)
{
    uint32_t globalId = get_local_id(0) + get_group_id(0) * get_local_size(0);
    if (globalId < *sizePtr)
    {
        dest[globalId] = (char)byte;
    }
}