xref: /aosp_15_r20/external/mesa3d/src/intel/vulkan/grl/gpu/misc.cl (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1//
2// Copyright (C) 2009-2021 Intel Corporation
3//
4// SPDX-License-Identifier: MIT
5//
6//
7
8#include "api_interface.h"
9#include "common.h"
10#include "instance.h"
11#include "misc_shared.h"
12#include "mem_utils.h"
13
14#define DBG(x)
15#define ENABLE_CHECKS 0
16
17#define CACHELINE_SIZE 64
18#define CACHELINE_PER_BLOCK 4
19#define BLOCK_SIZE 256 // = CACHELINE_SIZE * CACHELINE_PER_BLOCK;
20
GRL_INLINE
uint32_t getGeomDescPrimitiveCountAsUint32t(global GRL_RAYTRACING_GEOMETRY_DESC *geomDesc, uint64_t index)
{
    // Primitive count of the index-th geometry, narrowed to 32 bits.
    global GRL_RAYTRACING_GEOMETRY_DESC *desc = &geomDesc[index];
    return (uint32_t)GRL_get_primitive_count(desc);
}
26
GRL_INLINE
uint32_t getGeomDescTypeAndFlagsAsUint32t(global GRL_RAYTRACING_GEOMETRY_DESC *geomDesc, uint64_t index)
{
    // Pack the geometry type into the low 16 bits and the flags into the
    // high 16 bits of a single dword.
    global GRL_RAYTRACING_GEOMETRY_DESC *desc = &geomDesc[index];
    uint32_t type  = (uint32_t)GRL_get_Type(desc);
    uint32_t flags = (uint32_t)GRL_get_Flags(desc);
    return type | (flags << 16);
}
33
GRL_INLINE
uint64_t getGeomDescAsUint64t(global GRL_RAYTRACING_GEOMETRY_DESC *geomDesc, uint64_t index)
{
    // Packed 8-byte metadata record for one geometry:
    // low dword = primitive count, high dword = type | (flags << 16).
    uint64_t lo = (uint64_t)getGeomDescPrimitiveCountAsUint32t(geomDesc, index);
    uint64_t hi = (uint64_t)getGeomDescTypeAndFlagsAsUint32t(geomDesc, index);
    return lo | (hi << 32);
}
40
// Writes one packed 8-byte metadata record per geometry descriptor into dst
// (low dword = primitive count, high dword = type/flags; see
// getGeomDescAsUint64t). 'size' is the output size in bytes, 'numGroups' must
// match the dispatch size (see groupCountForGeoMetaDataCopySize).
// assumed:
// dst is always 64 bytes aligned
GRL_INLINE
void copyGeoMetaData(global char* dst, global GRL_RAYTRACING_GEOMETRY_DESC *geomDesc, uint64_t size, uint numGroups)
{
    uint taskId = get_group_id(0);
    uint localId = get_sub_group_local_id();

    // Portion of the output coverable by whole 64-byte cache lines.
    uint cachelinedSize = (size) & (~(CACHELINE_SIZE-1));

    // Tail past the last full cache line, copied in 8-byte quadwords.
    // NOTE(review): assumes 'size' is a multiple of 8 (one QW per geometry) —
    // any sub-QW remainder would be dropped; confirm with callers.
    uint reminderOffset = cachelinedSize;
    uint reminderQWSize = (size - reminderOffset) >> 3;

    uint tailCacheLines = cachelinedSize >> 6; // divide by CACHELINE_SIZE
    // Group index counted from the end of the dispatch; with numGroups ==
    // (size >> 6) + 1 this condition selects exactly one group to emit the
    // tail quadwords (one lane per remaining geometry record).
    uint reversedTaskId = (uint)(-(((int)taskId) - ((int)numGroups-1)));
    if (reversedTaskId == tailCacheLines && localId < reminderQWSize)
    {
        uint reminderOffsetQW = reminderOffset >> 3;
        global uint64_t* dstQW = (global uint64_t*)(dst);
        dstQW[localId + reminderOffsetQW] = getGeomDescAsUint64t(geomDesc, localId + reminderOffsetQW);
    }

    // Full cache lines: each loop iteration fills 64 bytes. Each pair of
    // lanes covers one geometry record — even lanes contribute the primitive
    // count dword, odd lanes the type/flags dword.
    uint numCacheLines = cachelinedSize >> 6;
    while (taskId < numCacheLines)
    {
        uint byteOffset = taskId * CACHELINE_SIZE;
        // First geometry of this cache line (8 records per line) plus this
        // lane's record within the line.
        uint geoIdFromOffset = (byteOffset >> 3) + (localId >> 1);

        uint32_t data = 0;
        if (localId & 1)
        {
            data = getGeomDescTypeAndFlagsAsUint32t(geomDesc, geoIdFromOffset);
        }
        else
        {
            data = getGeomDescPrimitiveCountAsUint32t(geomDesc, geoIdFromOffset);
        }
        // Subgroup-cooperative 64-byte store of the assembled line.
        CacheLineSubgroupWrite(dst + byteOffset, data);

        // Stride by the dispatch size so all cache lines are covered.
        taskId += numGroups;
    }
}
83
GRL_INLINE
uint groupCountForInstancesCopySize(uint size)
{
    // One group per 256-byte block (BLOCK_SIZE), plus three extra groups.
    return (size / BLOCK_SIZE) + 3;
}
89
GRL_INLINE
uint groupCountForGeoMetaDataCopySize(uint size)
{
    // One group per 64-byte cache line, plus one group for the tail.
    return (size / CACHELINE_SIZE) + 1;
}
95
GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(MAX_HW_SIMD_WIDTH, 1, 1)))
__attribute__((intel_reqd_sub_group_size(MAX_HW_SIMD_WIDTH)))
void kernel copy_instances(global char* dest, global char* instancesArray, uint64_t size)
{
    // Copy a flat array of instance descriptors to dest; the caller is
    // expected to pass dest already pointing at the descriptor area.
    uint numGroups = groupCountForInstancesCopySize(size);
    copyInstances(dest, instancesArray, NULL, size, numGroups);
}
104
GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(MAX_HW_SIMD_WIDTH, 1, 1)))
__attribute__((intel_reqd_sub_group_size(MAX_HW_SIMD_WIDTH)))
void kernel copy_instances_indirect(global char* dest, global char* instancesArray, global const struct IndirectBuildRangeInfo* const indirect_data)
{
    // Copy size and source offset come from GPU-resident indirect build info.
    uint64_t size = indirect_data->primitiveCount * sizeof(InstanceDesc);
    instancesArray += indirect_data->primitiveOffset;

    // Exactly one lane of the whole dispatch patches the instance count.
    // NOTE(review): dest is used both as the BVHBase header and as the copy
    // destination here — confirm callers pass the BVH base address.
    uint globalLane = get_sub_group_local_id() + get_group_id(0) * MAX_HW_SIMD_WIDTH;
    if (globalLane == 0)
    {
        struct BVHBase* bvh     = (struct BVHBase*)dest;
        bvh->Meta.instanceCount = indirect_data->primitiveCount;
    }

    copyInstances(dest, instancesArray, NULL, size, groupCountForInstancesCopySize(size));
}
120
GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(MAX_HW_SIMD_WIDTH, 1, 1)))
__attribute__((intel_reqd_sub_group_size(MAX_HW_SIMD_WIDTH)))
void kernel copy_instance_ptrs(global char* dest, global uint64_t* arrayOfPtrs, uint64_t size)
{
    // Array-of-pointers variant: copyInstances dereferences each pointer.
    uint numGroups = groupCountForInstancesCopySize(size);
    copyInstances(dest, NULL, arrayOfPtrs, size, numGroups);
}
129
GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(MAX_HW_SIMD_WIDTH, 1, 1)))
__attribute__((intel_reqd_sub_group_size(MAX_HW_SIMD_WIDTH)))
void kernel copy_instance_ptrs_indirect(global char* dest, global uint64_t* arrayOfPtrs, global struct IndirectBuildRangeInfo const * const indirect_data)
{
    // Array-of-pointers variant driven by GPU-resident indirect build info.
    uint64_t size = indirect_data->primitiveCount * sizeof(InstanceDesc);
    arrayOfPtrs += indirect_data->primitiveOffset;

    // Exactly one lane of the whole dispatch patches the instance count.
    // NOTE(review): dest doubles as the BVHBase header here — confirm callers
    // pass the BVH base address.
    uint globalLane = get_sub_group_local_id() + get_group_id(0) * MAX_HW_SIMD_WIDTH;
    if (globalLane == 0)
    {
        struct BVHBase* bvh     = (struct BVHBase*)dest;
        bvh->Meta.instanceCount = indirect_data->primitiveCount;
    }

    copyInstances(dest, NULL, arrayOfPtrs, size, groupCountForInstancesCopySize(size));
}
145
GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(MAX_HW_SIMD_WIDTH, 1, 1)))
__attribute__((intel_reqd_sub_group_size(MAX_HW_SIMD_WIDTH)))
void kernel copy_instances_base_ptr(global BVHBase* bvh, global char* instancesArray, uint64_t size)
{
    // Destination is derived from the BVH header's descriptor-area offset.
    global char *dst = (global char *)((unsigned long)bvh + bvh->Meta.instanceDescsStart);
    uint numGroups   = groupCountForInstancesCopySize(size);
    copyInstances(dst, instancesArray, NULL, size, numGroups);
}
154
GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(MAX_HW_SIMD_WIDTH, 1, 1)))
__attribute__((intel_reqd_sub_group_size(MAX_HW_SIMD_WIDTH)))
void kernel copy_instances_base_ptr_indirect(global BVHBase* bvh, global char* instancesArray, global struct IndirectBuildRangeInfo const * const indirect_data)
{
    // Destination is derived from the BVH header; size and source offset come
    // from GPU-resident indirect build info.
    global char *dst  = (global char *)((unsigned long)bvh + bvh->Meta.instanceDescsStart);
    uint64_t     size = indirect_data->primitiveCount * sizeof(InstanceDesc);
    instancesArray += indirect_data->primitiveOffset;
    copyInstances(dst, instancesArray, NULL, size, groupCountForInstancesCopySize(size));
}
165
GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(MAX_HW_SIMD_WIDTH, 1, 1)))
__attribute__((intel_reqd_sub_group_size(MAX_HW_SIMD_WIDTH)))
void kernel copy_instance_ptrs_base_ptr(global BVHBase* bvh, global uint64_t* arrayOfPtrs, uint64_t size)
{
    // Array-of-pointers variant; destination derived from the BVH header.
    global char *dst = (global char *)((unsigned long)bvh + bvh->Meta.instanceDescsStart);
    uint numGroups   = groupCountForInstancesCopySize(size);
    copyInstances(dst, NULL, arrayOfPtrs, size, numGroups);
}
174
GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(MAX_HW_SIMD_WIDTH, 1, 1)))
__attribute__((intel_reqd_sub_group_size(MAX_HW_SIMD_WIDTH)))
void kernel copy_instance_ptrs_base_ptr_indirect(global BVHBase* bvh, global uint64_t* arrayOfPtrs, global struct IndirectBuildRangeInfo const * const indirect_data)
{
    // Array-of-pointers variant; destination derived from the BVH header,
    // size and source offset from GPU-resident indirect build info.
    global char *dst  = (global char *)((unsigned long)bvh + bvh->Meta.instanceDescsStart);
    uint64_t     size = indirect_data->primitiveCount * sizeof(InstanceDesc);
    arrayOfPtrs += indirect_data->primitiveOffset;
    copyInstances(dst, NULL, arrayOfPtrs, size, groupCountForInstancesCopySize(size));
}
185
GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(MAX_HW_SIMD_WIDTH, 1, 1)))
__attribute__((intel_reqd_sub_group_size(MAX_HW_SIMD_WIDTH)))
void kernel copy_geo_meta_data(global char* dest, global char* src, uint64_t size)
{
    // Repack the API geometry descriptor array from src into the compact
    // per-geometry metadata records at dest.
    global GRL_RAYTRACING_GEOMETRY_DESC *descs =
        (global GRL_RAYTRACING_GEOMETRY_DESC *)((unsigned long)src);
    uint numGroups = groupCountForGeoMetaDataCopySize(size);
    copyGeoMetaData(dest, descs, size, numGroups);
}
195
GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__( ( reqd_work_group_size( MAX_HW_SIMD_WIDTH, 1, 1 ) ) )
__attribute__( ( intel_reqd_sub_group_size( MAX_HW_SIMD_WIDTH ) ) )
void kernel copy_geo_descs_indirect_build(global char* dest, global char* src, global struct IndirectBuildRangeInfo const * const indirect_data, uint numGeometries)
{
    // One work item per geometry: copy the descriptor and apply the
    // per-geometry build-range offsets and counts from indirect_data.
    uint32_t gid = get_local_id(0) + get_group_id(0) * get_local_size(0);
    if (gid >= numGeometries)
        return;

    global GRL_RAYTRACING_GEOMETRY_DESC* dstDesc = (global GRL_RAYTRACING_GEOMETRY_DESC*)(dest);
    global GRL_RAYTRACING_GEOMETRY_DESC* srcDesc = (global GRL_RAYTRACING_GEOMETRY_DESC*)(src);

    GRL_RAYTRACING_GEOMETRY_DESC geo = srcDesc[gid];

    const uint primCount   = indirect_data[gid].primitiveCount;
    const uint primOffset  = indirect_data[gid].primitiveOffset;
    const uint firstVert   = indirect_data[gid].firstVertex;
    const uint xformOffset = indirect_data[gid].transformOffset;

    if (srcDesc[gid].Type == GEOMETRY_TYPE_TRIANGLES)
    {
        if (geo.Desc.Triangles.IndexFormat == INDEX_FORMAT_NONE)
        {
            // Non-indexed: both primitiveOffset and firstVertex advance the
            // vertex buffer.
            geo.Desc.Triangles.VertexCount = primCount * 3;
            geo.Desc.Triangles.pVertexBuffer +=
                primOffset + firstVert * geo.Desc.Triangles.VertexBufferByteStride;
        }
        else
        {
            // Indexed: primitiveOffset advances the index buffer, firstVertex
            // advances the vertex buffer.
            geo.Desc.Triangles.IndexCount = primCount * 3;
            geo.Desc.Triangles.pIndexBuffer += primOffset;
            geo.Desc.Triangles.pVertexBuffer +=
                firstVert * geo.Desc.Triangles.VertexBufferByteStride;
        }
        // Transform offset only applies when a transform buffer is present.
        if (geo.Desc.Triangles.pTransformBuffer)
        {
            geo.Desc.Triangles.pTransformBuffer += xformOffset;
        }
    }
    else
    {
        // GEOMETRY_TYPE_PROCEDURAL
        geo.Desc.Procedural.AABBCount = primCount;
        geo.Desc.Procedural.pAABBs_GPUVA += primOffset;
    }

    dstDesc[gid] = geo;
}
239
// Initializes the BVH header and the build Globals structure for one batch
// entry per work group. Dispatched with one single-item group per entry in
// 'data'; all offsets stored in the BVH header are in 64-byte units.
GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(1, 1, 1))) void kernel batched_init_globals(global struct BatchedInitGlobalsData *data)
{
    uint groupID = get_group_id(0);

    // Each group initializes exactly one batch entry.
    struct BatchedInitGlobalsData entry = data[groupID];

    global struct Globals* globals = (global struct Globals*)entry.p_build_globals;
    global char *bvh_mem = (global char*)entry.p_bvh_buffer;
    uint numPrimitives = entry.numPrimitives;
    uint numGeometries = entry.numGeometries;
    uint numInstances = entry.numInstances;
    uint instance_descs_start = entry.instance_descs_start;
    uint geo_meta_data_start = entry.geo_meta_data_start;
    uint node_data_start = entry.node_data_start;
    // Quad leaves, instance leaves and build records all share the same
    // leaf-data region; which one is used depends on the build type.
    uint quad_data_start = entry.leaf_data_start;
    uint instance_data_start = entry.leaf_data_start;
    uint procedural_data_start = entry.procedural_data_start;
    uint back_pointer_start = entry.back_pointer_start;
    uint build_record_start = entry.leaf_data_start;
    uint totalBytes = entry.sizeTotal;
    uint leafPrimType = entry.leafType;
    uint leafSize = entry.leafSize;

    // The root node is placed at the start of the node data area.
    uint root_node_offset = node_data_start;
    struct BVHBase *base = (struct BVHBase *)bvh_mem;

    base->Meta.instanceCount      = numInstances;
    base->Meta.geoCount           = numGeometries;
    base->Meta.instanceDescsStart = instance_descs_start;
    base->Meta.geoDescsStart      = geo_meta_data_start;
    base->Meta.allocationSize     = totalBytes;
    // This doesn't work correctly (struct-assignment of ERROR_INFO);
    // initialize the error record field by field instead.
    //ERROR_INFO initErr = { 0, 0, 0, 0xAAABBAAA };
    //base->Meta.errors = initErr;
    base->Meta.errors.type = 0;
    base->Meta.errors.offset_in_BVH = 0; //in 64B units
    base->Meta.errors.when = 0;
    base->Meta.errors.reserved = 0xAAABBAAA;

    // Allocation cursors and section starts, all in 64-byte units.
    base->nodeDataCur = node_data_start / 64;
    base->quadLeafStart = quad_data_start / 64;
    base->quadLeafCur = quad_data_start / 64;
    base->instanceLeafStart = instance_data_start / 64;
    base->instanceLeafEnd = instance_data_start / 64;
    base->proceduralDataStart = procedural_data_start / 64;
    base->proceduralDataCur = procedural_data_start / 64;
    base->backPointerDataStart = back_pointer_start / 64;
    // Refit treelet data is not allocated yet; point it at the buffer end.
    base->refitTreeletsDataStart = totalBytes / 64;
    base->refitStartPointDataStart = totalBytes / 64;
    base->BVHDataEnd = totalBytes / 64;
    base->refitTreeletCnt = 0;
    base->refitTreeletCnt2 = 0;
    base->rootNodeOffset = root_node_offset;

    base->fatLeafCount = 0;
    base->fatLeafTableStart = entry.fatleaf_table_start / 64;
    base->innerCount = 0;
    base->innerTableStart = entry.innernode_table_start / 64;
    base->quadLeftoversCountNewAtomicUpdate = 0;
    base->quadTableSizeNewAtomicUpdate = 0;
    base->quadIndicesDataStart = entry.quad_indices_data_start / 64;

    // back_pointer_start == totalBytes means no backpointer section was
    // allocated; otherwise mark the root's backpointer as "no parent".
    if (back_pointer_start != totalBytes)
    {
        BackPointers* back_pointers = BVHBase_GetBackPointers(base);
        uint root_node_idx = root_node_offset - node_data_start;
        global uint *root_node_backpointer = (global uint *)InnerNode_GetBackPointer(back_pointers,root_node_idx);
        // Parent index occupies the upper bits; (uint)-1 << 6 = invalid parent.
        *root_node_backpointer = ((uint)-1) << 6;
    }

    // Reset bounds to empty before the build accumulates them.
    AABB3f_init(&base->Meta.bounds);
    AABB_init(&globals->centroidBounds);

    globals->build_record_start = build_record_start;

    globals->numBuildRecords = 0;
    globals->numBuildRecords_extended = 0;
    globals->numPrimitives = numPrimitives;
    globals->numSplittedPrimitives = 0;
    globals->sync = 0;
    globals->probThreshold = 0.0f;
    globals->leafPrimType = leafPrimType;
    globals->leafSize = leafSize;
}
325
326
327
// This is a temporary workaround for the mock path in DXR.
GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(MAX_HW_SIMD_WIDTH, 1, 1))) void kernel copy_mock(global char *dest,
                                                                                     global char *src,
                                                                                     uint32_t size)
{
    // Grid-stride byte copy: each work item copies every stride-th byte.
    const uint32_t stride = get_num_groups(0) * get_local_size(0);
    for (uint32_t i = get_local_id(0) + get_group_id(0) * get_local_size(0);
         i < size;
         i += stride)
    {
        dest[i] = src[i];
    }
}
341
342
GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(32, 1, 1)))
void kernel mem_set(global char *dest,
    dword byte,
    dword size)
{
    // One byte per work item; the caller must dispatch at least 'size' items.
    uint32_t i = get_local_id(0) + get_group_id(0) * get_local_size(0);
    if (i >= size)
        return;
    dest[i] = (char)byte;
}
355
GRL_ANNOTATE_IGC_DO_NOT_SPILL
__attribute__((reqd_work_group_size(32, 1, 1)))
void kernel mem_set_size_ptr(global char *dest,
    dword byte,
    global qword* sizePtr)
{
    // Same as mem_set, but the fill size lives in GPU memory (indirect).
    uint32_t i = get_local_id(0) + get_group_id(0) * get_local_size(0);
    if (i >= *sizePtr)
        return;
    dest[i] = (char)byte;
}
368