xref: /aosp_15_r20/external/mesa3d/src/intel/vulkan/grl/gpu/api_interface.h (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 //
2 // Copyright (C) 2009-2021 Intel Corporation
3 //
4 // SPDX-License-Identifier: MIT
5 //
6 //
7 
8 #pragma once
9 #include "GRLStructs.h"
10 #include "shared.h"
11 #include "libs/lsc_intrinsics.h"
12 
13 typedef struct Geo GRL_RAYTRACING_GEOMETRY_DESC;
14 
// Axis-aligned bounding box laid out as six consecutive floats:
// the Min* triple first, then the Max* triple.
typedef struct GRL_RAYTRACING_AABB
{
    float MinX, MinY, MinZ; // written from AABB::lower by GLR_set_raytracing_aabb
    float MaxX, MaxY, MaxZ; // written from AABB::upper by GLR_set_raytracing_aabb
} GRL_RAYTRACING_AABB;
24 
GLR_set_raytracing_aabb(GRL_RAYTRACING_AABB * dest,struct AABB * source)25 GRL_INLINE void GLR_set_raytracing_aabb(GRL_RAYTRACING_AABB* dest, struct AABB* source)
26 {
27     dest->MinX = source->lower.x;
28     dest->MinY = source->lower.y;
29     dest->MinZ = source->lower.z;
30     dest->MaxX = source->upper.x;
31     dest->MaxY = source->upper.y;
32     dest->MaxZ = source->upper.z;
33 }
34 
GRL_load_triangle(global GRL_RAYTRACING_GEOMETRY_DESC * geomDesc,const uint triID)35 GRL_INLINE uint3 GRL_load_triangle(global GRL_RAYTRACING_GEOMETRY_DESC* geomDesc, const uint triID)
36 {
37     global char* indices = (global char*)geomDesc->Desc.Triangles.pIndexBuffer;
38     uint index_format = geomDesc->Desc.Triangles.IndexFormat;
39 
40     if (index_format == INDEX_FORMAT_R32_UINT)
41     {
42         const uint* data = (const uint*)(indices + triID * 3 * 4);
43         return (uint3)(data[0], data[1], data[2]);
44     }
45     else if (index_format == INDEX_FORMAT_NONE)
46     {
47         return (uint3)(triID * 3, triID * 3 + 1, triID * 3 + 2);
48     }
49     else
50     {
51         const ushort* data = (const ushort*)(indices + triID * 3 * 2);
52         return (uint3)(data[0], data[1], data[2]);
53     }
54 }
55 
GRL_load_indices_from_buffer(global char * indices,const uint index_format,const uint triID)56 GRL_INLINE uint3 GRL_load_indices_from_buffer(global char* indices, const uint index_format, const uint triID)
57 {
58     if (index_format == INDEX_FORMAT_R32_UINT)
59     {
60         return load_uint3_L1C_L3C((global uint3*)(indices + triID * 3 * 4), 0);
61     }
62     else if (index_format == INDEX_FORMAT_NONE)
63     {
64         return (uint3)(triID * 3, triID * 3 + 1, triID * 3 + 2);
65     }
66     else
67     {
68         const ushort* data = (const ushort*)(indices + triID * 3 * 2);
69         return (uint3)(data[0], data[1], data[2]);
70     }
71 }
72 
73 // Load all 3 indices from one triangle, and a single index from another
GRL_load_quad_indices(global GRL_RAYTRACING_GEOMETRY_DESC * geomDesc,uint triID,uint triID_1,ushort fourth_vert)74 GRL_INLINE uint4 GRL_load_quad_indices(global GRL_RAYTRACING_GEOMETRY_DESC* geomDesc, uint triID, uint triID_1, ushort fourth_vert)
75 {
76     global char* indices = (global char*)geomDesc->Desc.Triangles.pIndexBuffer;
77     uint index_format = geomDesc->Desc.Triangles.IndexFormat;
78 
79     if (index_format == INDEX_FORMAT_R32_UINT)
80     {
81         const uint* data0 = (const uint*)(indices + triID * 3 * 4);
82         const uint* data1 = (const uint*)(indices + triID_1 * 3 * 4);
83         return (uint4)(data0[0], data0[1], data0[2], data1[fourth_vert]);
84     }
85     else if (index_format == INDEX_FORMAT_NONE)
86     {
87         return (uint4)(triID * 3, triID * 3 + 1, triID * 3 + 2, triID_1 * 3 + fourth_vert);
88     }
89     else
90     {
91         const ushort* data0 = (const ushort*)(indices + triID * 3 * 2);
92         const ushort* data1 = (const ushort*)(indices + triID_1 * 3 * 2);
93         return (uint4)(data0[0], data0[1], data0[2], data1[fourth_vert]);
94     }
95 }
96 
GRL_set_Type(GRL_RAYTRACING_GEOMETRY_DESC * geomDesc,GeometryType type)97 GRL_INLINE void GRL_set_Type(GRL_RAYTRACING_GEOMETRY_DESC* geomDesc, GeometryType type)
98 {
99     geomDesc->Type = type;
100 }
101 
GRL_get_Type(const GRL_RAYTRACING_GEOMETRY_DESC * geomDesc)102 GRL_INLINE GeometryType GRL_get_Type(const GRL_RAYTRACING_GEOMETRY_DESC* geomDesc)
103 {
104     return geomDesc->Type;
105 }
106 
GRL_set_Flags(GRL_RAYTRACING_GEOMETRY_DESC * geomDesc,uint8_t flags)107 GRL_INLINE void GRL_set_Flags(GRL_RAYTRACING_GEOMETRY_DESC* geomDesc, uint8_t flags)
108 {
109     geomDesc->Flags = flags;
110 }
111 
GRL_get_Flags(const GRL_RAYTRACING_GEOMETRY_DESC * geomDesc)112 GRL_INLINE uint8_t GRL_get_Flags(const GRL_RAYTRACING_GEOMETRY_DESC* geomDesc)
113 {
114     return geomDesc->Flags;
115 }
116 
GRL_set_triangles_Transform(GRL_RAYTRACING_GEOMETRY_DESC * geomDesc,gpuva_t transform)117 GRL_INLINE void GRL_set_triangles_Transform(GRL_RAYTRACING_GEOMETRY_DESC* geomDesc, gpuva_t transform)
118 {
119     geomDesc->Desc.Triangles.pTransformBuffer = transform;
120 }
121 
GRL_get_triangles_Transform(const GRL_RAYTRACING_GEOMETRY_DESC * geomDesc)122 GRL_INLINE gpuva_t GRL_get_triangles_Transform(const GRL_RAYTRACING_GEOMETRY_DESC* geomDesc)
123 {
124     return geomDesc->Desc.Triangles.pTransformBuffer;
125 }
126 
GRL_set_triangles_IndexFormat(GRL_RAYTRACING_GEOMETRY_DESC * geomDesc,IndexFormat format)127 GRL_INLINE void GRL_set_triangles_IndexFormat(GRL_RAYTRACING_GEOMETRY_DESC* geomDesc, IndexFormat format)
128 {
129     geomDesc->Desc.Triangles.IndexFormat = format;
130 }
131 
GRL_get_triangles_IndexFormat(const GRL_RAYTRACING_GEOMETRY_DESC * geomDesc)132 GRL_INLINE IndexFormat GRL_get_triangles_IndexFormat(const GRL_RAYTRACING_GEOMETRY_DESC* geomDesc)
133 {
134     return geomDesc->Desc.Triangles.IndexFormat;
135 }
136 
GRL_set_triangles_VertexFormat(GRL_RAYTRACING_GEOMETRY_DESC * geomDesc,VertexFormat format)137 GRL_INLINE void GRL_set_triangles_VertexFormat(GRL_RAYTRACING_GEOMETRY_DESC* geomDesc, VertexFormat format)
138 {
139     geomDesc->Desc.Triangles.VertexFormat = format;
140 }
141 
GRL_get_triangles_VertexFormat(const GRL_RAYTRACING_GEOMETRY_DESC * geomDesc)142 GRL_INLINE VertexFormat GRL_get_triangles_VertexFormat(const GRL_RAYTRACING_GEOMETRY_DESC* geomDesc)
143 {
144     return geomDesc->Desc.Triangles.VertexFormat;
145 }
146 
GRL_set_triangles_IndexCount(GRL_RAYTRACING_GEOMETRY_DESC * geomDesc,dword count)147 GRL_INLINE void GRL_set_triangles_IndexCount(GRL_RAYTRACING_GEOMETRY_DESC* geomDesc, dword count)
148 {
149     geomDesc->Desc.Triangles.IndexCount = count;
150 }
151 
GRL_get_triangles_IndexCount(const GRL_RAYTRACING_GEOMETRY_DESC * geomDesc)152 GRL_INLINE dword GRL_get_triangles_IndexCount(const GRL_RAYTRACING_GEOMETRY_DESC* geomDesc)
153 {
154     return geomDesc->Desc.Triangles.IndexCount;
155 }
156 
GRL_set_triangles_VertexCount(GRL_RAYTRACING_GEOMETRY_DESC * geomDesc,dword count)157 GRL_INLINE void GRL_set_triangles_VertexCount(GRL_RAYTRACING_GEOMETRY_DESC* geomDesc, dword count)
158 {
159     geomDesc->Desc.Triangles.VertexCount = count;
160 }
161 
GRL_get_triangles_VertexCount(const GRL_RAYTRACING_GEOMETRY_DESC * geomDesc)162 GRL_INLINE dword GRL_get_triangles_VertexCount(const GRL_RAYTRACING_GEOMETRY_DESC* geomDesc)
163 {
164     return geomDesc->Desc.Triangles.VertexCount;
165 }
166 
GRL_set_triangles_IndexBuffer(GRL_RAYTRACING_GEOMETRY_DESC * geomDesc,gpuva_t buffer)167 GRL_INLINE void GRL_set_triangles_IndexBuffer(GRL_RAYTRACING_GEOMETRY_DESC* geomDesc, gpuva_t buffer)
168 {
169     geomDesc->Desc.Triangles.pIndexBuffer = buffer;
170 }
171 
GRL_get_triangles_IndexBuffer(const GRL_RAYTRACING_GEOMETRY_DESC * geomDesc)172 GRL_INLINE gpuva_t GRL_get_triangles_IndexBuffer(const GRL_RAYTRACING_GEOMETRY_DESC* geomDesc)
173 {
174     return geomDesc->Desc.Triangles.pIndexBuffer;
175 }
176 
GRL_set_triangles_VertexBuffer_StartAddress(GRL_RAYTRACING_GEOMETRY_DESC * geomDesc,gpuva_t address)177 GRL_INLINE void GRL_set_triangles_VertexBuffer_StartAddress(GRL_RAYTRACING_GEOMETRY_DESC* geomDesc, gpuva_t address)
178 {
179     geomDesc->Desc.Triangles.pVertexBuffer = address;
180 }
181 
GRL_get_triangles_VertexBuffer_StartAddress(const GRL_RAYTRACING_GEOMETRY_DESC * geomDesc)182 GRL_INLINE gpuva_t GRL_get_triangles_VertexBuffer_StartAddress(const GRL_RAYTRACING_GEOMETRY_DESC* geomDesc)
183 {
184     return geomDesc->Desc.Triangles.pVertexBuffer;
185 }
186 
GRL_set_triangles_VertexBuffer_StrideInBytes(GRL_RAYTRACING_GEOMETRY_DESC * geomDesc,unsigned long stride)187 GRL_INLINE void GRL_set_triangles_VertexBuffer_StrideInBytes(GRL_RAYTRACING_GEOMETRY_DESC* geomDesc, unsigned long stride)
188 {
189     geomDesc->Desc.Triangles.VertexBufferByteStride = stride;
190 }
191 
GRL_get_triangles_VertexBuffer_StrideInBytes(const GRL_RAYTRACING_GEOMETRY_DESC * geomDesc)192 GRL_INLINE unsigned long GRL_get_triangles_VertexBuffer_StrideInBytes(const GRL_RAYTRACING_GEOMETRY_DESC* geomDesc)
193 {
194     return geomDesc->Desc.Triangles.VertexBufferByteStride;
195 }
196 
GRL_get_triangles_IndexFormatSizeInBytes(GRL_RAYTRACING_GEOMETRY_DESC * geomDesc)197 GRL_INLINE unsigned long GRL_get_triangles_IndexFormatSizeInBytes(GRL_RAYTRACING_GEOMETRY_DESC* geomDesc)
198 {
199     return (unsigned long)(geomDesc->Desc.Triangles.IndexFormat);
200 }
201 
GRL_set_procedurals_AABBCount(GRL_RAYTRACING_GEOMETRY_DESC * geomDesc,dword count)202 GRL_INLINE void GRL_set_procedurals_AABBCount(GRL_RAYTRACING_GEOMETRY_DESC* geomDesc, dword count)
203 {
204     geomDesc->Desc.Procedural.AABBCount = count;
205 }
206 
GRL_get_procedurals_AABBCount(const GRL_RAYTRACING_GEOMETRY_DESC * geomDesc)207 GRL_INLINE dword GRL_get_procedurals_AABBCount(const GRL_RAYTRACING_GEOMETRY_DESC* geomDesc)
208 {
209     return geomDesc->Desc.Procedural.AABBCount;
210 }
211 
GRL_set_procedurals_AABBs_StartAddress(GRL_RAYTRACING_GEOMETRY_DESC * geomDesc,gpuva_t address)212 GRL_INLINE void GRL_set_procedurals_AABBs_StartAddress(GRL_RAYTRACING_GEOMETRY_DESC* geomDesc, gpuva_t address)
213 {
214     geomDesc->Desc.Procedural.pAABBs_GPUVA = address;
215 }
216 
GRL_get_procedurals_AABBs_StartAddress(const GRL_RAYTRACING_GEOMETRY_DESC * geomDesc)217 GRL_INLINE gpuva_t GRL_get_procedurals_AABBs_StartAddress(const GRL_RAYTRACING_GEOMETRY_DESC* geomDesc)
218 {
219     return geomDesc->Desc.Procedural.pAABBs_GPUVA;
220 }
221 
GRL_set_procedurals_AABBs_StrideInBytes(GRL_RAYTRACING_GEOMETRY_DESC * geomDesc,qword stride)222 GRL_INLINE void GRL_set_procedurals_AABBs_StrideInBytes(GRL_RAYTRACING_GEOMETRY_DESC* geomDesc, qword stride)
223 {
224     geomDesc->Desc.Procedural.AABBByteStride = stride;
225 }
226 
GRL_get_procedurals_AABBs_StrideInBytes(const GRL_RAYTRACING_GEOMETRY_DESC * geomDesc)227 GRL_INLINE qword GRL_get_procedurals_AABBs_StrideInBytes(const GRL_RAYTRACING_GEOMETRY_DESC* geomDesc)
228 {
229     return geomDesc->Desc.Procedural.AABBByteStride;
230 }
231 
GRL_is_procedural(GRL_RAYTRACING_GEOMETRY_DESC * desc)232 GRL_INLINE uint GRL_is_procedural(GRL_RAYTRACING_GEOMETRY_DESC* desc)
233 {
234     return desc->Type == (unsigned char)GEOMETRY_TYPE_PROCEDURAL;
235 }
236 
GRL_is_triangle(GRL_RAYTRACING_GEOMETRY_DESC * desc)237 GRL_INLINE uint GRL_is_triangle(GRL_RAYTRACING_GEOMETRY_DESC* desc)
238 {
239     return desc->Type != (unsigned char)GEOMETRY_TYPE_PROCEDURAL;
240 }
241 
GRL_get_ShaderIndex_Mask(GRL_RAYTRACING_GEOMETRY_DESC * desc)242 GRL_INLINE unsigned int GRL_get_ShaderIndex_Mask(GRL_RAYTRACING_GEOMETRY_DESC* desc)
243 {
244     return 0x00FFFFFF;
245 }
246 
GRL_atomic_add_triangles_VertexCount(GRL_RAYTRACING_GEOMETRY_DESC * desc,dword value)247 GRL_INLINE dword GRL_atomic_add_triangles_VertexCount(GRL_RAYTRACING_GEOMETRY_DESC* desc, dword value)
248 {
249     return atomic_add((global uint*) & desc->Desc.Triangles.VertexCount, value);
250 }
251 
GRL_get_primitive_count(GRL_RAYTRACING_GEOMETRY_DESC * desc)252 GRL_INLINE unsigned int GRL_get_primitive_count(GRL_RAYTRACING_GEOMETRY_DESC* desc)
253 {
254     if (GRL_is_triangle(desc))
255     {
256         if (desc->Desc.Triangles.IndexFormat == INDEX_FORMAT_NONE)
257         {
258             return desc->Desc.Triangles.VertexCount / 3;
259         }
260         else
261         {
262             return desc->Desc.Triangles.IndexCount / 3;
263         }
264     }
265     else
266     {
267         return desc->Desc.Procedural.AABBCount;
268     }
269 }
270 
271 #pragma OPENCL EXTENSION cl_khr_fp16 : enable // to load half values
272 
snorm_to_float(short v)273 GRL_INLINE float snorm_to_float(short v)
274 {
275     return min(1.0f, max(-1.0f, ((float)v) * (1.0f / 32767.0f))); // FIXME: do we have intrinsic for this?
276 }
277 
snorm8_to_float(signed char v)278 GRL_INLINE float snorm8_to_float(signed char v)
279 {
280     return min(1.0f, max(-1.0f, ((float)v) * (1.0f / 127.0f))); // FIXME: do we have intrinsic for this?
281 }
282 
unorm_to_float(unsigned short v)283 GRL_INLINE float unorm_to_float(unsigned short v)
284 {
285     return min(1.0f, max(0.0f, ((float)v) * (1.0f / 65535.0f))); // FIXME: do we have intrinsic for this?
286 }
287 
288 //only lower 10 bits of v are used
unorm10_to_float(unsigned v)289 GRL_INLINE float unorm10_to_float(unsigned v)
290 {
291     const unsigned short mask = (unsigned short)((1u << 10u) - 1u);
292     const unsigned short v10 = (unsigned short)v & mask;
293     return min(1.0f, max(0.0f, ((float)v10) * (1.0f / 1023.0f))); // FIXME: do we have intrinsic for this?
294 }
295 
unorm8_to_float(unsigned char v)296 GRL_INLINE float unorm8_to_float(unsigned char v)
297 {
298     return min(1.0f, max(0.0f, ((float)v) * (1.0f / 255.0f))); // FIXME: do we have intrinsic for this?
299 }
300 
GRL_load_vertex(global GRL_RAYTRACING_GEOMETRY_DESC * geomDesc,const uint vtxID)301 GRL_INLINE float4 GRL_load_vertex(global GRL_RAYTRACING_GEOMETRY_DESC* geomDesc, const uint vtxID)
302 {
303     float4 v = (float4)(0, 0, 0, 0);
304     global char* vertices = (global char*)geomDesc->Desc.Triangles.pVertexBuffer;
305     uint vertex_stride = geomDesc->Desc.Triangles.VertexBufferByteStride;
306     uint vertex_format = geomDesc->Desc.Triangles.VertexFormat;
307 
308     if (vertex_format == VERTEX_FORMAT_R32G32B32_FLOAT)
309     {
310         const float* data = (const float*)(vertices + vtxID * vertex_stride);
311         v = (float4)(data[0], data[1], data[2], 0.0f);
312     }
313     else if (vertex_format == VERTEX_FORMAT_R32G32_FLOAT)
314     {
315         const float* data = (const float*)(vertices + vtxID * vertex_stride);
316         v = (float4)(data[0], data[1], 0.0f, 0.0f);
317     }
318     else if (vertex_format == VERTEX_FORMAT_R16G16B16A16_FLOAT)
319     {
320         const half* data = (const half*)(vertices + vtxID * vertex_stride);
321         v = (float4)(data[0], data[1], data[2], 0.0f);
322     }
323     else if (vertex_format == VERTEX_FORMAT_R16G16_FLOAT)
324     {
325         const half* data = (const half*)(vertices + vtxID * vertex_stride);
326         v = (float4)(data[0], data[1], 0.0f, 0.0f);
327     }
328     else if (vertex_format == VERTEX_FORMAT_R16G16B16A16_SNORM)
329     {
330         const short* data = (const short*)(vertices + vtxID * vertex_stride);
331         v = (float4)(snorm_to_float(data[0]),
332             snorm_to_float(data[1]),
333             snorm_to_float(data[2]),
334             0.0f);
335     }
336     else if (vertex_format == VERTEX_FORMAT_R16G16_SNORM)
337     {
338         const short* data = (const short*)(vertices + vtxID * vertex_stride);
339         v = (float4)(snorm_to_float(data[0]),
340             snorm_to_float(data[1]),
341             0.0f,
342             0.0f);
343     }
344     else if (vertex_format == VERTEX_FORMAT_R16G16B16A16_UNORM)
345     {
346         const unsigned short* data = (const unsigned short*)(vertices + vtxID * vertex_stride);
347         v = (float4)(unorm_to_float(data[0]),
348             unorm_to_float(data[1]),
349             unorm_to_float(data[2]),
350             0.0f);
351     }
352     else if (vertex_format == VERTEX_FORMAT_R16G16_UNORM)
353     {
354         const unsigned short* data = (const unsigned short*)(vertices + vtxID * vertex_stride);
355         v = (float4)(unorm_to_float(data[0]),
356             unorm_to_float(data[1]),
357             0.0f,
358             0.0f);
359     }
360     else if (vertex_format == VERTEX_FORMAT_R10G10B10A2_UNORM)
361     {
362         const unsigned data = *(const unsigned*)(vertices + vtxID * vertex_stride);
363         v = (float4)(unorm10_to_float(data),
364             unorm10_to_float((data >> 10)),
365             unorm10_to_float((data >> 20)),
366             0.0f);
367     }
368     else if (vertex_format == VERTEX_FORMAT_R8G8B8A8_UNORM)
369     {
370         const unsigned char* data = (const unsigned char*)(vertices + vtxID * vertex_stride);
371         v = (float4)(unorm8_to_float(data[0]),
372             unorm8_to_float(data[1]),
373             unorm8_to_float(data[2]),
374             0.0f);
375     }
376     else if (vertex_format == VERTEX_FORMAT_R8G8_UNORM)
377     {
378         const unsigned char* data = (const unsigned char*)(vertices + vtxID * vertex_stride);
379         v = (float4)(unorm8_to_float(data[0]),
380             unorm8_to_float(data[1]),
381             0.0f,
382             0.0f);
383     }
384     else if (vertex_format == VERTEX_FORMAT_R8G8B8A8_SNORM)
385     {
386         const unsigned char* data = (const unsigned char*)(vertices + vtxID * vertex_stride);
387         v = (float4)(snorm8_to_float(data[0]),
388             snorm8_to_float(data[1]),
389             snorm8_to_float(data[2]),
390             0.0f);
391     }
392     else if (vertex_format == VERTEX_FORMAT_R8G8_SNORM)
393     {
394         const unsigned char* data = (const unsigned char*)(vertices + vtxID * vertex_stride);
395         v = (float4)(snorm8_to_float(data[0]),
396             snorm8_to_float(data[1]),
397             0.0f,
398             0.0f);
399     }
400 
401     /* perform vertex transformation */
402     if (geomDesc->Desc.Triangles.pTransformBuffer)
403     {
404         global float* xfm = (global float*)geomDesc->Desc.Triangles.pTransformBuffer;
405         const float x = xfm[0] * v.x + xfm[1] * v.y + xfm[2] * v.z + xfm[3];
406         const float y = xfm[4] * v.x + xfm[5] * v.y + xfm[6] * v.z + xfm[7];
407         const float z = xfm[8] * v.x + xfm[9] * v.y + xfm[10] * v.z + xfm[11];
408         v = (float4)(x, y, z, 0.0f);
409     }
410 
411     return v;
412 }
413 
GRL_load_triangle_vertices(global char * vertices,const uint vertex_format,const uint vertex_stride,global float * transform_buffer,const uint vtx0ID,const uint vtx1ID,const uint vtx2ID,float4 * out)414 GRL_INLINE void GRL_load_triangle_vertices(global char* vertices, const uint vertex_format, const uint vertex_stride, global float* transform_buffer, const uint vtx0ID, const uint vtx1ID, const uint vtx2ID, float4* out)
415 {
416     if (vertex_format == VERTEX_FORMAT_R32G32B32_FLOAT)
417     {
418         const float3 data0 = as_float3(load_uint3_L1C_L3C((global uint3*)(vertices + vtx0ID * vertex_stride), 0));
419         const float3 data1 = as_float3(load_uint3_L1C_L3C((global uint3*)(vertices + vtx1ID * vertex_stride), 0));
420         const float3 data2 = as_float3(load_uint3_L1C_L3C((global uint3*)(vertices + vtx2ID * vertex_stride), 0));
421         out[0] = (float4)(data0[0], data0[1], data0[2], 0.0f);
422         out[1] = (float4)(data1[0], data1[1], data1[2], 0.0f);
423         out[2] = (float4)(data2[0], data2[1], data2[2], 0.0f);
424     }
425     else if (vertex_format == VERTEX_FORMAT_R32G32_FLOAT)
426     {
427         const float* data0 = (const float*)(vertices + vtx0ID * vertex_stride);
428         const float* data1 = (const float*)(vertices + vtx1ID * vertex_stride);
429         const float* data2 = (const float*)(vertices + vtx2ID * vertex_stride);
430         out[0] = (float4)(data0[0], data0[1], 0.0f, 0.0f);
431         out[1] = (float4)(data1[0], data1[1], 0.0f, 0.0f);
432         out[2] = (float4)(data2[0], data2[1], 0.0f, 0.0f);
433     }
434     else if (vertex_format == VERTEX_FORMAT_R16G16B16A16_FLOAT)
435     {
436         const half* data0 = (const half*)(vertices + vtx0ID * vertex_stride);
437         const half* data1 = (const half*)(vertices + vtx1ID * vertex_stride);
438         const half* data2 = (const half*)(vertices + vtx2ID * vertex_stride);
439         out[0] = (float4)(data0[0], data0[1], data0[2], 0.0f);
440         out[1] = (float4)(data1[0], data1[1], data1[2], 0.0f);
441         out[2] = (float4)(data2[0], data2[1], data2[2], 0.0f);
442     }
443     else if (vertex_format == VERTEX_FORMAT_R16G16_FLOAT)
444     {
445         const half* data0 = (const half*)(vertices + vtx0ID * vertex_stride);
446         const half* data1 = (const half*)(vertices + vtx1ID * vertex_stride);
447         const half* data2 = (const half*)(vertices + vtx2ID * vertex_stride);
448         out[0] = (float4)(data0[0], data0[1], 0.0f, 0.0f);
449         out[1] = (float4)(data1[0], data1[1], 0.0f, 0.0f);
450         out[2] = (float4)(data2[0], data2[1], 0.0f, 0.0f);
451     }
452     else if (vertex_format == VERTEX_FORMAT_R16G16B16A16_SNORM)
453     {
454         const short* data0 = (const short*)(vertices + vtx0ID * vertex_stride);
455         const short* data1 = (const short*)(vertices + vtx1ID * vertex_stride);
456         const short* data2 = (const short*)(vertices + vtx2ID * vertex_stride);
457         out[0] = (float4)(snorm_to_float(data0[0]), snorm_to_float(data0[1]), snorm_to_float(data0[2]), 0.0f);
458         out[1] = (float4)(snorm_to_float(data1[0]), snorm_to_float(data1[1]), snorm_to_float(data1[2]), 0.0f);
459         out[2] = (float4)(snorm_to_float(data2[0]), snorm_to_float(data2[1]), snorm_to_float(data2[2]), 0.0f);
460     }
461     else if (vertex_format == VERTEX_FORMAT_R16G16_SNORM)
462     {
463         const short* data0 = (const short*)(vertices + vtx0ID * vertex_stride);
464         const short* data1 = (const short*)(vertices + vtx1ID * vertex_stride);
465         const short* data2 = (const short*)(vertices + vtx2ID * vertex_stride);
466         out[0] = (float4)(snorm_to_float(data0[0]), snorm_to_float(data0[1]), 0.0f, 0.0f);
467         out[1] = (float4)(snorm_to_float(data1[0]), snorm_to_float(data1[1]), 0.0f, 0.0f);
468         out[2] = (float4)(snorm_to_float(data2[0]), snorm_to_float(data2[1]), 0.0f, 0.0f);
469     }
470     else if (vertex_format == VERTEX_FORMAT_R16G16B16A16_UNORM)
471     {
472         const unsigned short* data0 = (const unsigned short*)(vertices + vtx0ID * vertex_stride);
473         const unsigned short* data1 = (const unsigned short*)(vertices + vtx1ID * vertex_stride);
474         const unsigned short* data2 = (const unsigned short*)(vertices + vtx2ID * vertex_stride);
475         out[0] = (float4)(unorm_to_float(data0[0]), unorm_to_float(data0[1]), unorm_to_float(data0[2]), 0.0f);
476         out[1] = (float4)(unorm_to_float(data1[0]), unorm_to_float(data1[1]), unorm_to_float(data1[2]), 0.0f);
477         out[2] = (float4)(unorm_to_float(data2[0]), unorm_to_float(data2[1]), unorm_to_float(data2[2]), 0.0f);
478     }
479     else if (vertex_format == VERTEX_FORMAT_R16G16_UNORM)
480     {
481         const unsigned short* data0 = (const unsigned short*)(vertices + vtx0ID * vertex_stride);
482         const unsigned short* data1 = (const unsigned short*)(vertices + vtx1ID * vertex_stride);
483         const unsigned short* data2 = (const unsigned short*)(vertices + vtx2ID * vertex_stride);
484         out[0] = (float4)(unorm_to_float(data0[0]), unorm_to_float(data0[1]), 0.0f, 0.0f);
485         out[1] = (float4)(unorm_to_float(data1[0]), unorm_to_float(data1[1]), 0.0f, 0.0f);
486         out[2] = (float4)(unorm_to_float(data2[0]), unorm_to_float(data2[1]), 0.0f, 0.0f);
487     }
488     else if (vertex_format == VERTEX_FORMAT_R10G10B10A2_UNORM)
489     {
490         const unsigned data0 = *(const unsigned*)(vertices + vtx0ID * vertex_stride);
491         const unsigned data1 = *(const unsigned*)(vertices + vtx1ID * vertex_stride);
492         const unsigned data2 = *(const unsigned*)(vertices + vtx2ID * vertex_stride);
493         out[0] = (float4)(unorm10_to_float(data0), unorm10_to_float(data0 >> 10), unorm10_to_float(data0 >> 20), 0.0f);
494         out[1] = (float4)(unorm10_to_float(data1), unorm10_to_float(data1 >> 10), unorm10_to_float(data1 >> 20), 0.0f);
495         out[2] = (float4)(unorm10_to_float(data2), unorm10_to_float(data2 >> 10), unorm10_to_float(data2 >> 20), 0.0f);
496     }
497     else if (vertex_format == VERTEX_FORMAT_R8G8B8A8_UNORM)
498     {
499         const unsigned char* data0 = (const unsigned char*)(vertices + vtx0ID * vertex_stride);
500         const unsigned char* data1 = (const unsigned char*)(vertices + vtx1ID * vertex_stride);
501         const unsigned char* data2 = (const unsigned char*)(vertices + vtx2ID * vertex_stride);
502         out[0] = (float4)(unorm8_to_float(data0[0]), unorm8_to_float(data0[1]), unorm8_to_float(data0[2]), 0.0f);
503         out[1] = (float4)(unorm8_to_float(data1[0]), unorm8_to_float(data1[1]), unorm8_to_float(data1[2]), 0.0f);
504         out[2] = (float4)(unorm8_to_float(data2[0]), unorm8_to_float(data2[1]), unorm8_to_float(data2[2]), 0.0f);
505     }
506     else if (vertex_format == VERTEX_FORMAT_R8G8_UNORM)
507     {
508         const unsigned char* data0 = (const unsigned char*)(vertices + vtx0ID * vertex_stride);
509         const unsigned char* data1 = (const unsigned char*)(vertices + vtx1ID * vertex_stride);
510         const unsigned char* data2 = (const unsigned char*)(vertices + vtx2ID * vertex_stride);
511         out[0] = (float4)(unorm8_to_float(data0[0]), unorm8_to_float(data0[1]), 0.0f, 0.0f);
512         out[1] = (float4)(unorm8_to_float(data1[0]), unorm8_to_float(data1[1]), 0.0f, 0.0f);
513         out[2] = (float4)(unorm8_to_float(data2[0]), unorm8_to_float(data2[1]), 0.0f, 0.0f);
514     }
515     else if (vertex_format == VERTEX_FORMAT_R8G8B8A8_SNORM)
516     {
517         const unsigned char* data0 = (const unsigned char*)(vertices + vtx0ID * vertex_stride);
518         const unsigned char* data1 = (const unsigned char*)(vertices + vtx1ID * vertex_stride);
519         const unsigned char* data2 = (const unsigned char*)(vertices + vtx2ID * vertex_stride);
520         out[0] = (float4)(snorm8_to_float(data0[0]), snorm8_to_float(data0[1]), snorm8_to_float(data0[2]), 0.0f);
521         out[1] = (float4)(snorm8_to_float(data1[0]), snorm8_to_float(data1[1]), snorm8_to_float(data1[2]), 0.0f);
522         out[2] = (float4)(snorm8_to_float(data2[0]), snorm8_to_float(data2[1]), snorm8_to_float(data2[2]), 0.0f);
523     }
524     else if (vertex_format == VERTEX_FORMAT_R8G8_SNORM)
525     {
526         const unsigned char* data0 = (const unsigned char*)(vertices + vtx0ID * vertex_stride);
527         const unsigned char* data1 = (const unsigned char*)(vertices + vtx1ID * vertex_stride);
528         const unsigned char* data2 = (const unsigned char*)(vertices + vtx2ID * vertex_stride);
529         out[0] = (float4)(snorm8_to_float(data0[0]), snorm8_to_float(data0[1]), 0.0f, 0.0f);
530         out[1] = (float4)(snorm8_to_float(data1[0]), snorm8_to_float(data1[1]), 0.0f, 0.0f);
531         out[2] = (float4)(snorm8_to_float(data2[0]), snorm8_to_float(data2[1]), 0.0f, 0.0f);
532     }
533 
534     /* perform vertex transformation */
535     if (transform_buffer)
536     {
537         global float* xfm = (global float*)transform_buffer;
538         for (uint i = 0; i < 3; ++i)
539         {
540             const float x = xfm[0] * out[i].x + xfm[1] * out[i].y + xfm[2] * out[i].z + xfm[3];
541             const float y = xfm[4] * out[i].x + xfm[5] * out[i].y + xfm[6] * out[i].z + xfm[7];
542             const float z = xfm[8] * out[i].x + xfm[9] * out[i].y + xfm[10] * out[i].z + xfm[11];
543             out[i] = (float4)(x, y, z, 0.0f);
544         }
545     }
546 }
547 
GRL_load_quad_vertices_no_stride(global GRL_RAYTRACING_GEOMETRY_DESC * geomDesc,float3 * out0,float3 * out1,float3 * out2,float3 * out3,const uint4 vtxID,const uint vertex_format,global char * vertices)548 GRL_INLINE void GRL_load_quad_vertices_no_stride(global GRL_RAYTRACING_GEOMETRY_DESC* geomDesc,
549     float3* out0, float3* out1, float3* out2, float3* out3,
550     const uint4 vtxID, const uint vertex_format, global char* vertices)
551 {
552     float3 v0, v1, v2, v3;
553 
554     if (vertex_format == VERTEX_FORMAT_R32G32B32_FLOAT)
555     {
556         const float* data0 = (const float*)(vertices + vtxID.x);
557         const float* data1 = (const float*)(vertices + vtxID.y);
558         const float* data2 = (const float*)(vertices + vtxID.z);
559         const float* data3 = (const float*)(vertices + vtxID.w);
560         v0 = (float3)(data0[0], data0[1], data0[2]);
561         v1 = (float3)(data1[0], data1[1], data1[2]);
562         v2 = (float3)(data2[0], data2[1], data2[2]);
563         v3 = (float3)(data3[0], data3[1], data3[2]);
564     }
565     else if (vertex_format == VERTEX_FORMAT_R32G32_FLOAT)
566     {
567         const float* data0 = (const float*)(vertices + vtxID.x);
568         const float* data1 = (const float*)(vertices + vtxID.y);
569         const float* data2 = (const float*)(vertices + vtxID.z);
570         const float* data3 = (const float*)(vertices + vtxID.w);
571         v0 = (float3)(data0[0], data0[1], 0.0f);
572         v1 = (float3)(data1[0], data1[1], 0.0f);
573         v2 = (float3)(data2[0], data2[1], 0.0f);
574         v3 = (float3)(data3[0], data3[1], 0.0f);
575     }
576     else if (vertex_format == VERTEX_FORMAT_R16G16B16A16_FLOAT)
577     {
578         const half* data0 = (const half*)(vertices + vtxID.x);
579         const half* data1 = (const half*)(vertices + vtxID.y);
580         const half* data2 = (const half*)(vertices + vtxID.z);
581         const half* data3 = (const half*)(vertices + vtxID.w);
582         v0 = (float3)(data0[0], data0[1], data0[2]);
583         v1 = (float3)(data1[0], data1[1], data1[2]);
584         v2 = (float3)(data2[0], data2[1], data2[2]);
585         v3 = (float3)(data3[0], data3[1], data3[2]);
586     }
587     else if (vertex_format == VERTEX_FORMAT_R16G16_FLOAT)
588     {
589         const half* data0 = (const half*)(vertices + vtxID.x);
590         const half* data1 = (const half*)(vertices + vtxID.y);
591         const half* data2 = (const half*)(vertices + vtxID.z);
592         const half* data3 = (const half*)(vertices + vtxID.w);
593         v0 = (float3)(data0[0], data0[1], 0.0f);
594         v1 = (float3)(data1[0], data1[1], 0.0f);
595         v2 = (float3)(data2[0], data2[1], 0.0f);
596         v3 = (float3)(data3[0], data3[1], 0.0f);
597     }
598     else if (vertex_format == VERTEX_FORMAT_R16G16B16A16_SNORM)
599     {
600         const short* data0 = (const short*)(vertices + vtxID.x);
601         const short* data1 = (const short*)(vertices + vtxID.y);
602         const short* data2 = (const short*)(vertices + vtxID.z);
603         const short* data3 = (const short*)(vertices + vtxID.w);
604         v0 = (float3)(snorm_to_float(data0[0]), snorm_to_float(data0[1]), snorm_to_float(data0[2]));
605         v1 = (float3)(snorm_to_float(data1[0]), snorm_to_float(data1[1]), snorm_to_float(data1[2]));
606         v2 = (float3)(snorm_to_float(data2[0]), snorm_to_float(data2[1]), snorm_to_float(data2[2]));
607         v3 = (float3)(snorm_to_float(data3[0]), snorm_to_float(data3[1]), snorm_to_float(data3[2]));
608     }
609     else if (vertex_format == VERTEX_FORMAT_R16G16_SNORM)
610     {
611         const short* data0 = (const short*)(vertices + vtxID.x);
612         const short* data1 = (const short*)(vertices + vtxID.y);
613         const short* data2 = (const short*)(vertices + vtxID.z);
614         const short* data3 = (const short*)(vertices + vtxID.w);
615         v0 = (float3)(snorm_to_float(data0[0]), snorm_to_float(data0[1]), 0.0f);
616         v1 = (float3)(snorm_to_float(data1[0]), snorm_to_float(data1[1]), 0.0f);
617         v2 = (float3)(snorm_to_float(data2[0]), snorm_to_float(data2[1]), 0.0f);
618         v3 = (float3)(snorm_to_float(data3[0]), snorm_to_float(data3[1]), 0.0f);
619     }
620     else if (vertex_format == VERTEX_FORMAT_R16G16B16A16_UNORM)
621     {
622         const unsigned short* data0 = (const unsigned short*)(vertices + vtxID.x);
623         const unsigned short* data1 = (const unsigned short*)(vertices + vtxID.y);
624         const unsigned short* data2 = (const unsigned short*)(vertices + vtxID.z);
625         const unsigned short* data3 = (const unsigned short*)(vertices + vtxID.w);
626         v0 = (float3)(unorm_to_float(data0[0]), unorm_to_float(data0[1]), unorm_to_float(data0[2]));
627         v1 = (float3)(unorm_to_float(data1[0]), unorm_to_float(data1[1]), unorm_to_float(data1[2]));
628         v2 = (float3)(unorm_to_float(data2[0]), unorm_to_float(data2[1]), unorm_to_float(data2[2]));
629         v3 = (float3)(unorm_to_float(data3[0]), unorm_to_float(data3[1]), unorm_to_float(data3[2]));
630     }
631     else if (vertex_format == VERTEX_FORMAT_R16G16_UNORM)
632     {
633         const unsigned short* data0 = (const unsigned short*)(vertices + vtxID.x);
634         const unsigned short* data1 = (const unsigned short*)(vertices + vtxID.y);
635         const unsigned short* data2 = (const unsigned short*)(vertices + vtxID.z);
636         const unsigned short* data3 = (const unsigned short*)(vertices + vtxID.w);
637         v0 = (float3)(unorm_to_float(data0[0]), unorm_to_float(data0[1]), 0.0f);
638         v1 = (float3)(unorm_to_float(data1[0]), unorm_to_float(data1[1]), 0.0f);
639         v2 = (float3)(unorm_to_float(data2[0]), unorm_to_float(data2[1]), 0.0f);
640         v3 = (float3)(unorm_to_float(data3[0]), unorm_to_float(data3[1]), 0.0f);
641     }
642     else if (vertex_format == VERTEX_FORMAT_R10G10B10A2_UNORM)
643     {
644         const unsigned data0 = *(const unsigned*)(vertices + vtxID.x);
645         const unsigned data1 = *(const unsigned*)(vertices + vtxID.y);
646         const unsigned data2 = *(const unsigned*)(vertices + vtxID.z);
647         const unsigned data3 = *(const unsigned*)(vertices + vtxID.w);
648         v0 = (float3)(unorm10_to_float(data0), unorm10_to_float((data0 >> 10)), unorm10_to_float((data0 >> 20)));
649         v1 = (float3)(unorm10_to_float(data1), unorm10_to_float((data1 >> 10)), unorm10_to_float((data1 >> 20)));
650         v2 = (float3)(unorm10_to_float(data2), unorm10_to_float((data2 >> 10)), unorm10_to_float((data2 >> 20)));
651         v3 = (float3)(unorm10_to_float(data3), unorm10_to_float((data3 >> 10)), unorm10_to_float((data3 >> 20)));
652     }
653     else if (vertex_format == VERTEX_FORMAT_R8G8B8A8_UNORM)
654     {
655         const unsigned char* data0 = (const unsigned char*)(vertices + vtxID.x);
656         const unsigned char* data1 = (const unsigned char*)(vertices + vtxID.y);
657         const unsigned char* data2 = (const unsigned char*)(vertices + vtxID.z);
658         const unsigned char* data3 = (const unsigned char*)(vertices + vtxID.w);
659         v0 = (float3)(unorm8_to_float(data0[0]), unorm8_to_float(data0[1]), unorm8_to_float(data0[2]));
660         v1 = (float3)(unorm8_to_float(data1[0]), unorm8_to_float(data1[1]), unorm8_to_float(data1[2]));
661         v2 = (float3)(unorm8_to_float(data2[0]), unorm8_to_float(data2[1]), unorm8_to_float(data2[2]));
662         v3 = (float3)(unorm8_to_float(data3[0]), unorm8_to_float(data3[1]), unorm8_to_float(data3[2]));
663     }
664     else if (vertex_format == VERTEX_FORMAT_R8G8_UNORM)
665     {
666         const unsigned char* data0 = (const unsigned char*)(vertices + vtxID.x);
667         const unsigned char* data1 = (const unsigned char*)(vertices + vtxID.y);
668         const unsigned char* data2 = (const unsigned char*)(vertices + vtxID.z);
669         const unsigned char* data3 = (const unsigned char*)(vertices + vtxID.w);
670         v0 = (float3)(unorm8_to_float(data0[0]), unorm8_to_float(data0[1]), 0.0f);
671         v1 = (float3)(unorm8_to_float(data1[0]), unorm8_to_float(data1[1]), 0.0f);
672         v2 = (float3)(unorm8_to_float(data2[0]), unorm8_to_float(data2[1]), 0.0f);
673         v3 = (float3)(unorm8_to_float(data3[0]), unorm8_to_float(data3[1]), 0.0f);
674     }
675     else if (vertex_format == VERTEX_FORMAT_R8G8B8A8_SNORM)
676     {
677         const signed char* data0 = (const signed char*)(vertices + vtxID.x);
678         const signed char* data1 = (const signed char*)(vertices + vtxID.y);
679         const signed char* data2 = (const signed char*)(vertices + vtxID.z);
680         const signed char* data3 = (const signed char*)(vertices + vtxID.w);
681         v0 = (float3)(snorm8_to_float(data0[0]), snorm8_to_float(data0[1]), snorm8_to_float(data0[2]));
682         v1 = (float3)(snorm8_to_float(data1[0]), snorm8_to_float(data1[1]), snorm8_to_float(data1[2]));
683         v2 = (float3)(snorm8_to_float(data2[0]), snorm8_to_float(data2[1]), snorm8_to_float(data2[2]));
684         v3 = (float3)(snorm8_to_float(data3[0]), snorm8_to_float(data3[1]), snorm8_to_float(data3[2]));
685     }
686     else if (vertex_format == VERTEX_FORMAT_R8G8_SNORM)
687     {
688         const signed char* data0 = (const signed char*)(vertices + vtxID.x);
689         const signed char* data1 = (const signed char*)(vertices + vtxID.y);
690         const signed char* data2 = (const signed char*)(vertices + vtxID.z);
691         const signed char* data3 = (const signed char*)(vertices + vtxID.w);
692         v0 = (float3)(snorm8_to_float(data0[0]), snorm8_to_float(data0[1]), 0.0f);
693         v1 = (float3)(snorm8_to_float(data1[0]), snorm8_to_float(data1[1]), 0.0f);
694         v2 = (float3)(snorm8_to_float(data2[0]), snorm8_to_float(data2[1]), 0.0f);
695         v3 = (float3)(snorm8_to_float(data3[0]), snorm8_to_float(data3[1]), 0.0f);
696     }
697     else
698     {
699         v0 = (float3)(0.0f, 0.0f, 0.0f);
700         v1 = (float3)(0.0f, 0.0f, 0.0f);
701         v2 = (float3)(0.0f, 0.0f, 0.0f);
702         v3 = (float3)(0.0f, 0.0f, 0.0f);
703     }
704 
705 
706     /* perform vertex transformation */
707     if (geomDesc->Desc.Triangles.pTransformBuffer)
708     {
709         global float* xfm = (global float*)geomDesc->Desc.Triangles.pTransformBuffer;
710 
711         v0.xyz = (float3)(
712             xfm[0] * v0.x + xfm[1] * v0.y + xfm[2] * v0.z + xfm[3],
713             xfm[4] * v0.x + xfm[5] * v0.y + xfm[6] * v0.z + xfm[7],
714             xfm[8] * v0.x + xfm[9] * v0.y + xfm[10] * v0.z + xfm[11]
715             );
716 
717         v1.xyz = (float3)(
718             xfm[0] * v1.x + xfm[1] * v1.y + xfm[2] * v1.z + xfm[3],
719             xfm[4] * v1.x + xfm[5] * v1.y + xfm[6] * v1.z + xfm[7],
720             xfm[8] * v1.x + xfm[9] * v1.y + xfm[10] * v1.z + xfm[11]
721             );
722 
723         v2.xyz = (float3)(
724             xfm[0] * v2.x + xfm[1] * v2.y + xfm[2] * v2.z + xfm[3],
725             xfm[4] * v2.x + xfm[5] * v2.y + xfm[6] * v2.z + xfm[7],
726             xfm[8] * v2.x + xfm[9] * v2.y + xfm[10] * v2.z + xfm[11]
727             );
728 
729         v3.xyz = (float3)(
730             xfm[0] * v3.x + xfm[1] * v3.y + xfm[2] * v3.z + xfm[3],
731             xfm[4] * v3.x + xfm[5] * v3.y + xfm[6] * v3.z + xfm[7],
732             xfm[8] * v3.x + xfm[9] * v3.y + xfm[10] * v3.z + xfm[11]
733             );
734     }
735 
736     *out0 = v0;
737     *out1 = v1;
738     *out2 = v2;
739     *out3 = v3;
740 }
741 
742 
GRL_load_quad_vertices(global GRL_RAYTRACING_GEOMETRY_DESC * geomDesc,float3 * out0,float3 * out1,float3 * out2,float3 * out3,uint4 vtxID)743 GRL_INLINE void GRL_load_quad_vertices(global GRL_RAYTRACING_GEOMETRY_DESC* geomDesc,
744     float3* out0, float3* out1, float3* out2, float3* out3,
745     uint4 vtxID)
746 {
747     global char* vertices = (global char*)geomDesc->Desc.Triangles.pVertexBuffer;
748     uint vertex_format = geomDesc->Desc.Triangles.VertexFormat;
749     uint vertex_stride = geomDesc->Desc.Triangles.VertexBufferByteStride;
750 
751     vtxID *= vertex_stride;
752 
753     GRL_load_quad_vertices_no_stride(geomDesc, out0, out1, out2, out3,
754         vtxID, vertex_format, vertices);
755 }
756 
757 
GRL_load_aabb(global GRL_RAYTRACING_GEOMETRY_DESC * geomDesc,const uint primID)758 GRL_INLINE GRL_RAYTRACING_AABB GRL_load_aabb(global GRL_RAYTRACING_GEOMETRY_DESC* geomDesc, const uint primID)
759 {
760     global char* aabb0 = (global char*)geomDesc->Desc.Procedural.pAABBs_GPUVA;
761     global char* aabb = aabb0 + (primID * geomDesc->Desc.Procedural.AABBByteStride);
762     return *(global GRL_RAYTRACING_AABB*)aabb;
763 }
764 
765 // same as for d3d12
766 typedef struct GRL_RAYTRACING_INSTANCE_DESC
767 {
768     float Transform[12];
769     //     unsigned int InstanceID : 24;
770     //     unsigned int InstanceMask : 8;
771     uint32_t DW0;
772     //     unsigned int InstanceContributionToHitGroupIndex : 24;
773     //     unsigned int Flags : 8;
774     uint32_t DW1;
775     global char* AccelerationStructure;
776 } GRL_RAYTRACING_INSTANCE_DESC;
777 
GRL_get_transform(const GRL_RAYTRACING_INSTANCE_DESC * d,const uint32_t row,const uint32_t column)778 GRL_INLINE float GRL_get_transform(const GRL_RAYTRACING_INSTANCE_DESC* d, const uint32_t row, const uint32_t column)
779 {
780     return d->Transform[row * 4 + column];
781 }
782 
GRL_get_instanceID(const GRL_RAYTRACING_INSTANCE_DESC * d)783 GRL_INLINE uint32_t GRL_get_instanceID(const GRL_RAYTRACING_INSTANCE_DESC* d)
784 {
785     return d->DW0 & ((1 << 24) - 1);
786 }
787 
GRL_get_InstanceMask(const GRL_RAYTRACING_INSTANCE_DESC * d)788 GRL_INLINE uint32_t GRL_get_InstanceMask(const GRL_RAYTRACING_INSTANCE_DESC* d)
789 {
790     return d->DW0 >> 24;
791 }
792 
GRL_get_InstanceContributionToHitGroupIndex(const GRL_RAYTRACING_INSTANCE_DESC * d)793 GRL_INLINE uint32_t GRL_get_InstanceContributionToHitGroupIndex(const GRL_RAYTRACING_INSTANCE_DESC* d)
794 {
795     return d->DW1 & ((1 << 24) - 1);
796 }
797 
GRL_get_InstanceFlags(const GRL_RAYTRACING_INSTANCE_DESC * d)798 GRL_INLINE uint32_t GRL_get_InstanceFlags(const GRL_RAYTRACING_INSTANCE_DESC* d)
799 {
800     return d->DW1 >> 24;
801 }
802 
GRL_get_AccelerationStructure(const GRL_RAYTRACING_INSTANCE_DESC * d)803 GRL_INLINE gpuva_t GRL_get_AccelerationStructure(const GRL_RAYTRACING_INSTANCE_DESC* d)
804 {
805     return (gpuva_t)d->AccelerationStructure;
806 }
807 
GRL_set_transform(GRL_RAYTRACING_INSTANCE_DESC * d,const uint32_t row,const uint32_t column,float value)808 GRL_INLINE void GRL_set_transform(GRL_RAYTRACING_INSTANCE_DESC* d, const uint32_t row, const uint32_t column, float value)
809 {
810     d->Transform[row * 4 + column] = value;
811 }
812 
GRL_set_instanceID(GRL_RAYTRACING_INSTANCE_DESC * d,const uint32_t id)813 GRL_INLINE void GRL_set_instanceID(GRL_RAYTRACING_INSTANCE_DESC* d, const uint32_t id)
814 {
815     d->DW0 &= 255 << 24;
816     d->DW0 |= id & ((1 << 24) - 1);
817 }
818 
GRL_set_InstanceMask(GRL_RAYTRACING_INSTANCE_DESC * d,const uint32_t mask)819 GRL_INLINE void GRL_set_InstanceMask(GRL_RAYTRACING_INSTANCE_DESC* d, const uint32_t mask)
820 {
821     d->DW0 &= ((1 << 24) - 1);
822     d->DW0 |= mask << 24;
823 }
824 
GRL_set_InstanceContributionToHitGroupIndex(GRL_RAYTRACING_INSTANCE_DESC * d,const uint32_t contribution)825 GRL_INLINE void GRL_set_InstanceContributionToHitGroupIndex(GRL_RAYTRACING_INSTANCE_DESC* d, const uint32_t contribution)
826 {
827     d->DW1 &= 255 << 24;
828     d->DW1 |= contribution & ((1 << 24) - 1);
829 }
830 
GRL_set_InstanceFlags(GRL_RAYTRACING_INSTANCE_DESC * d,const uint32_t flags)831 GRL_INLINE void GRL_set_InstanceFlags(GRL_RAYTRACING_INSTANCE_DESC* d, const uint32_t flags)
832 {
833     d->DW1 &= ((1 << 24) - 1);
834     d->DW1 |= flags << 24;
835 }
836 
GRL_set_AccelerationStructure(GRL_RAYTRACING_INSTANCE_DESC * d,gpuva_t address)837 GRL_INLINE void GRL_set_AccelerationStructure(GRL_RAYTRACING_INSTANCE_DESC* d, gpuva_t address)
838 {
839     d->AccelerationStructure = (global char*)address;
840 }
841