xref: /aosp_15_r20/external/deqp/external/vulkancts/modules/vulkan/ray_tracing/vktRayTracingMiscTests.cpp (revision 35238bce31c2a825756842865a792f8cf7f89930)
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2020 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Ray Tracing Misc tests
22  *//*--------------------------------------------------------------------*/
23 
24 #include "vktRayTracingMiscTests.hpp"
25 #include "vktTestCaseUtil.hpp"
26 
27 #include "vkDefs.hpp"
28 
29 #include "vktTestCase.hpp"
30 #include "vkCmdUtil.hpp"
31 #include "vkObjUtil.hpp"
32 #include "vkBuilderUtil.hpp"
33 #include "vkBarrierUtil.hpp"
34 #include "vkBufferWithMemory.hpp"
35 #include "vkImageWithMemory.hpp"
36 #include "vkTypeUtil.hpp"
37 
38 #include "vkRayTracingUtil.hpp"
39 
40 #include "deRandom.hpp"
41 #include <algorithm>
42 #include <memory>
43 #include <sstream>
44 
45 namespace vkt
46 {
47 namespace RayTracing
48 {
49 namespace
50 {
51 using namespace vk;
52 using namespace std;
53 
/* Scalar base types known to this file.
 *
 * Enumerator names follow a <kind><bit width> pattern (presumably F = floating point, I = signed integer,
 * U = unsigned integer). UNKNOWN is the last enumerator.
 */
enum class BaseType
{
    /* F* */
    F32 = 0,
    F64,

    /* I* */
    I8,
    I16,
    I32,
    I64,

    /* U* */
    U8,
    U16,
    U32,
    U64,

    /* Placeholder used when no base type applies. */
    UNKNOWN
};
69 
/* Kind of primitives an acceleration structure is built from.
 *
 * FIRST and COUNT bracket the single-primitive-type values (AABB, TRIANGLES). AABB_AND_TRIANGLES is
 * declared after COUNT, so it lies outside the [FIRST, COUNT) range.
 */
enum class GeometryType
{
    FIRST = 0,

    AABB = FIRST, /* Value 0, aliases FIRST. */
    TRIANGLES,

    COUNT, /* Number of enumerators in [FIRST, COUNT). */

    AABB_AND_TRIANGLES, //< Only compatible with ONE_TL_MANY_BLS_MANY_GEOMETRIES_WITH_VARYING_PRIM_TYPES AS layout.
};
81 
/* Matrix major order (column-major or row-major). UNKNOWN is the last enumerator. */
enum class MatrixMajorOrder
{
    COLUMN_MAJOR = 0,
    ROW_MAJOR,

    /* Placeholder used when no major order applies. */
    UNKNOWN
};
89 
/* Shader group indices.
 *
 * Ray generation, miss and hit groups take indices 0..2. FIRST_CALLABLE_GROUP (3) is the last enumerator;
 * presumably further callable groups are addressed relative to it - verify against the users of this enum.
 */
enum class ShaderGroups
{
    FIRST_GROUP = 0,

    RAYGEN_GROUP = FIRST_GROUP, /* Value 0, aliases FIRST_GROUP. */
    MISS_GROUP,
    HIT_GROUP,

    FIRST_CALLABLE_GROUP,
};
99 
/* Identifies the individual "misc" ray tracing test variants.
 *
 * Enumerators rely on implicit numbering, so their relative order determines their values. Numbered
 * families (RECURSIVE_TRACES_*, SHADER_RECORD_BLOCK_*) are declared contiguously; do not reorder them
 * without checking for code that depends on the spacing between family members.
 */
enum class TestType
{
    /* Stand-alone tests */
    AABBS_AND_TRIS_IN_ONE_TL = 0,
    AS_STRESS_TEST,
    CALLABLE_SHADER_STRESS_DYNAMIC_TEST,
    CALLABLE_SHADER_STRESS_TEST,
    CULL_MASK,
    MAX_RAY_HIT_ATTRIBUTE_SIZE,
    MAX_RT_INVOCATIONS_SUPPORTED,
    CULL_MASK_EXTRA_BITS,
    NO_DUPLICATE_ANY_HIT,
    REPORT_INTERSECTION_RESULT,
    RAY_PAYLOAD_IN,

    /* RECURSIVE_TRACES_<N>, N = 0..29 */
    RECURSIVE_TRACES_0,
    RECURSIVE_TRACES_1,
    RECURSIVE_TRACES_2,
    RECURSIVE_TRACES_3,
    RECURSIVE_TRACES_4,
    RECURSIVE_TRACES_5,
    RECURSIVE_TRACES_6,
    RECURSIVE_TRACES_7,
    RECURSIVE_TRACES_8,
    RECURSIVE_TRACES_9,
    RECURSIVE_TRACES_10,
    RECURSIVE_TRACES_11,
    RECURSIVE_TRACES_12,
    RECURSIVE_TRACES_13,
    RECURSIVE_TRACES_14,
    RECURSIVE_TRACES_15,
    RECURSIVE_TRACES_16,
    RECURSIVE_TRACES_17,
    RECURSIVE_TRACES_18,
    RECURSIVE_TRACES_19,
    RECURSIVE_TRACES_20,
    RECURSIVE_TRACES_21,
    RECURSIVE_TRACES_22,
    RECURSIVE_TRACES_23,
    RECURSIVE_TRACES_24,
    RECURSIVE_TRACES_25,
    RECURSIVE_TRACES_26,
    RECURSIVE_TRACES_27,
    RECURSIVE_TRACES_28,
    RECURSIVE_TRACES_29,

    /* SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_<N>, N = 1..6 */
    SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_1,
    SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_2,
    SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_3,
    SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_4,
    SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_5,
    SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_6,

    /* SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_<N>, N = 1..6 */
    SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_1,
    SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_2,
    SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_3,
    SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_4,
    SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_5,
    SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_6,

    /* SHADER_RECORD_BLOCK_SCALAR_<N>, N = 1..6 */
    SHADER_RECORD_BLOCK_SCALAR_1,
    SHADER_RECORD_BLOCK_SCALAR_2,
    SHADER_RECORD_BLOCK_SCALAR_3,
    SHADER_RECORD_BLOCK_SCALAR_4,
    SHADER_RECORD_BLOCK_SCALAR_5,
    SHADER_RECORD_BLOCK_SCALAR_6,

    /* SHADER_RECORD_BLOCK_STD430_<N>, N = 1..6 */
    SHADER_RECORD_BLOCK_STD430_1,
    SHADER_RECORD_BLOCK_STD430_2,
    SHADER_RECORD_BLOCK_STD430_3,
    SHADER_RECORD_BLOCK_STD430_4,
    SHADER_RECORD_BLOCK_STD430_5,
    SHADER_RECORD_BLOCK_STD430_6,

    /* Ignore / terminate variants, each in a STATICALLY and a DYNAMICALLY flavor */
    IGNORE_ANY_HIT_STATICALLY,
    IGNORE_ANY_HIT_DYNAMICALLY,
    TERMINATE_ANY_HIT_STATICALLY,
    TERMINATE_ANY_HIT_DYNAMICALLY,
    TERMINATE_INTERSECTION_STATICALLY,
    TERMINATE_INTERSECTION_DYNAMICALLY,

    USE_MEMORY_ACCESS,

    /* Number of test types. Also the value CaseDef's default constructor assigns to its type field. */
    COUNT
};
177 
/* Variable types known to this file; the names mirror GLSL type names.
 *
 * Values are assigned implicitly, so declaration order determines them. FIRST aliases FLOAT (0) and
 * COUNT aliases UNKNOWN, i.e. COUNT equals the number of real types declared before UNKNOWN.
 */
enum class VariableType
{
    FIRST = 0,

    /* float scalar + vectors */
    FLOAT = FIRST,
    VEC2,
    VEC3,
    VEC4,

    /* float matrices */
    MAT2,
    MAT2X2,
    MAT2X3,
    MAT2X4,
    MAT3,
    MAT3X2,
    MAT3X3,
    MAT3X4,
    MAT4,
    MAT4X2,
    MAT4X3,
    MAT4X4,

    /* int scalar + vectors */
    INT,
    IVEC2,
    IVEC3,
    IVEC4,

    /* int8 scalar + vectors */
    INT8,
    I8VEC2,
    I8VEC3,
    I8VEC4,

    /* int16 scalar + vectors */
    INT16,
    I16VEC2,
    I16VEC3,
    I16VEC4,

    /* int64 scalar + vectors */
    INT64,
    I64VEC2,
    I64VEC3,
    I64VEC4,

    /* uint scalar + vectors */
    UINT,
    UVEC2,
    UVEC3,
    UVEC4,

    /* uint16 scalar + vectors */
    UINT16,
    U16VEC2,
    U16VEC3,
    U16VEC4,

    /* uint64 scalar + vectors */
    UINT64,
    U64VEC2,
    U64VEC3,
    U64VEC4,

    /* uint8 scalar + vectors */
    UINT8,
    U8VEC2,
    U8VEC3,
    U8VEC4,

    /* double scalar + vectors */
    DOUBLE,
    DVEC2,
    DVEC3,
    DVEC4,

    /* double matrices */
    DMAT2,
    DMAT2X2,
    DMAT2X3,
    DMAT2X4,
    DMAT3,
    DMAT3X2,
    DMAT3X3,
    DMAT3X4,
    DMAT4,
    DMAT4X2,
    DMAT4X3,
    DMAT4X4,

    UNKNOWN,
    COUNT = UNKNOWN,
};
261 
/* How geometry is distributed across top-level (TL) and bottom-level (BL) acceleration structures.
 *
 * FIRST and COUNT bracket the regular layouts. The varying-primitive-types layout is declared after
 * COUNT, so it lies outside the [FIRST, COUNT) range. The per-layout notes below describe what
 * GridASProvider::createTLAS() builds for each value.
 */
enum class AccelerationStructureLayout
{
    FIRST = 0,

    /* One BLAS holding a single geometry with all vertices. */
    ONE_TL_ONE_BL_ONE_GEOMETRY = FIRST,
    /* One BLAS holding one geometry per primitive. */
    ONE_TL_ONE_BL_MANY_GEOMETRIES,
    /* One BLAS (with a single geometry) per primitive. */
    ONE_TL_MANY_BLS_ONE_GEOMETRY,
    /* Many BLASes, each holding several single-primitive geometries. */
    ONE_TL_MANY_BLS_MANY_GEOMETRIES,

    COUNT,

    /* One BLAS per grid cell, alternating between AABB and triangle geometry. */
    ONE_TL_MANY_BLS_MANY_GEOMETRIES_WITH_VARYING_PRIM_TYPES
};
275 
276 static const VkFlags ALL_RAY_TRACING_STAGES = VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
277                                               VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | VK_SHADER_STAGE_MISS_BIT_KHR |
278                                               VK_SHADER_STAGE_INTERSECTION_BIT_KHR | VK_SHADER_STAGE_CALLABLE_BIT_KHR;
279 
280 struct CaseDef
281 {
282     TestType type;
283     GeometryType geometryType;
284     AccelerationStructureLayout asLayout;
285 
CaseDefvkt::RayTracing::__anon87081bdf0111::CaseDef286     CaseDef() : type(TestType::COUNT), geometryType(GeometryType::COUNT), asLayout(AccelerationStructureLayout::COUNT)
287     {
288         /* Stub */
289     }
290 
CaseDefvkt::RayTracing::__anon87081bdf0111::CaseDef291     CaseDef(const TestType &inType)
292         : type(inType)
293         , geometryType(GeometryType::COUNT)
294         , asLayout(AccelerationStructureLayout::COUNT)
295     {
296         /* Stub */
297     }
298 
CaseDefvkt::RayTracing::__anon87081bdf0111::CaseDef299     CaseDef(const TestType &inType, const GeometryType &inGeometryType, const AccelerationStructureLayout &inAsLayout)
300         : type(inType)
301         , geometryType(inGeometryType)
302         , asLayout(inAsLayout)
303     {
304         /* Stub */
305     }
306 };
307 
308 /* Helper global functions */
getSuffixForASLayout(const AccelerationStructureLayout & layout)309 static const char *getSuffixForASLayout(const AccelerationStructureLayout &layout)
310 {
311     const char *result = "?!";
312 
313     switch (layout)
314     {
315     case AccelerationStructureLayout::ONE_TL_ONE_BL_ONE_GEOMETRY:
316         result = "1TL1BL1G";
317         break;
318     case AccelerationStructureLayout::ONE_TL_ONE_BL_MANY_GEOMETRIES:
319         result = "1TL1BLnG";
320         break;
321     case AccelerationStructureLayout::ONE_TL_MANY_BLS_ONE_GEOMETRY:
322         result = "1TLnBL1G";
323         break;
324     case AccelerationStructureLayout::ONE_TL_MANY_BLS_MANY_GEOMETRIES:
325         result = "1TLnBLnG";
326         break;
327 
328     default:
329     {
330         deAssertFail("This should never happen", __FILE__, __LINE__);
331     }
332     }
333 
334     return result;
335 }
336 
getSuffixForGeometryType(const GeometryType & type)337 static const char *getSuffixForGeometryType(const GeometryType &type)
338 {
339     const char *result = "?!";
340 
341     switch (type)
342     {
343     case GeometryType::AABB:
344         result = "AABB";
345         break;
346     case GeometryType::TRIANGLES:
347         result = "tri";
348         break;
349 
350     default:
351     {
352         deAssertFail("This should never happen", __FILE__, __LINE__);
353     }
354     }
355 
356     return result;
357 }
358 
/* Source of the additional information that can be attached to acceleration structure instances.
 *
 * Tests derive from this class and override the getters they care about in order to customize the ASes
 * generated by AS providers. The base implementation returns the same value for every instance.
 */
class ASPropertyProvider
{
public:
    virtual ~ASPropertyProvider() = default;

    /* Returns the cull mask for the instance identified by <nBL, nInstance>.
     *
     * Base implementation ignores both arguments and returns 0xFF.
     */
    virtual uint8_t getCullMask(const uint32_t &nBL, const uint32_t &nInstance) const
    {
        static_cast<void>(nBL);
        static_cast<void>(nInstance);

        return 0xFF;
    }

    /* Returns the instance custom index for the instance identified by <nBL, nInstance>.
     *
     * Base implementation ignores both arguments and returns 0.
     */
    virtual uint32_t getInstanceCustomIndex(const uint32_t &nBL, const uint32_t &nInstance) const
    {
        static_cast<void>(nBL);
        static_cast<void>(nInstance);

        return 0;
    }
};
386 
387 class IGridASFeedback
388 {
389 public:
~IGridASFeedback()390     virtual ~IGridASFeedback()
391     {
392         /* Stub */
393     }
394 
395     virtual void onCullMaskAssignedToCell(const tcu::UVec3 &cellLocation, const uint8_t &cullMaskAssigned) = 0;
396     virtual void onInstanceCustomIndexAssignedToCell(const tcu::UVec3 &cellLocation,
397                                                      const uint32_t &customIndexAssigned)                  = 0;
398 };
399 
400 /* Acceleration structure data providers.
401  *
402  * These are expected to be reused across different test cases.
403  **/
404 class ASProviderBase
405 {
406 public:
~ASProviderBase()407     virtual ~ASProviderBase()
408     {
409         /* Stub */
410     }
411 
412     virtual std::unique_ptr<TopLevelAccelerationStructure> createTLAS(
413         Context &context, const AccelerationStructureLayout &asLayout, VkCommandBuffer cmdBuffer,
414         const VkGeometryFlagsKHR &bottomLevelGeometryFlags,
415         const ASPropertyProvider *optAsPropertyProviderPtr = nullptr,
416         IGridASFeedback *optASFeedbackPtr                  = nullptr) const = 0;
417     virtual uint32_t getNPrimitives() const                = 0;
418 };
419 
420 /* A 3D grid built of primitives. Size and distribution of the geometry can be configured both at creation time and at a later time. */
421 class GridASProvider : public ASProviderBase
422 {
423 public:
GridASProvider(const tcu::Vec3 & gridStartXYZ,const tcu::Vec3 & gridCellSizeXYZ,const tcu::UVec3 & gridSizeXYZ,const tcu::Vec3 & gridInterCellDeltaXYZ,const GeometryType & geometryType)424     GridASProvider(const tcu::Vec3 &gridStartXYZ, const tcu::Vec3 &gridCellSizeXYZ, const tcu::UVec3 &gridSizeXYZ,
425                    const tcu::Vec3 &gridInterCellDeltaXYZ, const GeometryType &geometryType)
426         : m_geometryType(geometryType)
427         , m_gridCellSizeXYZ(gridCellSizeXYZ)
428         , m_gridInterCellDeltaXYZ(gridInterCellDeltaXYZ)
429         , m_gridSizeXYZ(gridSizeXYZ)
430         , m_gridStartXYZ(gridStartXYZ)
431     {
432         fillVertexVec();
433     }
434 
createTLAS(Context & context,const AccelerationStructureLayout & asLayout,VkCommandBuffer cmdBuffer,const VkGeometryFlagsKHR & bottomLevelGeometryFlags,const ASPropertyProvider * optASPropertyProviderPtr,IGridASFeedback * optASFeedbackPtr) const435     std::unique_ptr<TopLevelAccelerationStructure> createTLAS(Context &context,
436                                                               const AccelerationStructureLayout &asLayout,
437                                                               VkCommandBuffer cmdBuffer,
438                                                               const VkGeometryFlagsKHR &bottomLevelGeometryFlags,
439                                                               const ASPropertyProvider *optASPropertyProviderPtr,
440                                                               IGridASFeedback *optASFeedbackPtr) const final
441     {
442         Allocator &allocator                   = context.getDefaultAllocator();
443         const DeviceInterface &deviceInterface = context.getDeviceInterface();
444         const VkDevice deviceVk                = context.getDevice();
445         const auto nCells                      = m_gridSizeXYZ.x() * m_gridSizeXYZ.y() * m_gridSizeXYZ.z();
446         std::unique_ptr<TopLevelAccelerationStructure> resultPtr;
447         de::MovePtr<TopLevelAccelerationStructure> tlPtr = makeTopLevelAccelerationStructure();
448 
449         DE_ASSERT(((asLayout == AccelerationStructureLayout::ONE_TL_MANY_BLS_MANY_GEOMETRIES_WITH_VARYING_PRIM_TYPES) &&
450                    (m_geometryType == GeometryType::AABB_AND_TRIANGLES)) ||
451                   ((asLayout != AccelerationStructureLayout::ONE_TL_MANY_BLS_MANY_GEOMETRIES_WITH_VARYING_PRIM_TYPES) &&
452                    (m_geometryType != GeometryType::AABB_AND_TRIANGLES)));
453 
454         switch (asLayout)
455         {
456         case AccelerationStructureLayout::ONE_TL_ONE_BL_ONE_GEOMETRY:
457         {
458             DE_ASSERT((m_geometryType == GeometryType::AABB) || (m_geometryType == GeometryType::TRIANGLES));
459 
460             const auto &vertexVec = (m_geometryType == GeometryType::AABB) ? m_aabbVertexVec : m_triVertexVec;
461             const auto cullMask = (optASPropertyProviderPtr != nullptr) ? optASPropertyProviderPtr->getCullMask(0, 0) :
462                                                                           static_cast<uint8_t>(0xFF);
463             const auto instanceCustomIndex =
464                 (optASPropertyProviderPtr != nullptr) ? optASPropertyProviderPtr->getInstanceCustomIndex(0, 0) : 0;
465 
466             tlPtr->setInstanceCount(1);
467 
468             {
469                 de::MovePtr<BottomLevelAccelerationStructure> blPtr = makeBottomLevelAccelerationStructure();
470 
471                 blPtr->setGeometryCount(1u);
472                 blPtr->addGeometry(vertexVec, (m_geometryType == GeometryType::TRIANGLES), bottomLevelGeometryFlags);
473 
474                 blPtr->createAndBuild(deviceInterface, deviceVk, cmdBuffer, allocator);
475 
476                 tlPtr->addInstance(de::SharedPtr<BottomLevelAccelerationStructure>(blPtr.release()), identityMatrix3x4,
477                                    instanceCustomIndex, cullMask);
478             }
479 
480             if (optASFeedbackPtr != nullptr)
481             {
482                 for (auto nCell = 0u; nCell < nCells; nCell++)
483                 {
484                     const auto cellX = (((nCell) % m_gridSizeXYZ.x()));
485                     const auto cellY = (((nCell / m_gridSizeXYZ.x()) % m_gridSizeXYZ.y()));
486                     const auto cellZ = (((nCell / m_gridSizeXYZ.x()) / m_gridSizeXYZ.y()) % m_gridSizeXYZ.z());
487 
488                     optASFeedbackPtr->onCullMaskAssignedToCell(tcu::UVec3(cellX, cellY, cellZ), cullMask);
489                     optASFeedbackPtr->onInstanceCustomIndexAssignedToCell(tcu::UVec3(cellX, cellY, cellZ),
490                                                                           instanceCustomIndex);
491                 }
492             }
493 
494             break;
495         }
496 
497         case AccelerationStructureLayout::ONE_TL_ONE_BL_MANY_GEOMETRIES:
498         {
499             DE_ASSERT((m_geometryType == GeometryType::AABB) || (m_geometryType == GeometryType::TRIANGLES));
500 
501             const auto &vertexVec = (m_geometryType == GeometryType::AABB) ? m_aabbVertexVec : m_triVertexVec;
502             const auto nVerticesPerPrimitive =
503                 (m_geometryType == GeometryType::AABB) ? 2u : 12u /* tris */ * 3 /* verts */;
504             const auto cullMask = (optASPropertyProviderPtr != nullptr) ? optASPropertyProviderPtr->getCullMask(0, 0) :
505                                                                           static_cast<uint8_t>(0xFF);
506             const auto instanceCustomIndex =
507                 (optASPropertyProviderPtr != nullptr) ? optASPropertyProviderPtr->getInstanceCustomIndex(0, 0) : 0;
508 
509             DE_ASSERT((vertexVec.size() % nVerticesPerPrimitive) == 0);
510 
511             tlPtr->setInstanceCount(1);
512 
513             {
514                 de::MovePtr<BottomLevelAccelerationStructure> blPtr = makeBottomLevelAccelerationStructure();
515                 const auto nGeometries                              = vertexVec.size() / nVerticesPerPrimitive;
516 
517                 blPtr->setGeometryCount(nGeometries);
518 
519                 for (uint32_t nGeometry = 0; nGeometry < nGeometries; ++nGeometry)
520                 {
521                     std::vector<tcu::Vec3> currentGeometry(nVerticesPerPrimitive);
522 
523                     for (uint32_t nVertex = 0; nVertex < nVerticesPerPrimitive; ++nVertex)
524                     {
525                         currentGeometry.at(nVertex) = vertexVec.at(nGeometry * nVerticesPerPrimitive + nVertex);
526                     }
527 
528                     blPtr->addGeometry(currentGeometry, (m_geometryType == GeometryType::TRIANGLES),
529                                        bottomLevelGeometryFlags);
530                 }
531 
532                 blPtr->createAndBuild(deviceInterface, deviceVk, cmdBuffer, allocator);
533 
534                 tlPtr->addInstance(de::SharedPtr<BottomLevelAccelerationStructure>(blPtr.release()), identityMatrix3x4,
535                                    instanceCustomIndex, cullMask);
536             }
537 
538             if (optASFeedbackPtr != nullptr)
539             {
540                 for (auto nCell = 0u; nCell < nCells; nCell++)
541                 {
542                     const auto cellX = (((nCell) % m_gridSizeXYZ.x()));
543                     const auto cellY = (((nCell / m_gridSizeXYZ.x()) % m_gridSizeXYZ.y()));
544                     const auto cellZ = (((nCell / m_gridSizeXYZ.x()) / m_gridSizeXYZ.y()) % m_gridSizeXYZ.z());
545 
546                     optASFeedbackPtr->onCullMaskAssignedToCell(tcu::UVec3(cellX, cellY, cellZ), cullMask);
547                     optASFeedbackPtr->onInstanceCustomIndexAssignedToCell(tcu::UVec3(cellX, cellY, cellZ),
548                                                                           instanceCustomIndex);
549                 }
550             }
551 
552             break;
553         }
554 
555         case AccelerationStructureLayout::ONE_TL_MANY_BLS_ONE_GEOMETRY:
556         {
557             DE_ASSERT((m_geometryType == GeometryType::AABB) || (m_geometryType == GeometryType::TRIANGLES));
558 
559             const auto &vertexVec = (m_geometryType == GeometryType::AABB) ? m_aabbVertexVec : m_triVertexVec;
560             const auto nVerticesPerPrimitive =
561                 (m_geometryType == GeometryType::AABB) ? 2u : 12u /* tris */ * 3 /* verts */;
562             const auto nInstances = vertexVec.size() / nVerticesPerPrimitive;
563 
564             DE_ASSERT((vertexVec.size() % nVerticesPerPrimitive) == 0);
565 
566             tlPtr->setInstanceCount(nInstances);
567 
568             for (uint32_t nInstance = 0; nInstance < nInstances; nInstance++)
569             {
570                 de::MovePtr<BottomLevelAccelerationStructure> blPtr = makeBottomLevelAccelerationStructure();
571                 const auto cullMask                                 = (optASPropertyProviderPtr != nullptr) ?
572                                                                           optASPropertyProviderPtr->getCullMask(0, nInstance) :
573                                                                           static_cast<uint8_t>(0xFF);
574                 std::vector<tcu::Vec3> currentInstanceVertexVec;
575                 const auto instanceCustomIndex = (optASPropertyProviderPtr != nullptr) ?
576                                                      optASPropertyProviderPtr->getInstanceCustomIndex(0, nInstance) :
577                                                      0;
578 
579                 for (uint32_t nVertex = 0; nVertex < nVerticesPerPrimitive; ++nVertex)
580                 {
581                     currentInstanceVertexVec.push_back(vertexVec.at(nInstance * nVerticesPerPrimitive + nVertex));
582                 }
583 
584                 blPtr->setGeometryCount(1u);
585                 blPtr->addGeometry(currentInstanceVertexVec, (m_geometryType == GeometryType::TRIANGLES),
586                                    bottomLevelGeometryFlags);
587 
588                 blPtr->createAndBuild(deviceInterface, deviceVk, cmdBuffer, allocator);
589 
590                 tlPtr->addInstance(de::SharedPtr<BottomLevelAccelerationStructure>(blPtr.release()), identityMatrix3x4,
591                                    instanceCustomIndex, cullMask);
592 
593                 if (optASFeedbackPtr != nullptr)
594                 {
595                     const auto cellX = (((nInstance) % m_gridSizeXYZ.x()));
596                     const auto cellY = (((nInstance / m_gridSizeXYZ.x()) % m_gridSizeXYZ.y()));
597                     const auto cellZ = (((nInstance / m_gridSizeXYZ.x()) / m_gridSizeXYZ.y()) % m_gridSizeXYZ.z());
598 
599                     optASFeedbackPtr->onCullMaskAssignedToCell(tcu::UVec3(cellX, cellY, cellZ), cullMask);
600                     optASFeedbackPtr->onInstanceCustomIndexAssignedToCell(tcu::UVec3(cellX, cellY, cellZ),
601                                                                           instanceCustomIndex);
602                 }
603             }
604 
605             break;
606         }
607 
608         case AccelerationStructureLayout::ONE_TL_MANY_BLS_MANY_GEOMETRIES:
609         {
610             DE_ASSERT((m_geometryType == GeometryType::AABB) || (m_geometryType == GeometryType::TRIANGLES));
611 
612             const auto &vertexVec = (m_geometryType == GeometryType::AABB) ? m_aabbVertexVec : m_triVertexVec;
613             const auto nVerticesPerPrimitive =
614                 (m_geometryType == GeometryType::AABB) ? 2u : 12u /* tris */ * 3 /* verts */;
615             const auto nPrimitivesDefined = static_cast<uint32_t>(vertexVec.size() / nVerticesPerPrimitive);
616             const auto nPrimitivesPerBLAS = 4;
617             const auto nBottomLevelASes   = nPrimitivesDefined / nPrimitivesPerBLAS;
618 
619             DE_ASSERT((vertexVec.size() % nVerticesPerPrimitive) == 0);
620             DE_ASSERT((nPrimitivesDefined % nPrimitivesPerBLAS) == 0);
621 
622             tlPtr->setInstanceCount(nBottomLevelASes);
623 
624             for (uint32_t nBottomLevelAS = 0; nBottomLevelAS < nBottomLevelASes; nBottomLevelAS++)
625             {
626                 de::MovePtr<BottomLevelAccelerationStructure> blPtr = makeBottomLevelAccelerationStructure();
627                 const auto cullMask                                 = (optASPropertyProviderPtr != nullptr) ?
628                                                                           optASPropertyProviderPtr->getCullMask(nBottomLevelAS, 0) :
629                                                                           static_cast<uint8_t>(0xFF);
630                 const auto instanceCustomIndex =
631                     (optASPropertyProviderPtr != nullptr) ?
632                         optASPropertyProviderPtr->getInstanceCustomIndex(nBottomLevelAS, 0) :
633                         0;
634 
635                 blPtr->setGeometryCount(nPrimitivesPerBLAS);
636 
637                 for (uint32_t nGeometry = 0; nGeometry < nPrimitivesPerBLAS; nGeometry++)
638                 {
639                     std::vector<tcu::Vec3> currentVertexVec;
640 
641                     for (uint32_t nVertex = 0; nVertex < nVerticesPerPrimitive; ++nVertex)
642                     {
643                         currentVertexVec.push_back(vertexVec.at(
644                             (nBottomLevelAS * nPrimitivesPerBLAS + nGeometry) * nVerticesPerPrimitive + nVertex));
645                     }
646 
647                     blPtr->addGeometry(currentVertexVec, (m_geometryType == GeometryType::TRIANGLES),
648                                        bottomLevelGeometryFlags);
649                 }
650 
651                 blPtr->createAndBuild(deviceInterface, deviceVk, cmdBuffer, allocator);
652                 tlPtr->addInstance(de::SharedPtr<BottomLevelAccelerationStructure>(blPtr.release()), identityMatrix3x4,
653                                    instanceCustomIndex, cullMask);
654 
655                 if (optASFeedbackPtr != nullptr)
656                 {
657                     for (uint32_t cellIndex = nPrimitivesPerBLAS * nBottomLevelAS;
658                          cellIndex < nPrimitivesPerBLAS * (nBottomLevelAS + 1); cellIndex++)
659                     {
660                         const auto cellX = (((cellIndex) % m_gridSizeXYZ.x()));
661                         const auto cellY = (((cellIndex / m_gridSizeXYZ.x()) % m_gridSizeXYZ.y()));
662                         const auto cellZ = (((cellIndex / m_gridSizeXYZ.x()) / m_gridSizeXYZ.y()) % m_gridSizeXYZ.z());
663 
664                         optASFeedbackPtr->onCullMaskAssignedToCell(tcu::UVec3(cellX, cellY, cellZ), cullMask);
665                         optASFeedbackPtr->onInstanceCustomIndexAssignedToCell(tcu::UVec3(cellX, cellY, cellZ),
666                                                                               instanceCustomIndex);
667                     }
668                 }
669             }
670 
671             break;
672         }
673 
674         case AccelerationStructureLayout::ONE_TL_MANY_BLS_MANY_GEOMETRIES_WITH_VARYING_PRIM_TYPES:
675         {
676             DE_ASSERT(m_geometryType == GeometryType::AABB_AND_TRIANGLES);
677 
678             const auto nCellsDefined      = m_gridSizeXYZ[0] * m_gridSizeXYZ[1] * m_gridSizeXYZ[2];
679             const auto nPrimitivesPerBLAS = 1;
680             const auto nBottomLevelASes   = nCellsDefined / nPrimitivesPerBLAS;
681 
682             DE_ASSERT((nCellsDefined % nPrimitivesPerBLAS) == 0);
683 
684             tlPtr->setInstanceCount(nBottomLevelASes);
685 
686             for (uint32_t nBottomLevelAS = 0; nBottomLevelAS < nBottomLevelASes; nBottomLevelAS++)
687             {
688                 de::MovePtr<BottomLevelAccelerationStructure> blPtr = makeBottomLevelAccelerationStructure();
689                 const auto cullMask                                 = (optASPropertyProviderPtr != nullptr) ?
690                                                                           optASPropertyProviderPtr->getCullMask(nBottomLevelAS, 0) :
691                                                                           static_cast<uint8_t>(0xFF);
692                 const auto instanceCustomIndex =
693                     (optASPropertyProviderPtr != nullptr) ?
694                         optASPropertyProviderPtr->getInstanceCustomIndex(nBottomLevelAS, 0) :
695                         0;
696                 const bool usesAABB              = (nBottomLevelAS % 2) == 0;
697                 const auto &vertexVec            = (usesAABB) ? m_aabbVertexVec : m_triVertexVec;
698                 const auto nVerticesPerPrimitive = (usesAABB) ? 2u : 12u /* tris */ * 3 /* verts */;
699 
700                 // For this case, AABBs use the first shader group and triangles use the second shader group in the table.
701                 const auto instanceSBTOffset = (usesAABB ? 0u : 1u);
702 
703                 blPtr->setGeometryCount(nPrimitivesPerBLAS);
704 
705                 for (uint32_t nGeometry = 0; nGeometry < nPrimitivesPerBLAS; nGeometry++)
706                 {
707                     DE_ASSERT((vertexVec.size() % nVerticesPerPrimitive) == 0);
708 
709                     std::vector<tcu::Vec3> currentVertexVec;
710 
711                     for (uint32_t nVertex = 0; nVertex < nVerticesPerPrimitive; ++nVertex)
712                     {
713                         currentVertexVec.push_back(vertexVec.at(
714                             (nBottomLevelAS * nPrimitivesPerBLAS + nGeometry) * nVerticesPerPrimitive + nVertex));
715                     }
716 
717                     blPtr->addGeometry(currentVertexVec, !usesAABB, bottomLevelGeometryFlags);
718                 }
719 
720                 blPtr->createAndBuild(deviceInterface, deviceVk, cmdBuffer, allocator);
721 
722                 tlPtr->addInstance(de::SharedPtr<BottomLevelAccelerationStructure>(blPtr.release()), identityMatrix3x4,
723                                    instanceCustomIndex, cullMask, instanceSBTOffset);
724 
725                 if (optASFeedbackPtr != nullptr)
726                 {
727                     for (uint32_t cellIndex = nPrimitivesPerBLAS * nBottomLevelAS;
728                          cellIndex < nPrimitivesPerBLAS * (nBottomLevelAS + 1); cellIndex++)
729                     {
730                         const auto cellX = (((cellIndex) % m_gridSizeXYZ.x()));
731                         const auto cellY = (((cellIndex / m_gridSizeXYZ.x()) % m_gridSizeXYZ.y()));
732                         const auto cellZ = (((cellIndex / m_gridSizeXYZ.x()) / m_gridSizeXYZ.y()) % m_gridSizeXYZ.z());
733 
734                         optASFeedbackPtr->onCullMaskAssignedToCell(tcu::UVec3(cellX, cellY, cellZ), cullMask);
735                         optASFeedbackPtr->onInstanceCustomIndexAssignedToCell(tcu::UVec3(cellX, cellY, cellZ),
736                                                                               instanceCustomIndex);
737                     }
738                 }
739             }
740 
741             break;
742         }
743 
744         default:
745         {
746             deAssertFail("This should never happen", __FILE__, __LINE__);
747         }
748         }
749 
750         tlPtr->createAndBuild(deviceInterface, deviceVk, cmdBuffer, allocator);
751 
752         resultPtr = decltype(resultPtr)(tlPtr.release());
753         return resultPtr;
754     }
755 
getNPrimitives() const756     uint32_t getNPrimitives() const final
757     {
758         return m_gridSizeXYZ[0] * m_gridSizeXYZ[1] * m_gridSizeXYZ[2];
759     }
760 
setProperties(const tcu::Vec3 & gridStartXYZ,const tcu::Vec3 & gridCellSizeXYZ,const tcu::UVec3 & gridSizeXYZ,const tcu::Vec3 & gridInterCellDeltaXYZ,const GeometryType & geometryType)761     void setProperties(const tcu::Vec3 &gridStartXYZ, const tcu::Vec3 &gridCellSizeXYZ, const tcu::UVec3 &gridSizeXYZ,
762                        const tcu::Vec3 &gridInterCellDeltaXYZ, const GeometryType &geometryType)
763     {
764         m_gridStartXYZ          = gridStartXYZ;
765         m_gridCellSizeXYZ       = gridCellSizeXYZ;
766         m_gridSizeXYZ           = gridSizeXYZ;
767         m_gridInterCellDeltaXYZ = gridInterCellDeltaXYZ;
768         m_geometryType          = geometryType;
769 
770         fillVertexVec();
771     }
772 
773 private:
fillVertexVec()774     void fillVertexVec()
775     {
776         const auto nCellsNeeded = m_gridSizeXYZ.x() * m_gridSizeXYZ.y() * m_gridSizeXYZ.z();
777 
778         m_aabbVertexVec.clear();
779         m_triVertexVec.clear();
780 
781         for (auto nCell = 0u; nCell < nCellsNeeded; nCell++)
782         {
783             const auto cellX = (((nCell) % m_gridSizeXYZ.x()));
784             const auto cellY = (((nCell / m_gridSizeXYZ.x()) % m_gridSizeXYZ.y()));
785             const auto cellZ = (((nCell / m_gridSizeXYZ.x()) / m_gridSizeXYZ.y()) % m_gridSizeXYZ.z());
786 
787             const auto cellX1Y1Z1 =
788                 tcu::Vec3(m_gridStartXYZ.x() + static_cast<float>(cellX) * m_gridInterCellDeltaXYZ.x(),
789                           m_gridStartXYZ.y() + static_cast<float>(cellY) * m_gridInterCellDeltaXYZ.y(),
790                           m_gridStartXYZ.z() + static_cast<float>(cellZ) * m_gridInterCellDeltaXYZ.z());
791             const auto cellX2Y2Z2 = tcu::Vec3(
792                 m_gridStartXYZ.x() + static_cast<float>(cellX) * m_gridInterCellDeltaXYZ.x() + m_gridCellSizeXYZ.x(),
793                 m_gridStartXYZ.y() + static_cast<float>(cellY) * m_gridInterCellDeltaXYZ.y() + m_gridCellSizeXYZ.y(),
794                 m_gridStartXYZ.z() + static_cast<float>(cellZ) * m_gridInterCellDeltaXYZ.z() + m_gridCellSizeXYZ.z());
795 
796             if (m_geometryType == GeometryType::AABB || m_geometryType == GeometryType::AABB_AND_TRIANGLES)
797             {
798                 /* Cell = AABB of the cell */
799                 m_aabbVertexVec.push_back(cellX1Y1Z1);
800                 m_aabbVertexVec.push_back(cellX2Y2Z2);
801             }
802 
803             if (m_geometryType == GeometryType::AABB_AND_TRIANGLES || m_geometryType == GeometryType::TRIANGLES)
804             {
805                 /* Cell == Six triangles forming a cube
806                  *
807                  * Lower-case characters: vertices with Z == Z2
808                  * Upper-case characters: vertices with Z == Z1
809 
810 
811                         g                h
812 
813 
814                     C              D
815 
816 
817 
818                         e                f
819 
820                     A              B
821 
822 
823                  */
824                 const auto A = tcu::Vec3(cellX1Y1Z1.x(), cellX1Y1Z1.y(), cellX1Y1Z1.z());
825                 const auto B = tcu::Vec3(cellX2Y2Z2.x(), cellX1Y1Z1.y(), cellX1Y1Z1.z());
826                 const auto C = tcu::Vec3(cellX1Y1Z1.x(), cellX2Y2Z2.y(), cellX1Y1Z1.z());
827                 const auto D = tcu::Vec3(cellX2Y2Z2.x(), cellX2Y2Z2.y(), cellX1Y1Z1.z());
828                 const auto E = tcu::Vec3(cellX1Y1Z1.x(), cellX1Y1Z1.y(), cellX2Y2Z2.z());
829                 const auto F = tcu::Vec3(cellX2Y2Z2.x(), cellX1Y1Z1.y(), cellX2Y2Z2.z());
830                 const auto G = tcu::Vec3(cellX1Y1Z1.x(), cellX2Y2Z2.y(), cellX2Y2Z2.z());
831                 const auto H = tcu::Vec3(cellX2Y2Z2.x(), cellX2Y2Z2.y(), cellX2Y2Z2.z());
832 
833                 // Z = Z1 face
834                 m_triVertexVec.push_back(A);
835                 m_triVertexVec.push_back(C);
836                 m_triVertexVec.push_back(D);
837 
838                 m_triVertexVec.push_back(D);
839                 m_triVertexVec.push_back(B);
840                 m_triVertexVec.push_back(A);
841 
842                 // Z = Z2 face
843                 m_triVertexVec.push_back(E);
844                 m_triVertexVec.push_back(H);
845                 m_triVertexVec.push_back(G);
846 
847                 m_triVertexVec.push_back(H);
848                 m_triVertexVec.push_back(E);
849                 m_triVertexVec.push_back(F);
850 
851                 // X = X0 face
852                 m_triVertexVec.push_back(A);
853                 m_triVertexVec.push_back(G);
854                 m_triVertexVec.push_back(C);
855 
856                 m_triVertexVec.push_back(G);
857                 m_triVertexVec.push_back(A);
858                 m_triVertexVec.push_back(E);
859 
860                 // X = X1 face
861                 m_triVertexVec.push_back(B);
862                 m_triVertexVec.push_back(D);
863                 m_triVertexVec.push_back(H);
864 
865                 m_triVertexVec.push_back(H);
866                 m_triVertexVec.push_back(F);
867                 m_triVertexVec.push_back(B);
868 
869                 // Y = Y0 face
870                 m_triVertexVec.push_back(C);
871                 m_triVertexVec.push_back(H);
872                 m_triVertexVec.push_back(D);
873 
874                 m_triVertexVec.push_back(H);
875                 m_triVertexVec.push_back(C);
876                 m_triVertexVec.push_back(G);
877 
878                 // Y = y1 face
879                 m_triVertexVec.push_back(A);
880                 m_triVertexVec.push_back(B);
881                 m_triVertexVec.push_back(E);
882 
883                 m_triVertexVec.push_back(B);
884                 m_triVertexVec.push_back(F);
885                 m_triVertexVec.push_back(E);
886             }
887         }
888     }
889 
890     std::vector<tcu::Vec3> m_aabbVertexVec;
891     std::vector<tcu::Vec3> m_triVertexVec;
892 
893     GeometryType m_geometryType;
894     tcu::Vec3 m_gridCellSizeXYZ;
895     tcu::Vec3 m_gridInterCellDeltaXYZ;
896     tcu::UVec3 m_gridSizeXYZ;
897     tcu::Vec3 m_gridStartXYZ;
898 };
899 
900 /* Provides an AS holding a single {(0, 0, 0), (-1, 1, 0), {1, 1, 0} tri. */
901 class TriASProvider : public ASProviderBase
902 {
903 public:
TriASProvider()904     TriASProvider()
905     {
906         /* Stub*/
907     }
908 
createTLAS(Context & context,const AccelerationStructureLayout &,VkCommandBuffer cmdBuffer,const VkGeometryFlagsKHR & bottomLevelGeometryFlags,const ASPropertyProvider * optASPropertyProviderPtr,IGridASFeedback *) const909     std::unique_ptr<TopLevelAccelerationStructure> createTLAS(Context &context,
910                                                               const AccelerationStructureLayout & /* asLayout */,
911                                                               VkCommandBuffer cmdBuffer,
912                                                               const VkGeometryFlagsKHR &bottomLevelGeometryFlags,
913                                                               const ASPropertyProvider *optASPropertyProviderPtr,
914                                                               IGridASFeedback * /* optASFeedbackPtr */) const final
915     {
916         Allocator &allocator                   = context.getDefaultAllocator();
917         const DeviceInterface &deviceInterface = context.getDeviceInterface();
918         const VkDevice deviceVk                = context.getDevice();
919         std::unique_ptr<TopLevelAccelerationStructure> resultPtr;
920         de::MovePtr<TopLevelAccelerationStructure> tlPtr = makeTopLevelAccelerationStructure();
921 
922         {
923 
924             const auto cullMask = (optASPropertyProviderPtr != nullptr) ? optASPropertyProviderPtr->getCullMask(0, 0) :
925                                                                           static_cast<uint8_t>(0xFF);
926             const auto instanceCustomIndex =
927                 (optASPropertyProviderPtr != nullptr) ? optASPropertyProviderPtr->getInstanceCustomIndex(0, 0) : 0;
928 
929             tlPtr->setInstanceCount(1);
930 
931             {
932                 de::MovePtr<BottomLevelAccelerationStructure> blPtr = makeBottomLevelAccelerationStructure();
933                 const std::vector<tcu::Vec3> vertexVec = {tcu::Vec3(0, 0, 0), tcu::Vec3(-1, 1, 0), tcu::Vec3(1, 1, 0)};
934 
935                 blPtr->setGeometryCount(1u);
936                 blPtr->addGeometry(vertexVec, true, /* triangles */
937                                    bottomLevelGeometryFlags);
938 
939                 blPtr->createAndBuild(deviceInterface, deviceVk, cmdBuffer, allocator);
940 
941                 tlPtr->addInstance(de::SharedPtr<BottomLevelAccelerationStructure>(blPtr.release()), identityMatrix3x4,
942                                    instanceCustomIndex, cullMask);
943             }
944         }
945 
946         tlPtr->createAndBuild(deviceInterface, deviceVk, cmdBuffer, allocator);
947 
948         resultPtr = decltype(resultPtr)(tlPtr.release());
949         return resultPtr;
950     }
951 
getNPrimitives() const952     uint32_t getNPrimitives() const final
953     {
954         return 1;
955     }
956 };
957 
958 /* Test logic providers ==> */
959 class TestBase
960 {
961 public:
~TestBase()962     virtual ~TestBase()
963     {
964         /* Stub */
965     }
966 
967     virtual tcu::UVec3 getDispatchSize() const                                       = 0;
968     virtual uint32_t getResultBufferSize() const                                     = 0;
969     virtual std::vector<TopLevelAccelerationStructure *> getTLASPtrVecToBind() const = 0;
970     virtual void resetTLAS()                                                         = 0;
971     virtual void initAS(vkt::Context &context, RayTracingProperties *rtPropertiesPtr,
972                         VkCommandBuffer commandBuffer)                               = 0;
973     virtual void initPrograms(SourceCollections &programCollection) const            = 0;
974     virtual bool verifyResultBuffer(const void *inBufferPtr) const                   = 0;
975 
getAHitShaderCollectionShaderNames() const976     virtual std::vector<std::string> getAHitShaderCollectionShaderNames() const
977     {
978         return {"ahit"};
979     }
980 
getASBindingArraySize() const981     virtual uint32_t getASBindingArraySize() const
982     {
983         return 1u;
984     }
985 
getCallableShaderCollectionNames() const986     virtual std::vector<std::string> getCallableShaderCollectionNames() const
987     {
988         return std::vector<std::string>{};
989     }
990 
getCHitShaderCollectionShaderNames() const991     virtual std::vector<std::string> getCHitShaderCollectionShaderNames() const
992     {
993         return {"chit"};
994     }
995 
getDynamicStackSize(uint32_t maxPipelineRayRecursionDepth) const996     virtual uint32_t getDynamicStackSize(uint32_t maxPipelineRayRecursionDepth) const
997     {
998         DE_ASSERT(false);
999 
1000         DE_UNREF(maxPipelineRayRecursionDepth);
1001 
1002         return 0;
1003     }
1004 
getIntersectionShaderCollectionShaderNames() const1005     virtual std::vector<std::string> getIntersectionShaderCollectionShaderNames() const
1006     {
1007         return {"intersection"};
1008     }
1009 
getMaxRecursionDepthUsed() const1010     virtual uint32_t getMaxRecursionDepthUsed() const
1011     {
1012         return 1;
1013     }
1014 
getMissShaderCollectionShaderNames() const1015     virtual std::vector<std::string> getMissShaderCollectionShaderNames() const
1016     {
1017         return {"miss"};
1018     }
1019 
getNTraceRayInvocationsNeeded() const1020     virtual uint32_t getNTraceRayInvocationsNeeded() const
1021     {
1022         return 1;
1023     }
1024 
getPipelineLayout(const vk::DeviceInterface & deviceInterface,VkDevice deviceVk,VkDescriptorSetLayout descriptorSetLayout)1025     virtual Move<VkPipelineLayout> getPipelineLayout(const vk::DeviceInterface &deviceInterface, VkDevice deviceVk,
1026                                                      VkDescriptorSetLayout descriptorSetLayout)
1027     {
1028         return makePipelineLayout(deviceInterface, deviceVk, descriptorSetLayout);
1029     }
1030 
getResultBufferStartData() const1031     virtual std::vector<uint8_t> getResultBufferStartData() const
1032     {
1033         return std::vector<uint8_t>();
1034     }
1035 
getShaderRecordData(const ShaderGroups &) const1036     virtual const void *getShaderRecordData(const ShaderGroups & /* shaderGroup */) const
1037     {
1038         return nullptr;
1039     }
1040 
getShaderRecordSize(const ShaderGroups &) const1041     virtual uint32_t getShaderRecordSize(const ShaderGroups & /* shaderGroup */) const
1042     {
1043         return 0;
1044     }
1045 
getSpecializationInfoPtr(const VkShaderStageFlagBits &)1046     virtual VkSpecializationInfo *getSpecializationInfoPtr(const VkShaderStageFlagBits & /* shaderStage */)
1047     {
1048         return nullptr;
1049     }
1050 
init(vkt::Context &,RayTracingProperties *)1051     virtual bool init(vkt::Context & /* context    */, RayTracingProperties * /* rtPropsPtr */)
1052     {
1053         return true;
1054     }
1055 
onBeforeCmdTraceRays(const uint32_t &,vkt::Context &,VkCommandBuffer,VkPipelineLayout)1056     virtual void onBeforeCmdTraceRays(const uint32_t & /* nDispatch      */, vkt::Context & /* context        */,
1057                                       VkCommandBuffer /* commandBuffer  */, VkPipelineLayout /* pipelineLayout */)
1058     {
1059         /* Stub */
1060     }
1061 
onShaderStackSizeDiscovered(const VkDeviceSize &,const VkDeviceSize &,const VkDeviceSize &,const VkDeviceSize &,const VkDeviceSize &,const VkDeviceSize &)1062     virtual void onShaderStackSizeDiscovered(const VkDeviceSize & /* raygenShaderStackSize   */,
1063                                              const VkDeviceSize & /* ahitShaderStackSize     */,
1064                                              const VkDeviceSize & /* chitShaderStackSize     */,
1065                                              const VkDeviceSize & /* missShaderStackSize     */,
1066                                              const VkDeviceSize & /* callableShaderStackSize */,
1067                                              const VkDeviceSize & /* isectShaderStackSize    */)
1068     {
1069         /* Stub */
1070     }
1071 
usesDynamicStackSize() const1072     virtual bool usesDynamicStackSize() const
1073     {
1074         return false;
1075     }
1076 };
1077 
1078 class AABBTriTLTest : public TestBase, public ASPropertyProvider
1079 {
1080 public:
AABBTriTLTest(const GeometryType & geometryType,const AccelerationStructureLayout & asStructureLayout)1081     AABBTriTLTest(const GeometryType &geometryType, const AccelerationStructureLayout &asStructureLayout)
1082         : m_asStructureLayout(asStructureLayout)
1083         , m_geometryType(geometryType)
1084         , m_gridSize(tcu::UVec3(720, 1, 1))
1085         , m_lastCustomInstanceIndexUsed(0)
1086     {
1087     }
1088 
~AABBTriTLTest()1089     ~AABBTriTLTest()
1090     {
1091         /* Stub */
1092     }
1093 
getAHitShaderCollectionShaderNames() const1094     virtual std::vector<std::string> getAHitShaderCollectionShaderNames() const
1095     {
1096         return {"ahit", "ahit"};
1097     }
1098 
getCHitShaderCollectionShaderNames() const1099     std::vector<std::string> getCHitShaderCollectionShaderNames() const final
1100     {
1101         return {};
1102     }
1103 
getInstanceCustomIndex(const uint32_t & nBL,const uint32_t & nInstance) const1104     uint32_t getInstanceCustomIndex(const uint32_t &nBL, const uint32_t &nInstance) const final
1105     {
1106         DE_UNREF(nBL);
1107         DE_UNREF(nInstance);
1108 
1109         return ++m_lastCustomInstanceIndexUsed;
1110     }
1111 
getDispatchSize() const1112     tcu::UVec3 getDispatchSize() const final
1113     {
1114         return tcu::UVec3(m_gridSize[0], m_gridSize[1], m_gridSize[2]);
1115     }
1116 
getResultBufferSize() const1117     uint32_t getResultBufferSize() const final
1118     {
1119         return static_cast<uint32_t>((2 /* nHits, nMisses */ + m_gridSize[0] * m_gridSize[1] * m_gridSize[2] *
1120                                                                    1 /* hit instance custom indices */) *
1121                                      sizeof(uint32_t));
1122     }
1123 
getTLASPtrVecToBind() const1124     std::vector<TopLevelAccelerationStructure *> getTLASPtrVecToBind() const final
1125     {
1126         DE_ASSERT(m_tlPtr != nullptr);
1127 
1128         return {m_tlPtr.get()};
1129     }
1130 
resetTLAS()1131     void resetTLAS() final
1132     {
1133         m_tlPtr.reset();
1134     }
1135 
initAS(vkt::Context & context,RayTracingProperties *,VkCommandBuffer commandBuffer)1136     void initAS(vkt::Context &context, RayTracingProperties * /* rtPropertiesPtr */,
1137                 VkCommandBuffer commandBuffer) final
1138     {
1139         /* Each AS holds a single unit AABB / cube built of tris.
1140          *
1141          * Geometry in the zeroth acceleration structure starts at the origin. Subsequent ASes
1142          * hold geometry that is positioned so that geometry formed by the union of all ASes never
1143          * intersects.
1144          *
1145          * Each raygen shader invocation uses a unique origin+target pair for the traced ray, and
1146          * only one AS is expected to hold geometry that the ray can find intersection for.
1147          * The AS index is stored in the result buffer, which is later verified by the CPU.
1148          *
1149          * Due to the fact AccelerationStructureEXT array indexing must be dynamically uniform and
1150          * it is not guaranteed we can determine workgroup size on VK 1.1-conformant platforms,
1151          * we can only trace rays against the same AS in a single ray trace dispatch.
1152          */
1153         std::unique_ptr<GridASProvider> asProviderPtr(new GridASProvider(tcu::Vec3(0, 0, 0), /* gridStartXYZ          */
1154                                                                          tcu::Vec3(1, 1, 1), /* gridCellSizeXYZ       */
1155                                                                          m_gridSize,
1156                                                                          tcu::Vec3(3, 0, 0), /* gridInterCellDeltaXYZ */
1157                                                                          m_geometryType));
1158 
1159         m_tlPtr = asProviderPtr->createTLAS(context, m_asStructureLayout, commandBuffer,
1160                                             VK_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION_BIT_KHR,
1161                                             this,     /* optASPropertyProviderPtr */
1162                                             nullptr); /* optASFeedbackPtr            */
1163     }
1164 
initPrograms(SourceCollections & programCollection) const1165     void initPrograms(SourceCollections &programCollection) const final
1166     {
1167         const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4,
1168                                                   0u,    /* flags        */
1169                                                   true); /* allowSpirv14 */
1170 
1171         const char *hitPropsDefinition = "struct HitProps\n"
1172                                          "{\n"
1173                                          "    uint instanceCustomIndex;\n"
1174                                          "};\n";
1175 
1176         {
1177             std::stringstream css;
1178 
1179             css << "#version 460 core\n"
1180                    "\n"
1181                    "#extension GL_EXT_ray_tracing : require\n"
1182                    "\n"
1183                    "hitAttributeEXT vec3 unusedAttribute;\n"
1184                    "\n" +
1185                        de::toString(hitPropsDefinition) +
1186                        "\n"
1187                        "layout(location = 0) rayPayloadInEXT      uint   unusedPayload;\n"
1188                        "layout(set      = 0, binding = 0, std430) buffer result\n"
1189                        "{\n"
1190                        "    uint     nHitsRegistered;\n"
1191                        "    uint     nMissesRegistered;\n"
1192                        "    HitProps hits[];\n"
1193                        "};\n"
1194                        "\n"
1195                        "void main()\n"
1196                        "{\n"
1197                        "    uint nHit = atomicAdd(nHitsRegistered, 1);\n"
1198                        "\n"
1199                        "    hits[nHit].instanceCustomIndex = gl_InstanceCustomIndexEXT;\n"
1200                        "}\n";
1201 
1202             programCollection.glslSources.add("ahit") << glu::AnyHitSource(css.str()) << buildOptions;
1203         }
1204 
1205         {
1206             std::stringstream css;
1207 
1208             css << "#version 460 core\n"
1209                    "\n"
1210                    "#extension GL_EXT_ray_tracing : require\n"
1211                    "\n"
1212                    "hitAttributeEXT vec3 hitAttribute;\n"
1213                    "\n"
1214                    "void main()\n"
1215                    "{\n"
1216                    "    reportIntersectionEXT(0.95f, 0);\n"
1217                    "}\n";
1218 
1219             programCollection.glslSources.add("intersection") << glu::IntersectionSource(css.str()) << buildOptions;
1220         }
1221 
1222         {
1223             std::stringstream css;
1224 
1225             css << "#version 460 core\n"
1226                    "\n"
1227                    "#extension GL_EXT_ray_tracing : require\n"
1228                    "\n" +
1229                        de::toString(hitPropsDefinition) +
1230                        "\n"
1231                        "layout(set = 0, binding = 0, std430) buffer result\n"
1232                        "{\n"
1233                        "    uint     nHitsRegistered;\n"
1234                        "    uint     nMissesRegistered;\n"
1235                        "    HitProps hits[];\n"
1236                        "};\n"
1237                        "\n"
1238                        "layout(location = 0) rayPayloadInEXT uint rayIndex;\n"
1239                        "\n"
1240                        "void main()\n"
1241                        "{\n"
1242                        "    atomicAdd(nMissesRegistered, 1);\n"
1243                        "}\n";
1244 
1245             programCollection.glslSources.add("miss") << glu::MissSource(css.str()) << buildOptions;
1246         }
1247 
1248         {
1249             std::stringstream css;
1250 
1251             css << "#version 460 core\n"
1252                    "\n"
1253                    "#extension GL_EXT_ray_tracing : require\n"
1254                    "\n"
1255                    "layout(location = 0)              rayPayloadEXT uint               unusedPayload;\n"
1256                    "layout(set      = 0, binding = 1) uniform accelerationStructureEXT accelerationStructure;\n"
1257                    "\n"
1258                    "void main()\n"
1259                    "{\n"
1260                    "    uint  nInvocation  = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + "
1261                    "gl_LaunchIDEXT.y * gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
1262                    "    uint  rayFlags     = gl_RayFlagsCullBackFacingTrianglesEXT;\n"
1263                    "    float tmin         = 0.001;\n"
1264                    "    float tmax         = 9.0;\n"
1265                    "\n"
1266                    "    uint  cullMask     = 0xFF;\n"
1267                    "    vec3  cellStartXYZ = vec3(nInvocation * 3.0, 0.0, 0.0);\n"
1268                    "    vec3  cellEndXYZ   = cellStartXYZ + vec3(1.0);\n"
1269                    "    vec3  target       = mix(cellStartXYZ, cellEndXYZ, vec3(0.5) );\n"
1270                    "    vec3  origin       = target - vec3(0, 2, 0);\n"
1271                    "    vec3  direct       = normalize(target - origin);\n"
1272                    "\n"
1273                    "    traceRayEXT(accelerationStructure, rayFlags, cullMask, 0, 0, 0, origin, tmin, direct, tmax, "
1274                    "0);\n"
1275                    "}\n";
1276 
1277             programCollection.glslSources.add("rgen") << glu::RaygenSource(css.str()) << buildOptions;
1278         }
1279     }
1280 
verifyResultBuffer(const void * resultDataPtr) const1281     bool verifyResultBuffer(const void *resultDataPtr) const final
1282     {
1283         const uint32_t *resultU32Ptr = reinterpret_cast<const uint32_t *>(resultDataPtr);
1284         bool result                  = false;
1285 
1286         typedef struct
1287         {
1288             uint32_t instanceCustomIndex;
1289         } HitProperties;
1290 
1291         std::map<uint32_t, uint32_t> customInstanceIndexToHitCountMap;
1292         const auto nHitsReported   = *resultU32Ptr;
1293         const auto nMissesReported = *(resultU32Ptr + 1);
1294 
1295         if (nHitsReported != m_gridSize[0] * m_gridSize[1] * m_gridSize[2])
1296         {
1297             goto end;
1298         }
1299 
1300         if (nMissesReported != 0)
1301         {
1302             goto end;
1303         }
1304 
1305         for (uint32_t nHit = 0; nHit < nHitsReported; ++nHit)
1306         {
1307             const HitProperties *hitPropsPtr =
1308                 reinterpret_cast<const HitProperties *>(resultU32Ptr + 2 /* preamble ints */) + nHit;
1309 
1310             customInstanceIndexToHitCountMap[hitPropsPtr->instanceCustomIndex]++;
1311 
1312             if (customInstanceIndexToHitCountMap[hitPropsPtr->instanceCustomIndex] > 1)
1313             {
1314                 goto end;
1315             }
1316         }
1317 
1318         for (uint32_t nInstance = 0; nInstance < nHitsReported; ++nInstance)
1319         {
1320             if (customInstanceIndexToHitCountMap.find(1 + nInstance) == customInstanceIndexToHitCountMap.end())
1321             {
1322                 goto end;
1323             }
1324         }
1325 
1326         result = true;
1327     end:
1328         return result;
1329     }
1330 
1331 private:
1332     const AccelerationStructureLayout m_asStructureLayout;
1333     const GeometryType m_geometryType;
1334 
1335     const tcu::UVec3 m_gridSize;
1336     mutable uint32_t m_lastCustomInstanceIndexUsed;
1337     std::unique_ptr<TopLevelAccelerationStructure> m_tlPtr;
1338 };
1339 
1340 class ASStressTest : public TestBase, public ASPropertyProvider
1341 {
1342 public:
ASStressTest(const GeometryType & geometryType,const AccelerationStructureLayout & asStructureLayout)1343     ASStressTest(const GeometryType &geometryType, const AccelerationStructureLayout &asStructureLayout)
1344         : m_asStructureLayout(asStructureLayout)
1345         , m_geometryType(geometryType)
1346         , m_lastCustomInstanceIndexUsed(0)
1347         , m_nASesToUse(0)
1348         , m_nMaxASToUse(16u)
1349     {
1350     }
1351 
~ASStressTest()1352     ~ASStressTest()
1353     {
1354         /* Stub */
1355     }
1356 
getASBindingArraySize() const1357     uint32_t getASBindingArraySize() const final
1358     {
1359         DE_ASSERT(m_nASesToUse != 0);
1360 
1361         return m_nASesToUse;
1362     }
1363 
getCHitShaderCollectionShaderNames() const1364     std::vector<std::string> getCHitShaderCollectionShaderNames() const final
1365     {
1366         return {};
1367     }
1368 
getInstanceCustomIndex(const uint32_t & nBL,const uint32_t & nInstance) const1369     uint32_t getInstanceCustomIndex(const uint32_t &nBL, const uint32_t &nInstance) const final
1370     {
1371         DE_UNREF(nBL);
1372         DE_UNREF(nInstance);
1373 
1374         return ++m_lastCustomInstanceIndexUsed;
1375     }
1376 
getDispatchSize() const1377     tcu::UVec3 getDispatchSize() const final
1378     {
1379         return tcu::UVec3(1, 1, 1);
1380     }
1381 
getNTraceRayInvocationsNeeded() const1382     uint32_t getNTraceRayInvocationsNeeded() const final
1383     {
1384         return m_nMaxASToUse;
1385     }
1386 
getResultBufferSize() const1387     uint32_t getResultBufferSize() const final
1388     {
1389         return static_cast<uint32_t>(
1390             (2 /* nHits, nMisses */ + 2 * m_nMaxASToUse /* hit instance custom indices + AS index */) *
1391             sizeof(uint32_t));
1392     }
1393 
getTLASPtrVecToBind() const1394     std::vector<TopLevelAccelerationStructure *> getTLASPtrVecToBind() const final
1395     {
1396         std::vector<TopLevelAccelerationStructure *> resultVec;
1397 
1398         DE_ASSERT(m_tlPtrVec.size() != 0);
1399 
1400         for (auto &currentTLPtr : m_tlPtrVec)
1401         {
1402             resultVec.push_back(currentTLPtr.get());
1403         }
1404 
1405         return resultVec;
1406     }
1407 
resetTLAS()1408     void resetTLAS() final
1409     {
1410         for (auto &currentTLPtr : m_tlPtrVec)
1411         {
1412             currentTLPtr.reset();
1413         }
1414     }
1415 
init(vkt::Context &,RayTracingProperties * rtPropertiesPtr)1416     bool init(vkt::Context & /* context    */, RayTracingProperties *rtPropertiesPtr) final
1417     {
1418         /* NOTE: We clamp the number below to a sensible value, in case the implementation has no restrictions on the number of
1419          *         ASes accessible to shaders.
1420          */
1421         m_nASesToUse = std::min(rtPropertiesPtr->getMaxDescriptorSetAccelerationStructures(), m_nMaxASToUse);
1422 
1423         return true;
1424     }
1425 
initAS(vkt::Context & context,RayTracingProperties *,VkCommandBuffer commandBuffer)1426     void initAS(vkt::Context &context, RayTracingProperties * /* rtPropertiesPtr */,
1427                 VkCommandBuffer commandBuffer) final
1428     {
1429         /* Each AS holds a single unit AABB / cube built of tris.
1430          *
1431          * Geometry in the zeroth acceleration structure starts at the origin. Subsequent ASes
1432          * hold geometry that is positioned so that geometry formed by the union of all ASes never
1433          * intersects.
1434          *
1435          * Each raygen shader invocation uses a unique origin+target pair for the traced ray, and
1436          * only one AS is expected to hold geometry that the ray can find intersection for.
1437          * The AS index is stored in the result buffer, which is later verified by the CPU.
1438          *
1439          * Due to the fact AccelerationStructureEXT array indexing must be dynamically uniform and
1440          * it is not guaranteed we can determine workgroup size on VK 1.1-conformant platforms,
1441          * we can only trace rays against the same AS in a single ray trace dispatch.
1442          */
1443         std::unique_ptr<GridASProvider> asProviderPtr(
1444             new GridASProvider(tcu::Vec3(0, 0, 0),  /* gridStartXYZ          */
1445                                tcu::Vec3(1, 1, 1),  /* gridCellSizeXYZ       */
1446                                tcu::UVec3(1, 1, 1), /* gridSizeXYZ           */
1447                                tcu::Vec3(0, 0, 0),  /* gridInterCellDeltaXYZ */
1448                                m_geometryType));
1449 
1450         for (uint32_t nAS = 0; nAS < m_nASesToUse; ++nAS)
1451         {
1452             const auto origin = tcu::Vec3(3.0f * static_cast<float>(nAS), 0.0f, 0.0f);
1453 
1454             asProviderPtr->setProperties(origin, tcu::Vec3(1, 1, 1), /* gridCellSizeXYZ       */
1455                                          tcu::UVec3(1, 1, 1),        /* gridSizeXYZ           */
1456                                          tcu::Vec3(0, 0, 0),         /* gridInterCellDeltaXYZ */
1457                                          m_geometryType);
1458 
1459             auto tlPtr = asProviderPtr->createTLAS(context, m_asStructureLayout, commandBuffer,
1460                                                    VK_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION_BIT_KHR,
1461                                                    this,     /* optASPropertyProviderPtr */
1462                                                    nullptr); /* optASFeedbackPtr            */
1463 
1464             m_tlPtrVec.push_back(std::move(tlPtr));
1465         }
1466     }
1467 
initPrograms(SourceCollections & programCollection) const1468     void initPrograms(SourceCollections &programCollection) const final
1469     {
1470         const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4,
1471                                                   0u,    /* flags        */
1472                                                   true); /* allowSpirv14 */
1473 
1474         const char *hitPropsDefinition = "struct HitProps\n"
1475                                          "{\n"
1476                                          "    uint instanceCustomIndex;\n"
1477                                          "    uint nAS;\n"
1478                                          "};\n";
1479 
1480         {
1481             std::stringstream css;
1482 
1483             css << "#version 460 core\n"
1484                    "\n"
1485                    "#extension GL_EXT_ray_tracing : require\n"
1486                    "\n"
1487                    "hitAttributeEXT vec3 unusedAttribute;\n"
1488                    "\n" +
1489                        de::toString(hitPropsDefinition) +
1490                        "\n"
1491                        "layout(location = 0) rayPayloadInEXT      uint   nAS;\n"
1492                        "layout(set      = 0, binding = 0, std430) buffer result\n"
1493                        "{\n"
1494                        "    uint     nHitsRegistered;\n"
1495                        "    uint     nMissesRegistered;\n"
1496                        "    HitProps hits[];\n"
1497                        "};\n"
1498                        "\n"
1499                        "void main()\n"
1500                        "{\n"
1501                        "    uint nHit = atomicAdd(nHitsRegistered, 1);\n"
1502                        "\n"
1503                        "    hits[nHit].instanceCustomIndex = gl_InstanceCustomIndexEXT;\n"
1504                        "    hits[nHit].nAS                 = nAS;\n"
1505                        "}\n";
1506 
1507             programCollection.glslSources.add("ahit") << glu::AnyHitSource(css.str()) << buildOptions;
1508         }
1509 
1510         {
1511             std::stringstream css;
1512 
1513             css << "#version 460 core\n"
1514                    "\n"
1515                    "#extension GL_EXT_ray_tracing : require\n"
1516                    "\n"
1517                    "hitAttributeEXT vec3 hitAttribute;\n"
1518                    "\n"
1519                    "void main()\n"
1520                    "{\n"
1521                    "    reportIntersectionEXT(0.95f, 0);\n"
1522                    "}\n";
1523 
1524             programCollection.glslSources.add("intersection") << glu::IntersectionSource(css.str()) << buildOptions;
1525         }
1526 
1527         {
1528             std::stringstream css;
1529 
1530             css << "#version 460 core\n"
1531                    "\n"
1532                    "#extension GL_EXT_ray_tracing : require\n"
1533                    "\n" +
1534                        de::toString(hitPropsDefinition) +
1535                        "\n"
1536                        "layout(set = 0, binding = 0, std430) buffer result\n"
1537                        "{\n"
1538                        "    uint     nHitsRegistered;\n"
1539                        "    uint     nMissesRegistered;\n"
1540                        "    HitProps hits[];\n"
1541                        "};\n"
1542                        "\n"
1543                        "layout(location = 0) rayPayloadInEXT uint rayIndex;\n"
1544                        "\n"
1545                        "void main()\n"
1546                        "{\n"
1547                        "    atomicAdd(nMissesRegistered, 1);\n"
1548                        "}\n";
1549 
1550             programCollection.glslSources.add("miss") << glu::MissSource(css.str()) << buildOptions;
1551         }
1552 
1553         {
1554             std::stringstream css;
1555 
1556             css << "#version 460 core\n"
1557                    "\n"
1558                    "#extension GL_EXT_ray_tracing : require\n"
1559                    "\n"
1560                    "layout(push_constant) uniform pcUB\n"
1561                    "{\n"
1562                    "    uint nAS;\n"
1563                    "} ub;\n"
1564                    "\n"
1565                    "layout(location = 0)              rayPayloadEXT uint               payload;\n"
1566                    "layout(set      = 0, binding = 1) uniform accelerationStructureEXT accelerationStructures[" +
1567                        de::toString(m_nMaxASToUse) +
1568                        "];\n"
1569                        "\n"
1570                        "void main()\n"
1571                        "{\n"
1572                        "    uint  nInvocation  = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + "
1573                        "gl_LaunchIDEXT.y * gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
1574                        "    uint  rayFlags     = gl_RayFlagsCullBackFacingTrianglesEXT;\n"
1575                        "    float tmin         = 0.001;\n"
1576                        "    float tmax         = 9.0;\n"
1577                        "\n"
1578                        "    uint  cullMask     = 0xFF;\n"
1579                        "    vec3  cellStartXYZ = vec3(ub.nAS * 3.0, 0.0, 0.0);\n"
1580                        "    vec3  cellEndXYZ   = cellStartXYZ + vec3(1.0);\n"
1581                        "    vec3  target       = mix(cellStartXYZ, cellEndXYZ, vec3(0.5) );\n"
1582                        "    vec3  origin       = target - vec3(0, 2, 0);\n"
1583                        "    vec3  direct       = normalize(target - origin);\n"
1584                        "\n"
1585                        "    payload = ub.nAS;\n"
1586                        "\n"
1587                        "    traceRayEXT(accelerationStructures[ub.nAS], rayFlags, cullMask, 0, 0, 0, origin, tmin, "
1588                        "direct, tmax, 0);\n"
1589                        "}\n";
1590 
1591             programCollection.glslSources.add("rgen") << glu::RaygenSource(css.str()) << buildOptions;
1592         }
1593     }
1594 
getPipelineLayout(const vk::DeviceInterface & deviceInterface,VkDevice deviceVk,VkDescriptorSetLayout descriptorSetLayout)1595     Move<VkPipelineLayout> getPipelineLayout(const vk::DeviceInterface &deviceInterface, VkDevice deviceVk,
1596                                              VkDescriptorSetLayout descriptorSetLayout) final
1597     {
1598         VkPushConstantRange pushConstantRange;
1599 
1600         pushConstantRange.offset     = 0;
1601         pushConstantRange.size       = sizeof(uint32_t);
1602         pushConstantRange.stageFlags = VK_SHADER_STAGE_RAYGEN_BIT_KHR;
1603 
1604         return makePipelineLayout(deviceInterface, deviceVk, 1, /* setLayoutCount */
1605                                   &descriptorSetLayout, 1,      /* pushRangeCount */
1606                                   &pushConstantRange);
1607     }
1608 
onBeforeCmdTraceRays(const uint32_t & nDispatch,vkt::Context & context,VkCommandBuffer commandBuffer,VkPipelineLayout pipelineLayout)1609     void onBeforeCmdTraceRays(const uint32_t &nDispatch, vkt::Context &context, VkCommandBuffer commandBuffer,
1610                               VkPipelineLayout pipelineLayout) final
1611     {
1612         /* No need for a sync point in-between trace ray commands - all writes are atomic */
1613         VkMemoryBarrier memBarrier;
1614 
1615         memBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
1616         memBarrier.pNext         = nullptr;
1617         memBarrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
1618         memBarrier.sType         = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
1619 
1620         context.getDeviceInterface().cmdPipelineBarrier(
1621             commandBuffer, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, /* srcStageMask       */
1622             VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR,                /* dstStageMask       */
1623             0,                                                           /* dependencyFlags    */
1624             1,                                                           /* memoryBarrierCount */
1625             &memBarrier, 0,                                              /* bufferMemoryBarrierCount */
1626             nullptr,                                                     /* pBufferMemoryBarriers    */
1627             0,                                                           /* imageMemoryBarrierCount  */
1628             nullptr);                                                    /* pImageMemoryBarriers     */
1629 
1630         context.getDeviceInterface().cmdPushConstants(commandBuffer, pipelineLayout, VK_SHADER_STAGE_RAYGEN_BIT_KHR,
1631                                                       0, /* offset */
1632                                                       sizeof(uint32_t), &nDispatch);
1633     }
1634 
verifyResultBuffer(const void * resultDataPtr) const1635     bool verifyResultBuffer(const void *resultDataPtr) const final
1636     {
1637         const uint32_t *resultU32Ptr = reinterpret_cast<const uint32_t *>(resultDataPtr);
1638         bool result                  = false;
1639 
1640         typedef struct
1641         {
1642             uint32_t instanceCustomIndex;
1643             uint32_t nAS;
1644         } HitProperties;
1645 
1646         const auto nHitsReported   = *resultU32Ptr;
1647         const auto nMissesReported = *(resultU32Ptr + 1);
1648 
1649         if (nHitsReported != m_nMaxASToUse)
1650         {
1651             goto end;
1652         }
1653 
1654         if (nMissesReported != 0)
1655         {
1656             goto end;
1657         }
1658 
1659         for (uint32_t nHit = 0; nHit < nHitsReported; ++nHit)
1660         {
1661             const HitProperties *hitPropsPtr =
1662                 reinterpret_cast<const HitProperties *>(resultU32Ptr + 2 /* preamble ints */) + nHit;
1663 
1664             if (hitPropsPtr->instanceCustomIndex != (nHit + 1))
1665             {
1666                 goto end;
1667             }
1668 
1669             if (hitPropsPtr->nAS != nHit)
1670             {
1671                 goto end;
1672             }
1673         }
1674 
1675         result = true;
1676     end:
1677         return result;
1678     }
1679 
1680 private:
1681     const AccelerationStructureLayout m_asStructureLayout;
1682     const GeometryType m_geometryType;
1683 
1684     mutable uint32_t m_lastCustomInstanceIndexUsed;
1685     uint32_t m_nASesToUse;
1686     std::vector<std::unique_ptr<TopLevelAccelerationStructure>> m_tlPtrVec;
1687 
1688     const uint32_t m_nMaxASToUse;
1689 };
1690 
1691 class CallableShaderStressTest : public TestBase
1692 {
1693 public:
CallableShaderStressTest(const GeometryType & geometryType,const AccelerationStructureLayout & asStructureLayout,const bool & useDynamicStackSize)1694     CallableShaderStressTest(const GeometryType &geometryType, const AccelerationStructureLayout &asStructureLayout,
1695                              const bool &useDynamicStackSize)
1696         : m_asStructureLayout(asStructureLayout)
1697         , m_geometryType(geometryType)
1698         , m_gridSizeXYZ(tcu::UVec3(128, 1, 1))
1699         , m_nMaxCallableLevels((useDynamicStackSize) ? 8 : 2 /* as per spec */)
1700         , m_useDynamicStackSize(useDynamicStackSize)
1701         , m_ahitShaderStackSize(0)
1702         , m_callableShaderStackSize(0)
1703         , m_chitShaderStackSize(0)
1704         , m_isectShaderStackSize(0)
1705         , m_missShaderStackSize(0)
1706         , m_raygenShaderStackSize(0)
1707     {
1708     }
1709 
~CallableShaderStressTest()1710     ~CallableShaderStressTest()
1711     {
1712         /* Stub */
1713     }
1714 
getCallableShaderCollectionNames() const1715     std::vector<std::string> getCallableShaderCollectionNames() const final
1716     {
1717         std::vector<std::string> resultVec(m_nMaxCallableLevels);
1718 
1719         for (uint32_t nLevel = 0; nLevel < m_nMaxCallableLevels; nLevel++)
1720         {
1721             resultVec.at(nLevel) = "call" + de::toString(nLevel);
1722         }
1723 
1724         return resultVec;
1725     }
1726 
getDispatchSize() const1727     tcu::UVec3 getDispatchSize() const final
1728     {
1729         DE_ASSERT(m_gridSizeXYZ[0] != 0);
1730         DE_ASSERT(m_gridSizeXYZ[1] != 0);
1731         DE_ASSERT(m_gridSizeXYZ[2] != 0);
1732 
1733         return tcu::UVec3(m_gridSizeXYZ[0], m_gridSizeXYZ[1], m_gridSizeXYZ[2]);
1734     }
1735 
getDynamicStackSize(const uint32_t maxPipelineRayRecursionDepth) const1736     uint32_t getDynamicStackSize(const uint32_t maxPipelineRayRecursionDepth) const final
1737     {
1738         uint32_t result                              = 0;
1739         const auto maxStackSpaceNeededForZerothTrace = static_cast<uint32_t>(de::max(
1740             de::max(m_chitShaderStackSize, m_missShaderStackSize), m_isectShaderStackSize + m_ahitShaderStackSize));
1741         const auto maxStackSpaceNeededForNonZerothTraces =
1742             static_cast<uint32_t>(de::max(m_chitShaderStackSize, m_missShaderStackSize));
1743 
1744         DE_ASSERT(m_useDynamicStackSize);
1745 
1746         result = static_cast<uint32_t>(m_raygenShaderStackSize) +
1747                  de::min(1u, maxPipelineRayRecursionDepth) * maxStackSpaceNeededForZerothTrace +
1748                  de::max(0u, maxPipelineRayRecursionDepth - 1) * maxStackSpaceNeededForNonZerothTraces +
1749                  m_nMaxCallableLevels * static_cast<uint32_t>(m_callableShaderStackSize);
1750 
1751         DE_ASSERT(result != 0);
1752         return result;
1753     }
1754 
getResultBufferSize() const1755     uint32_t getResultBufferSize() const final
1756     {
1757         const auto nRaysTraced                          = m_gridSizeXYZ[0] * m_gridSizeXYZ[1] * m_gridSizeXYZ[2];
1758         const auto nClosestHitShaderInvocationsExpected = nRaysTraced / 2;
1759         const auto nMissShaderInvocationsExpected       = nRaysTraced / 2;
1760         const auto resultItemSize =
1761             sizeof(uint32_t) * 3 /* shaderStage, nOriginRay, nLevel */ + sizeof(float) * m_nMaxCallableLevels;
1762 
1763         DE_ASSERT((nRaysTraced % 2) == 0);
1764         DE_ASSERT(m_nMaxCallableLevels != 0);
1765         DE_ASSERT(m_gridSizeXYZ[0] != 0);
1766         DE_ASSERT(m_gridSizeXYZ[1] != 0);
1767         DE_ASSERT(m_gridSizeXYZ[2] != 0);
1768 
1769         return static_cast<uint32_t>(
1770             sizeof(uint32_t) /* nItemsStored */ +
1771             (resultItemSize * m_nMaxCallableLevels) *
1772                 (nRaysTraced + nMissShaderInvocationsExpected + nClosestHitShaderInvocationsExpected));
1773     }
1774 
getTLASPtrVecToBind() const1775     std::vector<TopLevelAccelerationStructure *> getTLASPtrVecToBind() const final
1776     {
1777         DE_ASSERT(m_tlPtr != nullptr);
1778 
1779         return {m_tlPtr.get()};
1780     }
1781 
init(vkt::Context &,RayTracingProperties * rtPropertiesPtr)1782     bool init(vkt::Context & /* context    */, RayTracingProperties *rtPropertiesPtr) final
1783     {
1784         DE_UNREF(rtPropertiesPtr);
1785         return true;
1786     }
1787 
initAS(vkt::Context & context,RayTracingProperties *,VkCommandBuffer commandBuffer)1788     void initAS(vkt::Context &context, RayTracingProperties * /* rtPropertiesPtr */,
1789                 VkCommandBuffer commandBuffer) final
1790     {
1791         std::unique_ptr<GridASProvider> asProviderPtr(new GridASProvider(tcu::Vec3(0, 0, 0), /* gridStartXYZ          */
1792                                                                          tcu::Vec3(1, 1, 1), /* gridCellSizeXYZ       */
1793                                                                          m_gridSizeXYZ,
1794                                                                          tcu::Vec3(6, 0, 0), /* gridInterCellDeltaXYZ */
1795                                                                          m_geometryType));
1796 
1797         m_tlPtr =
1798             asProviderPtr->createTLAS(context, m_asStructureLayout, commandBuffer, 0, /* bottomLevelGeometryFlags */
1799                                       nullptr,                                        /* optASPropertyProviderPtr */
1800                                       nullptr);                                       /* optASFeedbackPtr            */
1801     }
1802 
initPrograms(SourceCollections & programCollection) const1803     void initPrograms(SourceCollections &programCollection) const final
1804     {
1805         const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4,
1806                                                   0u,    /* flags        */
1807                                                   true); /* allowSpirv14 */
1808 
1809         std::vector<std::string> callableDataDefinitions(m_nMaxCallableLevels);
1810         std::vector<std::string> callableDataInDefinitions(m_nMaxCallableLevels);
1811 
1812         for (uint32_t nCallableDataLevel = 0; nCallableDataLevel < m_nMaxCallableLevels; ++nCallableDataLevel)
1813         {
1814             const auto locationsPerCallableData = (3 /* uints */ + (nCallableDataLevel + 1) /* dataChunks */);
1815             const auto callableDataLocation     = locationsPerCallableData * nCallableDataLevel;
1816 
1817             callableDataDefinitions.at(nCallableDataLevel) =
1818                 "layout (location = " + de::toString(callableDataLocation) +
1819                 ") callableDataEXT struct\n"
1820                 "{\n"
1821                 "    uint  shaderStage;\n"
1822                 "    uint  nOriginRay;\n"
1823                 "    uint  nLevel;\n"
1824                 "    float dataChunk[" +
1825                 de::toString(nCallableDataLevel + 1) +
1826                 "];\n"
1827                 "} callableData" +
1828                 de::toString(nCallableDataLevel) + ";\n";
1829 
1830             callableDataInDefinitions.at(nCallableDataLevel) =
1831                 "layout(location = " + de::toString(callableDataLocation) +
1832                 ") callableDataInEXT struct\n"
1833                 "{\n"
1834                 "    uint  shaderStage;\n"
1835                 "    uint  nOriginRay;\n"
1836                 "    uint  nLevel;\n"
1837                 "    float dataChunk[" +
1838                 de::toString(nCallableDataLevel + 1) +
1839                 "];\n"
1840                 "} inData;\n";
1841 
1842             m_callableDataLevelToCallableDataLocation[nCallableDataLevel] = callableDataLocation;
1843         }
1844 
1845         const auto resultBufferDefinition = "struct ResultData\n"
1846                                             "{\n"
1847                                             "    uint  shaderStage;\n"
1848                                             "    uint  nOriginRay;\n"
1849                                             "    uint  nLevel;\n"
1850                                             "    float dataChunk[" +
1851                                             de::toString(m_nMaxCallableLevels) +
1852                                             "];\n"
1853                                             "};\n"
1854                                             "\n"
1855                                             "layout(set = 0, binding = 0, std430) buffer result\n"
1856                                             "{\n"
1857                                             "    uint       nInvocationsRegistered;\n"
1858                                             "    ResultData resultData[];\n"
1859                                             "};\n";
1860 
1861         {
1862             std::stringstream css;
1863 
1864             /* NOTE: executeCallable() is unavailable in ahit stage */
1865             css << "#version 460 core\n"
1866                    "\n"
1867                    "#extension GL_EXT_ray_tracing : require\n"
1868                    "\n"
1869                    "layout(location = 128) rayPayloadInEXT uint unusedPayload;\n"
1870                    "\n"
1871                    "void main()\n"
1872                    "{\n"
1873                    "}\n";
1874 
1875             programCollection.glslSources.add("ahit") << glu::AnyHitSource(css.str()) << buildOptions;
1876         }
1877 
1878         {
1879             std::stringstream css;
1880 
1881             css << "#version 460 core\n"
1882                    "\n"
1883                    "#extension GL_EXT_ray_tracing : require\n"
1884                    "\n"
1885                    "layout(location = 128) rayPayloadInEXT uint rayIndex;\n"
1886                    "\n" +
1887                        de::toString(callableDataDefinitions.at(0)) + de::toString(resultBufferDefinition) +
1888                        "void main()\n"
1889                        "{\n"
1890                        "    uint  nInvocation  = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + "
1891                        "gl_LaunchIDEXT.y * gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
1892                        "\n"
1893                        "    callableData0.shaderStage  = 3;\n"
1894                        "    callableData0.nOriginRay   = nInvocation;\n"
1895                        "    callableData0.nLevel       = 0;\n"
1896                        "    callableData0.dataChunk[0] = float(nInvocation);\n"
1897                        "\n"
1898                        "    executeCallableEXT(0 /* sbtRecordIndex */, " +
1899                        de::toString(m_callableDataLevelToCallableDataLocation.at(0)) +
1900                        ");\n"
1901                        "}\n";
1902 
1903             programCollection.glslSources.add("chit") << glu::ClosestHitSource(css.str()) << buildOptions;
1904         }
1905 
1906         {
1907             std::stringstream css;
1908 
1909             /* NOTE: executeCallable() is unavailable in isect stage */
1910             css << "#version 460 core\n"
1911                    "\n"
1912                    "#extension GL_EXT_ray_tracing : require\n"
1913                    "\n"
1914                    "void main()\n"
1915                    "{\n"
1916                    "    reportIntersectionEXT(0.95f, 0);\n"
1917                    "}\n";
1918 
1919             programCollection.glslSources.add("intersection") << glu::IntersectionSource(css.str()) << buildOptions;
1920         }
1921 
1922         {
1923             std::stringstream css;
1924 
1925             css << "#version 460 core\n"
1926                    "\n"
1927                    "#extension GL_EXT_ray_tracing : require\n"
1928                    "\n" +
1929                        de::toString(callableDataDefinitions.at(0)) + de::toString(resultBufferDefinition) +
1930                        "\n"
1931                        "void main()\n"
1932                        "{\n"
1933                        "    uint  nInvocation  = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + "
1934                        "gl_LaunchIDEXT.y * gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
1935                        "\n"
1936                        "    callableData0.shaderStage  = 2;\n"
1937                        "    callableData0.nOriginRay   = nInvocation;\n"
1938                        "    callableData0.nLevel       = 0;\n"
1939                        "    callableData0.dataChunk[0] = float(nInvocation);\n"
1940                        "\n"
1941                        "    executeCallableEXT(0 /* sbtRecordIndex */, " +
1942                        de::toString(m_callableDataLevelToCallableDataLocation.at(0)) +
1943                        ");\n"
1944                        "}\n";
1945 
1946             programCollection.glslSources.add("miss") << glu::MissSource(css.str()) << buildOptions;
1947         }
1948 
1949         {
1950             std::stringstream css;
1951 
1952             css << "#version 460 core\n"
1953                    "\n"
1954                    "#extension GL_EXT_ray_tracing : require\n"
1955                    "\n" +
1956                        de::toString(callableDataDefinitions.at(0)) +
1957                        "layout(location = 128)            rayPayloadEXT uint               unusedPayload;\n"
1958                        "layout(set      = 0, binding = 1) uniform accelerationStructureEXT accelerationStructure;\n"
1959                        "\n"
1960                        "void main()\n"
1961                        "{\n"
1962                        "    uint  nInvocation  = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + "
1963                        "gl_LaunchIDEXT.y * gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
1964                        "    uint  rayFlags     = 0;\n"
1965                        "    float tmin         = 0.001;\n"
1966                        "    float tmax         = 9.0;\n"
1967                        "\n"
1968                        "    uint  cullMask     = 0xFF;\n"
1969                        "    vec3  cellStartXYZ = vec3(nInvocation * 3.0, 0.0, 0.0);\n"
1970                        "    vec3  cellEndXYZ   = cellStartXYZ + vec3(1.0);\n"
1971                        "    vec3  target       = mix(cellStartXYZ, cellEndXYZ, vec3(0.5) );\n"
1972                        "    vec3  origin       = target - vec3(0, 2, 0);\n"
1973                        "    vec3  direct       = normalize(target - origin);\n"
1974                        "\n"
1975                        "    callableData0.shaderStage  = 0;\n"
1976                        "    callableData0.nOriginRay   = nInvocation;\n"
1977                        "    callableData0.nLevel       = 0;\n"
1978                        "    callableData0.dataChunk[0] = float(nInvocation);\n"
1979                        "\n"
1980                        "    executeCallableEXT(0 /* sbtRecordIndex */, " +
1981                        de::toString(m_callableDataLevelToCallableDataLocation.at(0)) +
1982                        ");\n"
1983                        "\n"
1984                        "    traceRayEXT(accelerationStructure, rayFlags, cullMask, 0, 0, 0, origin, tmin, direct, "
1985                        "tmax, 128);\n"
1986                        "}\n";
1987 
1988             programCollection.glslSources.add("rgen") << glu::RaygenSource(css.str()) << buildOptions;
1989         }
1990 
1991         for (uint32_t nCallableShader = 0; nCallableShader < m_nMaxCallableLevels; ++nCallableShader)
1992         {
1993             const bool canInvokeExecutable = (nCallableShader != (m_nMaxCallableLevels - 1));
1994             std::stringstream css;
1995 
1996             css << "#version 460 core\n"
1997                    "\n"
1998                    "#extension GL_EXT_ray_tracing : require\n"
1999                    "\n" +
2000                        de::toString(resultBufferDefinition);
2001 
2002             if ((nCallableShader + 1) != m_nMaxCallableLevels)
2003             {
2004                 css << de::toString(callableDataDefinitions.at(nCallableShader + 1));
2005             }
2006 
2007             css << callableDataInDefinitions[nCallableShader] +
2008                        "\n"
2009                        "void main()\n"
2010                        "{\n"
2011                        "    uint nInvocation = atomicAdd(nInvocationsRegistered, 1);\n"
2012                        "\n"
2013                        "    resultData[nInvocation].shaderStage = inData.shaderStage;\n"
2014                        "    resultData[nInvocation].nOriginRay  = inData.nOriginRay;\n"
2015                        "    resultData[nInvocation].nLevel      = inData.nLevel;\n";
2016 
2017             for (uint32_t nLevel = 0; nLevel < nCallableShader + 1; ++nLevel)
2018             {
2019                 css << "    resultData[nInvocation].dataChunk[" + de::toString(nLevel) + "] = inData.dataChunk[" +
2020                            de::toString(nLevel) + "];\n";
2021             }
2022 
2023             if (canInvokeExecutable)
2024             {
2025                 css << "\n"
2026                        "    callableData" +
2027                            de::toString(nCallableShader + 1) +
2028                            ".shaderStage = 1;\n"
2029                            "    callableData" +
2030                            de::toString(nCallableShader + 1) +
2031                            ".nOriginRay  = inData.nOriginRay;\n"
2032                            "    callableData" +
2033                            de::toString(nCallableShader + 1) + ".nLevel      = " + de::toString(nCallableShader) +
2034                            ";\n"
2035                            "\n";
2036 
2037                 for (uint32_t nLevel = 0; nLevel <= nCallableShader + 1; ++nLevel)
2038                 {
2039                     css << "    callableData" + de::toString(nCallableShader + 1) + ".dataChunk[" +
2040                                de::toString(nLevel) + "] = float(inData.nOriginRay + " + de::toString(nLevel) + ");\n";
2041                 }
2042 
2043                 css << "\n"
2044                        "    executeCallableEXT(" +
2045                            de::toString(nCallableShader + 1) + ", " +
2046                            de::toString(m_callableDataLevelToCallableDataLocation[nCallableShader + 1]) + ");\n";
2047             }
2048 
2049             css << "\n"
2050                    "};\n";
2051 
2052             programCollection.glslSources.add("call" + de::toString(nCallableShader))
2053                 << glu::CallableSource(css.str()) << buildOptions;
2054         }
2055     }
2056 
onShaderStackSizeDiscovered(const VkDeviceSize & raygenShaderStackSize,const VkDeviceSize & ahitShaderStackSize,const VkDeviceSize & chitShaderStackSize,const VkDeviceSize & missShaderStackSize,const VkDeviceSize & callableShaderStackSize,const VkDeviceSize & isectShaderStackSize)2057     void onShaderStackSizeDiscovered(const VkDeviceSize &raygenShaderStackSize, const VkDeviceSize &ahitShaderStackSize,
2058                                      const VkDeviceSize &chitShaderStackSize, const VkDeviceSize &missShaderStackSize,
2059                                      const VkDeviceSize &callableShaderStackSize,
2060                                      const VkDeviceSize &isectShaderStackSize) final
2061     {
2062         m_ahitShaderStackSize     = ahitShaderStackSize;
2063         m_callableShaderStackSize = callableShaderStackSize;
2064         m_chitShaderStackSize     = chitShaderStackSize;
2065         m_isectShaderStackSize    = isectShaderStackSize;
2066         m_missShaderStackSize     = missShaderStackSize;
2067         m_raygenShaderStackSize   = raygenShaderStackSize;
2068     }
2069 
resetTLAS()2070     void resetTLAS() final
2071     {
2072         m_tlPtr.reset();
2073     }
2074 
usesDynamicStackSize() const2075     bool usesDynamicStackSize() const final
2076     {
2077         return m_useDynamicStackSize;
2078     }
2079 
verifyResultBuffer(const void * resultDataPtr) const2080     bool verifyResultBuffer(const void *resultDataPtr) const final
2081     {
2082         const uint32_t *resultU32Ptr = reinterpret_cast<const uint32_t *>(resultDataPtr);
2083         bool result                  = false;
2084         const auto nItemsStored      = *resultU32Ptr;
2085 
2086         /* Convert raw binary data into a human-readable vector representation */
2087         struct ResultItem
2088         {
2089             VkShaderStageFlagBits shaderStage;
2090             uint32_t nLevel;
2091             std::vector<float> dataChunk;
2092 
2093             ResultItem() : shaderStage(VK_SHADER_STAGE_ALL), nLevel(0)
2094             {
2095                 /* Stub */
2096             }
2097         };
2098 
2099         std::map<uint32_t, std::vector<ResultItem>> nRayToResultItemVecMap;
2100 
2101         for (uint32_t nItem = 0; nItem < nItemsStored; ++nItem)
2102         {
2103             const uint32_t *itemDataPtr = resultU32Ptr + 1 /* nItemsStored */ +
2104                                           nItem * (3 /* preamble ints */ + m_nMaxCallableLevels /* received data */);
2105             ResultItem item;
2106             const auto &nOriginRay = *(itemDataPtr + 1);
2107 
2108             item.dataChunk.resize(m_nMaxCallableLevels);
2109 
2110             switch (*itemDataPtr)
2111             {
2112             case 0:
2113                 item.shaderStage = VK_SHADER_STAGE_RAYGEN_BIT_KHR;
2114                 break;
2115             case 1:
2116                 item.shaderStage = VK_SHADER_STAGE_CALLABLE_BIT_KHR;
2117                 break;
2118             case 2:
2119                 item.shaderStage = VK_SHADER_STAGE_MISS_BIT_KHR;
2120                 break;
2121             case 3:
2122                 item.shaderStage = VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR;
2123                 break;
2124 
2125             default:
2126             {
2127                 deAssertFail("This should never happen", __FILE__, __LINE__);
2128             }
2129             }
2130 
2131             item.nLevel = *(itemDataPtr + 2);
2132 
2133             memcpy(item.dataChunk.data(), itemDataPtr + 3, m_nMaxCallableLevels * sizeof(float));
2134 
2135             nRayToResultItemVecMap[nOriginRay].push_back(item);
2136         }
2137 
2138         for (uint32_t nRay = 0; nRay < m_gridSizeXYZ[0] * m_gridSizeXYZ[1] * m_gridSizeXYZ[2]; ++nRay)
2139         {
2140             /* 1. Make sure each ray generated the anticipated number of stores */
2141             const bool closestHitShaderInvoked            = (nRay % 2) == 0;
2142             const bool missShaderInvoked                  = (nRay % 2) != 0;
2143             const uint32_t nShaderStagesInvokingCallables = 1 + /* raygen */
2144                                                             ((closestHitShaderInvoked) ? 1 : 0) +
2145                                                             ((missShaderInvoked) ? 1 : 0);
2146             auto rayIterator = nRayToResultItemVecMap.find(nRay);
2147 
2148             if (rayIterator == nRayToResultItemVecMap.end())
2149             {
2150                 goto end;
2151             }
2152 
2153             if (rayIterator->second.size() != nShaderStagesInvokingCallables * m_nMaxCallableLevels)
2154             {
2155                 goto end;
2156             }
2157 
2158             /* 2. Make sure each shader stage generated the anticipated number of result items */
2159             {
2160                 uint32_t nCallableShaderStageItemsFound   = 0;
2161                 uint32_t nClosestHitShaderStageItemsFound = 0;
2162                 uint32_t nMissShaderStageItemsFound       = 0;
2163                 uint32_t nRaygenShaderStageItemsFound     = 0;
2164 
2165                 for (const auto &currentItem : rayIterator->second)
2166                 {
2167                     if (currentItem.shaderStage == VK_SHADER_STAGE_RAYGEN_BIT_KHR)
2168                     {
2169                         nRaygenShaderStageItemsFound++;
2170                     }
2171                     else if (currentItem.shaderStage == VK_SHADER_STAGE_CALLABLE_BIT_KHR)
2172                     {
2173                         nCallableShaderStageItemsFound++;
2174                     }
2175                     else if (currentItem.shaderStage == VK_SHADER_STAGE_MISS_BIT_KHR)
2176                     {
2177                         nMissShaderStageItemsFound++;
2178                     }
2179                     else if (currentItem.shaderStage == VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR)
2180                     {
2181                         nClosestHitShaderStageItemsFound++;
2182                     }
2183                     else
2184                     {
2185                         DE_ASSERT(false);
2186                     }
2187                 }
2188 
2189                 if (nRaygenShaderStageItemsFound != 1)
2190                 {
2191                     goto end;
2192                 }
2193 
2194                 /* Even rays hit geometry. Odd ones don't */
2195                 if (!missShaderInvoked)
2196                 {
2197                     if (nClosestHitShaderStageItemsFound == 0)
2198                     {
2199                         goto end;
2200                     }
2201 
2202                     if (nMissShaderStageItemsFound != 0)
2203                     {
2204                         goto end;
2205                     }
2206                 }
2207                 else
2208                 {
2209                     if (nClosestHitShaderStageItemsFound != 0)
2210                     {
2211                         goto end;
2212                     }
2213 
2214                     if (nMissShaderStageItemsFound != 1)
2215                     {
2216                         goto end;
2217                     }
2218                 }
2219 
2220                 if (nCallableShaderStageItemsFound != nShaderStagesInvokingCallables * (m_nMaxCallableLevels - 1))
2221                 {
2222                     goto end;
2223                 }
2224             }
2225 
2226             /* 3. Verify data chunk's correctness */
2227             {
2228                 for (const auto &currentItem : rayIterator->second)
2229                 {
2230                     const auto nValidItemsRequired =
2231                         (currentItem.shaderStage == VK_SHADER_STAGE_RAYGEN_BIT_KHR)      ? 1 :
2232                         (currentItem.shaderStage == VK_SHADER_STAGE_MISS_BIT_KHR)        ? 1 :
2233                         (currentItem.shaderStage == VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR) ? 1 :
2234                                                                                            (currentItem.nLevel + 1);
2235 
2236                     for (uint32_t nItem = 0; nItem < nValidItemsRequired; ++nItem)
2237                     {
2238                         if (fabsf(currentItem.dataChunk.at(nItem) - static_cast<float>(nRay + nItem)) > 1e-3f)
2239                         {
2240                             goto end;
2241                         }
2242                     }
2243                 }
2244             }
2245 
2246             /* 4. Verify all shader levels have been reported for relevant shader stages */
2247             {
2248                 std::map<VkShaderStageFlagBits, std::vector<uint32_t>> shaderStageToLevelVecReportedMap;
2249 
2250                 for (const auto &currentItem : rayIterator->second)
2251                 {
2252                     shaderStageToLevelVecReportedMap[currentItem.shaderStage].push_back(currentItem.nLevel);
2253                 }
2254 
2255                 if (shaderStageToLevelVecReportedMap.at(VK_SHADER_STAGE_RAYGEN_BIT_KHR).size() != 1 ||
2256                     shaderStageToLevelVecReportedMap.at(VK_SHADER_STAGE_RAYGEN_BIT_KHR).at(0) != 0)
2257                 {
2258                     goto end;
2259                 }
2260 
2261                 if (closestHitShaderInvoked)
2262                 {
2263                     if (shaderStageToLevelVecReportedMap.at(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR).size() != 1 ||
2264                         shaderStageToLevelVecReportedMap.at(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR).at(0) != 0)
2265                     {
2266                         goto end;
2267                     }
2268                 }
2269                 else
2270                 {
2271                     if (shaderStageToLevelVecReportedMap.find(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR) !=
2272                         shaderStageToLevelVecReportedMap.end())
2273                     {
2274                         goto end;
2275                     }
2276                 }
2277 
2278                 if (missShaderInvoked)
2279                 {
2280                     if (shaderStageToLevelVecReportedMap.at(VK_SHADER_STAGE_MISS_BIT_KHR).size() != 1 ||
2281                         shaderStageToLevelVecReportedMap.at(VK_SHADER_STAGE_MISS_BIT_KHR).at(0) != 0)
2282                     {
2283                         goto end;
2284                     }
2285                 }
2286                 else
2287                 {
2288                     if (shaderStageToLevelVecReportedMap.find(VK_SHADER_STAGE_MISS_BIT_KHR) !=
2289                         shaderStageToLevelVecReportedMap.end())
2290                     {
2291                         goto end;
2292                     }
2293                 }
2294 
2295                 if (shaderStageToLevelVecReportedMap.at(VK_SHADER_STAGE_CALLABLE_BIT_KHR).size() !=
2296                     nShaderStagesInvokingCallables * (m_nMaxCallableLevels - 1))
2297                 {
2298                     goto end;
2299                 }
2300 
2301                 for (uint32_t nLevel = 0; nLevel < m_nMaxCallableLevels - 1; ++nLevel)
2302                 {
2303                     const auto &vec  = shaderStageToLevelVecReportedMap.at(VK_SHADER_STAGE_CALLABLE_BIT_KHR);
2304                     auto vecIterator = std::find(vec.begin(), vec.end(), nLevel);
2305 
2306                     if (vecIterator == vec.end())
2307                     {
2308                         goto end;
2309                     }
2310                 }
2311             }
2312         }
2313 
2314         result = true;
2315     end:
2316         return result;
2317     }
2318 
2319 private:
    /* NOTE(review): neither of the two fields below is referenced by the methods visible in this part of the
     *               class; presumably they configure acceleration structure creation - confirm against initAS(). */
2320     const AccelerationStructureLayout m_asStructureLayout;
2321     const GeometryType m_geometryType;
2322 
    /* verifyResultBuffer() expects result items for m_gridSizeXYZ[0] * [1] * [2] rays. */
2323     const tcu::UVec3 m_gridSizeXYZ;
    /* Number of floats carried by each result item; also drives the item counts verifyResultBuffer() expects. */
2324     const uint32_t m_nMaxCallableLevels;
    /* Value returned by usesDynamicStackSize(). */
2325     const bool m_useDynamicStackSize;
    /* Top-level acceleration structure owned by the test; released by resetTLAS(). */
2326     std::unique_ptr<TopLevelAccelerationStructure> m_tlPtr;
2327 
    /* Per-stage shader stack sizes, as last passed to onShaderStackSizeDiscovered(). */
2328     VkDeviceSize m_ahitShaderStackSize;
2329     VkDeviceSize m_callableShaderStackSize;
2330     VkDeviceSize m_chitShaderStackSize;
2331     VkDeviceSize m_isectShaderStackSize;
2332     VkDeviceSize m_missShaderStackSize;
2333     VkDeviceSize m_raygenShaderStackSize;
2334 
    /* Declared mutable, so it can be updated from const methods.
     * NOTE(review): not referenced in the visible part of the class; the name suggests a mapping from callable
     *               data level to callable data location - confirm against initPrograms(). */
2335     mutable std::map<uint32_t, uint32_t> m_callableDataLevelToCallableDataLocation;
2336 };
2337 
2338 class CullMaskTest : public TestBase, public ASPropertyProvider
2339 {
2340 public:
CullMaskTest(const AccelerationStructureLayout & asLayout,const GeometryType & geometryType,const bool & useExtraCullMaskBits)2341     CullMaskTest(const AccelerationStructureLayout &asLayout, const GeometryType &geometryType,
2342                  const bool &useExtraCullMaskBits)
2343         : m_asLayout(asLayout)
2344         , m_geometryType(geometryType)
2345         , m_nMaxHitsToRegister(256)
2346         , m_nRaysPerInvocation(4)
2347         , m_useExtraCullMaskBits(useExtraCullMaskBits)
2348         , m_lastCustomInstanceIndexUsed(0)
2349         , m_nCullMasksUsed(1)
2350     {
2351         /* Stub */
2352     }
2353 
~CullMaskTest()2354     ~CullMaskTest()
2355     {
2356         /* Stub */
2357     }
2358 
getCHitShaderCollectionShaderNames() const2359     std::vector<std::string> getCHitShaderCollectionShaderNames() const final
2360     {
2361         return {};
2362     }
2363 
getCullMask(const uint32_t & nBL,const uint32_t & nInstance) const2364     uint8_t getCullMask(const uint32_t &nBL, const uint32_t &nInstance) const final
2365     {
2366         DE_UNREF(nBL);
2367         DE_UNREF(nInstance);
2368 
2369         uint8_t result = (m_nCullMasksUsed++) & 0xFF;
2370 
2371         DE_ASSERT(result != 0);
2372         return result;
2373     }
2374 
getInstanceCustomIndex(const uint32_t & nBL,const uint32_t & nInstance) const2375     uint32_t getInstanceCustomIndex(const uint32_t &nBL, const uint32_t &nInstance) const final
2376     {
2377         DE_UNREF(nBL);
2378         DE_UNREF(nInstance);
2379 
2380         /* NOTE: The formula below generates a sequence of unique large values. */
2381         uint32_t result = (m_lastCustomInstanceIndexUsed * 7 + 153325) & ((1 << 24) - 1);
2382 
2383         if (m_instanceCustomIndexVec.size() <= nInstance)
2384         {
2385             m_instanceCustomIndexVec.resize(nInstance + 1);
2386         }
2387 
2388         m_instanceCustomIndexVec[nInstance] = result;
2389         m_lastCustomInstanceIndexUsed       = result;
2390 
2391         return result;
2392     }
2393 
getDispatchSize() const2394     tcu::UVec3 getDispatchSize() const final
2395     {
2396         //< 3*5*17 == 255, which coincidentally is the maximum cull mask value the spec permits.
2397         //<
2398         //< This global WG size is excessively large if m_nRaysPerInvocation > 1 but the raygen shader has
2399         //< a guard condition check that drops extraneous invocations.
2400         return tcu::UVec3(3, 5, 17);
2401     }
2402 
getResultBufferSize() const2403     uint32_t getResultBufferSize() const final
2404     {
2405         return static_cast<uint32_t>((1 + m_nMaxHitsToRegister * 2) * sizeof(uint32_t));
2406     }
2407 
getTLASPtrVecToBind() const2408     std::vector<TopLevelAccelerationStructure *> getTLASPtrVecToBind() const final
2409     {
2410         return {m_tlPtr.get()};
2411     }
2412 
resetTLAS()2413     void resetTLAS() final
2414     {
2415         m_tlPtr.reset();
2416     }
2417 
initAS(vkt::Context & context,RayTracingProperties *,VkCommandBuffer commandBuffer)2418     void initAS(vkt::Context &context, RayTracingProperties * /* rtPropertiesPtr */,
2419                 VkCommandBuffer commandBuffer) final
2420     {
2421         m_asProviderPtr.reset(new GridASProvider(tcu::Vec3(0, 0, 0),          /* gridStartXYZ          */
2422                                                  tcu::Vec3(1, 1, 1),          /* gridCellSizeXYZ       */
2423                                                  tcu::UVec3(3, 5, 17),        /* gridSizeXYZ           */
2424                                                  tcu::Vec3(2.0f, 2.0f, 2.0f), /* gridInterCellDeltaXYZ */
2425                                                  m_geometryType));
2426 
2427         m_tlPtr = m_asProviderPtr->createTLAS(context, m_asLayout, commandBuffer,
2428                                               VK_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION_BIT_KHR,
2429                                               this,     /* optASPropertyProviderPtr */
2430                                               nullptr); /* optASFeedbackPtr         */
2431     }
2432 
initPrograms(SourceCollections & programCollection) const2433     void initPrograms(SourceCollections &programCollection) const final
2434     {
2435         const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4,
2436                                                   0u,    /* flags        */
2437                                                   true); /* allowSpirv14 */
2438 
2439         const char *hitPropsDefinition = "struct HitProps\n"
2440                                          "{\n"
2441                                          "    uint rayIndex;\n"
2442                                          "    uint instanceCustomIndex;\n"
2443                                          "};\n";
2444 
2445         {
2446             std::stringstream css;
2447 
2448             css << "#version 460 core\n"
2449                    "\n"
2450                    "#extension GL_EXT_ray_tracing : require\n"
2451                    "\n"
2452                    "hitAttributeEXT vec3 unusedAttribute;\n"
2453                    "\n" +
2454                        de::toString(hitPropsDefinition) +
2455                        "\n"
2456                        "layout(location = 0) rayPayloadInEXT      uint   nRay;\n"
2457                        "layout(set      = 0, binding = 0, std430) buffer result\n"
2458                        "{\n"
2459                        "    uint     nHitsRegistered;\n"
2460                        "    uint     nMissesRegistered;\n"
2461                        "    HitProps hits[];\n"
2462                        "};\n"
2463                        "\n"
2464                        "void main()\n"
2465                        "{\n"
2466                        "    uint nHit = atomicAdd(nHitsRegistered, 1);\n"
2467                        "\n"
2468                        "    if (nHit < " +
2469                        de::toString(m_nMaxHitsToRegister) +
2470                        ")\n"
2471                        "    {\n"
2472                        "        hits[nHit].rayIndex            = nRay;\n"
2473                        "        hits[nHit].instanceCustomIndex = gl_InstanceCustomIndexEXT;\n"
2474                        "    }\n"
2475                        "}\n";
2476 
2477             programCollection.glslSources.add("ahit") << glu::AnyHitSource(css.str()) << buildOptions;
2478         }
2479 
2480         {
2481             std::stringstream css;
2482 
2483             css << "#version 460 core\n"
2484                    "\n"
2485                    "#extension GL_EXT_ray_tracing : require\n"
2486                    "\n"
2487                    "hitAttributeEXT vec3 hitAttribute;\n"
2488                    "\n"
2489                    "void main()\n"
2490                    "{\n"
2491                    "    reportIntersectionEXT(0.95f, 0);\n"
2492                    "}\n";
2493 
2494             programCollection.glslSources.add("intersection") << glu::IntersectionSource(css.str()) << buildOptions;
2495         }
2496 
2497         {
2498             std::stringstream css;
2499 
2500             css << "#version 460 core\n"
2501                    "\n"
2502                    "#extension GL_EXT_ray_tracing : require\n"
2503                    "\n" +
2504                        de::toString(hitPropsDefinition) +
2505                        "\n"
2506                        "layout(set      = 0, binding = 0, std430) buffer result\n"
2507                        "{\n"
2508                        "    uint     nHitsRegistered;\n"
2509                        "    uint     nMissesRegistered;\n"
2510                        "    HitProps hits[];\n"
2511                        "};\n"
2512                        "\n"
2513                        "layout(location = 0) rayPayloadInEXT uint rayIndex;\n"
2514                        "\n"
2515                        "void main()\n"
2516                        "{\n"
2517                        "    atomicAdd(nMissesRegistered, 1);\n"
2518                        "}\n";
2519 
2520             programCollection.glslSources.add("miss") << glu::MissSource(css.str()) << buildOptions;
2521         }
2522 
2523         {
2524             std::stringstream css;
2525 
2526             css << "#version 460 core\n"
2527                    "\n"
2528                    "#extension GL_EXT_ray_tracing : require\n"
2529                    "\n"
2530                    "layout(location = 0)              rayPayloadEXT uint               rayIndex;\n"
2531                    "layout(set      = 0, binding = 1) uniform accelerationStructureEXT topLevelAS;\n"
2532                    "\n"
2533                    "void main()\n"
2534                    "{\n"
2535                    "    const uint nRaysPerInvocation = " +
2536                        de::toString(m_nRaysPerInvocation) +
2537                        ";\n"
2538                        "\n"
2539                        "    uint  nInvocation  = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + "
2540                        "gl_LaunchIDEXT.y * gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
2541                        "    uint  rayFlags     = gl_RayFlagsCullBackFacingTrianglesEXT;\n"
2542                        "    float tmin         = 0.001;\n"
2543                        "    float tmax         = 4.0;\n"
2544                        "\n"
2545                        "    if (nInvocation >= 256 / nRaysPerInvocation)\n"
2546                        "    {\n"
2547                        "        return;\n"
2548                        "    }\n"
2549                        "\n"
2550                        "    for (uint nRay = 0; nRay < nRaysPerInvocation; ++nRay)\n"
2551                        "    {\n"
2552                        "        uint  cullMask     = 1 + nInvocation * nRaysPerInvocation + nRay;\n";
2553 
2554             if (m_useExtraCullMaskBits)
2555             {
2556                 css << "cullMask |= 0x00FFFFFF;\n";
2557             }
2558 
2559             css << "        uint  nCell        = nInvocation * nRaysPerInvocation + nRay;\n"
2560                    "        uvec3 cellXYZ      = uvec3(nCell % gl_LaunchSizeEXT.x, (nCell / gl_LaunchSizeEXT.x) % "
2561                    "gl_LaunchSizeEXT.y, (nCell / gl_LaunchSizeEXT.x / gl_LaunchSizeEXT.y) % gl_LaunchSizeEXT.z);\n"
2562                    "        vec3  cellStartXYZ = vec3(cellXYZ) * vec3(2.0);\n"
2563                    "        vec3  cellEndXYZ   = cellStartXYZ + vec3(1.0);\n"
2564                    "        vec3  target       = mix(cellStartXYZ, cellEndXYZ, vec3(0.5) );\n"
2565                    "        vec3  origin       = target - vec3(1, 1, 1);\n"
2566                    "        vec3  direct       = normalize(target - origin);\n"
2567                    "\n"
2568                    "        if (nCell < 255)\n"
2569                    "        {\n"
2570                    "            rayIndex = nCell;"
2571                    "\n"
2572                    "            traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, direct, tmax, 0);\n"
2573                    "        }\n"
2574                    "    }\n"
2575                    "}\n";
2576 
2577             programCollection.glslSources.add("rgen") << glu::RaygenSource(css.str()) << buildOptions;
2578         }
2579     }
2580 
verifyResultBuffer(const void * resultDataPtr) const2581     bool verifyResultBuffer(const void *resultDataPtr) const final
2582     {
2583         const uint32_t *resultU32Ptr = reinterpret_cast<const uint32_t *>(resultDataPtr);
2584         const auto nHitsReported     = *resultU32Ptr;
2585         const auto nMissesReported   = *(resultU32Ptr + 1);
2586         bool result                  = true;
2587 
2588         // For each traced ray:
2589         //
2590         // 1. Exactly one ahit invocation per ray should be reported.
2591         // 2. All hits reported for a ray R should point to a primitive with a valid custom instance index
2592         // 3. The reported custom instance indices must be valid.
2593         std::map<uint32_t, std::vector<uint32_t>> customInstanceIndexToRayIndexVecMap;
2594         std::map<uint32_t, std::vector<uint32_t>> rayIndexToCustomInstanceIndexVecMap;
2595 
2596         typedef struct
2597         {
2598             uint32_t rayIndex;
2599             uint32_t customInstanceHit;
2600         } HitProperties;
2601 
2602         if (nHitsReported != 0xFF)
2603         {
2604             result = false;
2605 
2606             goto end;
2607         }
2608 
2609         if (nMissesReported != 0)
2610         {
2611             result = false;
2612 
2613             goto end;
2614         }
2615 
2616         for (uint32_t nHit = 0; nHit < nHitsReported; ++nHit)
2617         {
2618             const HitProperties *hitPropsPtr = reinterpret_cast<const HitProperties *>(
2619                 resultU32Ptr + 2 /* preamble ints */ + nHit * 2 /* ints per HitProperties item */);
2620 
2621             customInstanceIndexToRayIndexVecMap[hitPropsPtr->customInstanceHit].push_back(hitPropsPtr->rayIndex);
2622             rayIndexToCustomInstanceIndexVecMap[hitPropsPtr->rayIndex].push_back(hitPropsPtr->customInstanceHit);
2623         }
2624 
2625         if (static_cast<uint32_t>(customInstanceIndexToRayIndexVecMap.size()) != nHitsReported)
2626         {
2627             /* Invalid number of unique custom instance indices reported. */
2628             result = false;
2629 
2630             goto end;
2631         }
2632 
2633         if (static_cast<uint32_t>(rayIndexToCustomInstanceIndexVecMap.size()) != nHitsReported)
2634         {
2635             /* Invalid ray indices reported by ahit invocations */
2636             result = false;
2637 
2638             goto end;
2639         }
2640 
2641         for (const auto &currentItem : customInstanceIndexToRayIndexVecMap)
2642         {
2643             if (currentItem.second.size() != 1)
2644             {
2645                 /* More than one ray associated with the same custom instance index */
2646                 result = false;
2647 
2648                 goto end;
2649             }
2650 
2651             if (currentItem.second.at(0) > 255)
2652             {
2653                 /* Invalid ray index associated with the instance index */
2654                 result = false;
2655 
2656                 goto end;
2657             }
2658 
2659             if (std::find(m_instanceCustomIndexVec.begin(), m_instanceCustomIndexVec.end(), currentItem.first) ==
2660                 m_instanceCustomIndexVec.end())
2661             {
2662                 /* Invalid custom instance index reported for the ray */
2663                 result = false;
2664 
2665                 goto end;
2666             }
2667         }
2668 
2669     end:
2670         return result;
2671     }
2672 
2673 private:
2674     const AccelerationStructureLayout m_asLayout;
2675     const GeometryType m_geometryType;
2676     const uint32_t m_nMaxHitsToRegister;
2677     const uint32_t m_nRaysPerInvocation;
2678     const bool m_useExtraCullMaskBits;
2679 
2680     mutable std::vector<uint32_t> m_instanceCustomIndexVec;
2681     mutable uint32_t m_lastCustomInstanceIndexUsed;
2682     mutable uint32_t m_nCullMasksUsed;
2683 
2684     std::unique_ptr<GridASProvider> m_asProviderPtr;
2685     std::unique_ptr<TopLevelAccelerationStructure> m_tlPtr;
2686 };
2687 
2688 class MAXRayHitAttributeSizeTest : public TestBase
2689 {
2690 public:
MAXRayHitAttributeSizeTest(const GeometryType & geometryType,const AccelerationStructureLayout & asStructureLayout)2691     MAXRayHitAttributeSizeTest(const GeometryType &geometryType, const AccelerationStructureLayout &asStructureLayout)
2692         : m_asStructureLayout(asStructureLayout)
2693         , m_geometryType(geometryType)
2694         , m_gridSizeXYZ(tcu::UVec3(512, 1, 1))
2695         , m_nRayAttributeU32s(0)
2696     {
2697     }
2698 
~MAXRayHitAttributeSizeTest()2699     ~MAXRayHitAttributeSizeTest()
2700     {
2701         /* Stub */
2702     }
2703 
getDispatchSize() const2704     tcu::UVec3 getDispatchSize() const final
2705     {
2706         DE_ASSERT(m_gridSizeXYZ[0] != 0);
2707         DE_ASSERT(m_gridSizeXYZ[1] != 0);
2708         DE_ASSERT(m_gridSizeXYZ[2] != 0);
2709 
2710         return tcu::UVec3(m_gridSizeXYZ[0], m_gridSizeXYZ[1], m_gridSizeXYZ[2]);
2711     }
2712 
getResultBufferSize() const2713     uint32_t getResultBufferSize() const final
2714     {
2715         DE_ASSERT(m_gridSizeXYZ[0] != 0);
2716         DE_ASSERT(m_gridSizeXYZ[1] != 0);
2717         DE_ASSERT(m_gridSizeXYZ[2] != 0);
2718 
2719         return static_cast<uint32_t>(
2720             (3 /* nAHits, nCHits, nMisses */ + m_gridSizeXYZ[0] * m_gridSizeXYZ[1] * m_gridSizeXYZ[2] *
2721                                                    m_nRayAttributeU32s * 2 /* stages where result data is stored */) *
2722             sizeof(uint32_t));
2723     }
2724 
getSpecializationInfoPtr(const VkShaderStageFlagBits & shaderStage)2725     VkSpecializationInfo *getSpecializationInfoPtr(const VkShaderStageFlagBits &shaderStage) final
2726     {
2727         VkSpecializationInfo *resultPtr = nullptr;
2728 
2729         if (shaderStage == VK_SHADER_STAGE_INTERSECTION_BIT_KHR || shaderStage == VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR ||
2730             shaderStage == VK_SHADER_STAGE_ANY_HIT_BIT_KHR)
2731         {
2732             resultPtr = &m_specializationInfo;
2733         }
2734 
2735         return resultPtr;
2736     }
2737 
getTLASPtrVecToBind() const2738     std::vector<TopLevelAccelerationStructure *> getTLASPtrVecToBind() const final
2739     {
2740         DE_ASSERT(m_tlPtr != nullptr);
2741 
2742         return {m_tlPtr.get()};
2743     }
2744 
resetTLAS()2745     void resetTLAS() final
2746     {
2747         m_tlPtr.reset();
2748     }
2749 
init(vkt::Context &,RayTracingProperties * rtPropertiesPtr)2750     bool init(vkt::Context & /* context    */, RayTracingProperties *rtPropertiesPtr) final
2751     {
2752         const auto maxRayHitAttributeSize = rtPropertiesPtr->getMaxRayHitAttributeSize();
2753 
2754         // TODO: If U8s are supported, we could cover the remaining space with these..
2755         m_nRayAttributeU32s = maxRayHitAttributeSize / static_cast<uint32_t>(sizeof(uint32_t));
2756         DE_ASSERT(m_nRayAttributeU32s != 0);
2757 
2758         m_specializationInfoMapEntry.constantID = 1;
2759         m_specializationInfoMapEntry.offset     = 0;
2760         m_specializationInfoMapEntry.size       = sizeof(uint32_t);
2761 
2762         m_specializationInfo.dataSize      = sizeof(uint32_t);
2763         m_specializationInfo.mapEntryCount = 1;
2764         m_specializationInfo.pData         = reinterpret_cast<const void *>(&m_nRayAttributeU32s);
2765         m_specializationInfo.pMapEntries   = &m_specializationInfoMapEntry;
2766 
2767         return true;
2768     }
2769 
initAS(vkt::Context & context,RayTracingProperties *,VkCommandBuffer commandBuffer)2770     void initAS(vkt::Context &context, RayTracingProperties * /* rtPropertiesPtr */,
2771                 VkCommandBuffer commandBuffer) final
2772     {
2773         std::unique_ptr<GridASProvider> asProviderPtr(new GridASProvider(tcu::Vec3(0, 0, 0), /* gridStartXYZ          */
2774                                                                          tcu::Vec3(1, 1, 1), /* gridCellSizeXYZ       */
2775                                                                          m_gridSizeXYZ,
2776                                                                          tcu::Vec3(6, 0, 0), /* gridInterCellDeltaXYZ */
2777                                                                          m_geometryType));
2778 
2779         m_tlPtr =
2780             asProviderPtr->createTLAS(context, m_asStructureLayout, commandBuffer, 0, /* bottomLevelGeometryFlags */
2781                                       nullptr,                                        /* optASPropertyProviderPtr */
2782                                       nullptr);                                       /* optASFeedbackPtr         */
2783     }
2784 
initPrograms(SourceCollections & programCollection) const2785     void initPrograms(SourceCollections &programCollection) const final
2786     {
2787         const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4,
2788                                                   0u,    /* flags        */
2789                                                   true); /* allowSpirv14 */
2790 
2791         const char *constantDefinitions = "layout(constant_id = 1) const uint N_UINTS_IN_HIT_ATTRIBUTE = 1;\n";
2792 
2793         const char *hitAttributeDefinition = "\n"
2794                                              "hitAttributeEXT block\n"
2795                                              "{\n"
2796                                              "    uint values[N_UINTS_IN_HIT_ATTRIBUTE];\n"
2797                                              "};\n"
2798                                              "\n";
2799 
2800         const char *resultBufferDefinition = "layout(set      = 0, binding = 0, std430) buffer result\n"
2801                                              "{\n"
2802                                              "    uint nAHitsRegistered;\n"
2803                                              "    uint nCHitsRegistered;\n"
2804                                              "    uint nMissesRegistered;\n"
2805                                              "    uint retrievedValues[N_UINTS_IN_HIT_ATTRIBUTE];\n"
2806                                              "};\n";
2807 
2808         {
2809             std::stringstream css;
2810 
2811             css << "#version 460 core\n"
2812                    "\n"
2813                    "#extension GL_EXT_ray_tracing : require\n"
2814                    "\n" +
2815                        de::toString(constantDefinitions) + de::toString(hitAttributeDefinition) +
2816                        "\n"
2817                        "layout(location = 0) rayPayloadInEXT uint unusedPayload;\n" +
2818                        de::toString(resultBufferDefinition) +
2819                        "\n"
2820                        "void main()\n"
2821                        "{\n"
2822                        "    atomicAdd(nAHitsRegistered, 1);\n"
2823                        "\n"
2824                        "    uint nInvocation = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + "
2825                        "gl_LaunchIDEXT.y * gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
2826                        "\n"
2827                        "    for (uint nUint = 0; nUint < N_UINTS_IN_HIT_ATTRIBUTE; ++nUint)\n"
2828                        "    {\n"
2829                        "        retrievedValues[(2 * nInvocation + 1) * N_UINTS_IN_HIT_ATTRIBUTE + nUint] = "
2830                        "values[nUint];\n"
2831                        "    }\n"
2832                        "}\n";
2833 
2834             programCollection.glslSources.add("ahit") << glu::AnyHitSource(css.str()) << buildOptions;
2835         }
2836 
2837         {
2838             std::stringstream css;
2839 
2840             css << "#version 460 core\n"
2841                    "\n"
2842                    "#extension GL_EXT_ray_tracing : require\n"
2843                    "\n" +
2844                        de::toString(constantDefinitions) + de::toString(hitAttributeDefinition) +
2845                        de::toString(resultBufferDefinition) +
2846                        "\n"
2847                        "layout(location = 0) rayPayloadInEXT uint rayIndex;\n"
2848                        "\n"
2849                        "void main()\n"
2850                        "{\n"
2851                        "    atomicAdd(nCHitsRegistered, 1);\n"
2852                        "\n"
2853                        "    uint nInvocation = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + "
2854                        "gl_LaunchIDEXT.y * gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
2855                        "\n"
2856                        "    for (uint nUint = 0; nUint < N_UINTS_IN_HIT_ATTRIBUTE; ++nUint)\n"
2857                        "    {\n"
2858                        "        retrievedValues[(2 * nInvocation + 0) * N_UINTS_IN_HIT_ATTRIBUTE + nUint] = "
2859                        "values[nUint];\n"
2860                        "    }\n"
2861                        "}\n";
2862 
2863             programCollection.glslSources.add("chit") << glu::ClosestHitSource(css.str()) << buildOptions;
2864         }
2865 
2866         {
2867             std::stringstream css;
2868 
2869             css << "#version 460 core\n"
2870                    "\n"
2871                    "#extension GL_EXT_ray_tracing : require\n"
2872                    "\n" +
2873                        de::toString(constantDefinitions) + de::toString(hitAttributeDefinition) +
2874                        de::toString(resultBufferDefinition) +
2875                        "\n"
2876                        "void main()\n"
2877                        "{\n"
2878                        "    uint nInvocation = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + "
2879                        "gl_LaunchIDEXT.y * gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
2880                        "\n"
2881                        "    for (uint nUint = 0; nUint < N_UINTS_IN_HIT_ATTRIBUTE; ++nUint)\n"
2882                        "    {\n"
2883                        "        values[nUint] = 1 + nInvocation + nUint;\n"
2884                        "    }\n"
2885                        "\n"
2886                        "    reportIntersectionEXT(0.95f, 0);\n"
2887                        "}\n";
2888 
2889             programCollection.glslSources.add("intersection") << glu::IntersectionSource(css.str()) << buildOptions;
2890         }
2891 
2892         {
2893             std::stringstream css;
2894 
2895             css << "#version 460 core\n"
2896                    "\n"
2897                    "#extension GL_EXT_ray_tracing : require\n"
2898                    "\n" +
2899                        de::toString(constantDefinitions) + de::toString(resultBufferDefinition) +
2900                        "\n"
2901                        "void main()\n"
2902                        "{\n"
2903                        "    atomicAdd(nMissesRegistered, 1);\n"
2904                        "}\n";
2905 
2906             programCollection.glslSources.add("miss") << glu::MissSource(css.str()) << buildOptions;
2907         }
2908 
2909         {
2910             std::stringstream css;
2911 
2912             css << "#version 460 core\n"
2913                    "\n"
2914                    "#extension GL_EXT_ray_tracing : require\n"
2915                    "\n"
2916                    "layout(location = 0)              rayPayloadEXT uint               unusedPayload;\n"
2917                    "layout(set      = 0, binding = 1) uniform accelerationStructureEXT accelerationStructure;\n"
2918                    "\n"
2919                    "void main()\n"
2920                    "{\n"
2921                    "    uint  nInvocation  = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + "
2922                    "gl_LaunchIDEXT.y * gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
2923                    "    uint  rayFlags     = 0;\n"
2924                    "    float tmin         = 0.001;\n"
2925                    "    float tmax         = 9.0;\n"
2926                    "\n"
2927                    "    uint  cullMask     = 0xFF;\n"
2928                    "    vec3  cellStartXYZ = vec3(nInvocation * 3.0, 0.0, 0.0);\n"
2929                    "    vec3  cellEndXYZ   = cellStartXYZ + vec3(1.0);\n"
2930                    "    vec3  target       = mix(cellStartXYZ, cellEndXYZ, vec3(0.5) );\n"
2931                    "    vec3  origin       = target - vec3(0, 2, 0);\n"
2932                    "    vec3  direct       = normalize(target - origin);\n"
2933                    "\n"
2934                    "    traceRayEXT(accelerationStructure, rayFlags, cullMask, 0, 0, 0, origin, tmin, direct, tmax, "
2935                    "0);\n"
2936                    "}\n";
2937 
2938             programCollection.glslSources.add("rgen") << glu::RaygenSource(css.str()) << buildOptions;
2939         }
2940     }
2941 
verifyResultBuffer(const void * resultDataPtr) const2942     bool verifyResultBuffer(const void *resultDataPtr) const final
2943     {
2944         const uint32_t *resultU32Ptr = reinterpret_cast<const uint32_t *>(resultDataPtr);
2945         bool result                  = false;
2946 
2947         const auto nAHitsReported    = *resultU32Ptr;
2948         const auto nCHitsRegistered  = *(resultU32Ptr + 1);
2949         const auto nMissesRegistered = *(resultU32Ptr + 2);
2950 
2951         if (nAHitsReported != m_gridSizeXYZ[0] * m_gridSizeXYZ[1] * m_gridSizeXYZ[2] / 2)
2952         {
2953             goto end;
2954         }
2955 
2956         if (nCHitsRegistered != nAHitsReported)
2957         {
2958             goto end;
2959         }
2960 
2961         if (nMissesRegistered != nAHitsReported)
2962         {
2963             goto end;
2964         }
2965 
2966         for (uint32_t nHit = 0; nHit < nAHitsReported; ++nHit)
2967         {
2968             const uint32_t *ahitValues = resultU32Ptr + 3 /* preamble ints */ + (2 * nHit + 0) * m_nRayAttributeU32s;
2969             const uint32_t *chitValues = resultU32Ptr + 3 /* preamble ints */ + (2 * nHit + 1) * m_nRayAttributeU32s;
2970             const bool missExpected    = (nHit % 2) != 0;
2971 
2972             for (uint32_t nValue = 0; nValue < m_nRayAttributeU32s; ++nValue)
2973             {
2974                 if (!missExpected)
2975                 {
2976                     if (ahitValues[nValue] != 1 + nHit + nValue)
2977                     {
2978                         goto end;
2979                     }
2980 
2981                     if (chitValues[nValue] != 1 + nHit + nValue)
2982                     {
2983                         goto end;
2984                     }
2985                 }
2986                 else
2987                 {
2988                     if (ahitValues[nValue] != 0)
2989                     {
2990                         goto end;
2991                     }
2992 
2993                     if (chitValues[nValue] != 0)
2994                     {
2995                         goto end;
2996                     }
2997                 }
2998             }
2999         }
3000 
3001         result = true;
3002     end:
3003         return result;
3004     }
3005 
3006 private:
3007     const AccelerationStructureLayout m_asStructureLayout;
3008     const GeometryType m_geometryType;
3009 
3010     const tcu::UVec3 m_gridSizeXYZ;
3011     uint32_t m_nRayAttributeU32s;
3012     std::unique_ptr<TopLevelAccelerationStructure> m_tlPtr;
3013 
3014     VkSpecializationInfo m_specializationInfo;
3015     VkSpecializationMapEntry m_specializationInfoMapEntry;
3016 };
3017 
3018 class MAXRTInvocationsSupportedTest : public TestBase, public ASPropertyProvider, public IGridASFeedback
3019 {
3020 public:
MAXRTInvocationsSupportedTest(const GeometryType & geometryType,const AccelerationStructureLayout & asStructureLayout)3021     MAXRTInvocationsSupportedTest(const GeometryType &geometryType,
3022                                   const AccelerationStructureLayout &asStructureLayout)
3023         : m_asStructureLayout(asStructureLayout)
3024         , m_geometryType(geometryType)
3025         , m_lastCustomInstanceIndexUsed(0)
3026         , m_nMaxCells(8 * 8 * 8)
3027     {
3028     }
3029 
~MAXRTInvocationsSupportedTest()3030     ~MAXRTInvocationsSupportedTest()
3031     {
3032         /* Stub */
3033     }
3034 
getCHitShaderCollectionShaderNames() const3035     std::vector<std::string> getCHitShaderCollectionShaderNames() const final
3036     {
3037         return {};
3038     }
3039 
getInstanceCustomIndex(const uint32_t & nBL,const uint32_t & nInstance) const3040     uint32_t getInstanceCustomIndex(const uint32_t &nBL, const uint32_t &nInstance) const final
3041     {
3042         DE_UNREF(nBL);
3043         DE_UNREF(nInstance);
3044 
3045         return ++m_lastCustomInstanceIndexUsed;
3046     }
3047 
getDispatchSize() const3048     tcu::UVec3 getDispatchSize() const final
3049     {
3050         DE_ASSERT(m_gridSizeXYZ[0] != 0);
3051         DE_ASSERT(m_gridSizeXYZ[1] != 0);
3052         DE_ASSERT(m_gridSizeXYZ[2] != 0);
3053 
3054         return tcu::UVec3(m_gridSizeXYZ[0], m_gridSizeXYZ[1], m_gridSizeXYZ[2]);
3055     }
3056 
getResultBufferSize() const3057     uint32_t getResultBufferSize() const final
3058     {
3059         DE_ASSERT(m_gridSizeXYZ[0] != 0);
3060         DE_ASSERT(m_gridSizeXYZ[1] != 0);
3061         DE_ASSERT(m_gridSizeXYZ[2] != 0);
3062 
3063         return static_cast<uint32_t>((2 /* nHits, nMisses */ + m_gridSizeXYZ[0] * m_gridSizeXYZ[1] * m_gridSizeXYZ[2] *
3064                                                                    1 /* hit instance custom index */) *
3065                                      sizeof(uint32_t));
3066     }
3067 
getTLASPtrVecToBind() const3068     std::vector<TopLevelAccelerationStructure *> getTLASPtrVecToBind() const final
3069     {
3070         DE_ASSERT(m_tlPtr != nullptr);
3071 
3072         return {m_tlPtr.get()};
3073     }
3074 
init(vkt::Context & context,RayTracingProperties * rtPropertiesPtr)3075     bool init(vkt::Context &context, RayTracingProperties *rtPropertiesPtr) final
3076     {
3077         m_context = &context;
3078         /* NOTE: In order to avoid running into a situation where the test attempts to create a buffer of size larger than permitted by Vulkan,
3079          *       we limit the maximum number of testable invocations to 2^29 on 64bit CTS build and driver or to 2^27 on 32bit */
3080         const auto maxComputeWorkGroupCount        = context.getDeviceProperties().limits.maxComputeWorkGroupCount;
3081         const auto maxComputeWorkGroupSize         = context.getDeviceProperties().limits.maxComputeWorkGroupSize;
3082         const uint64_t maxGlobalRTWorkGroupSize[3] = {
3083             static_cast<uint64_t>(maxComputeWorkGroupCount[0]) * static_cast<uint64_t>(maxComputeWorkGroupSize[0]),
3084             static_cast<uint64_t>(maxComputeWorkGroupCount[1]) * static_cast<uint64_t>(maxComputeWorkGroupSize[1]),
3085             static_cast<uint64_t>(maxComputeWorkGroupCount[2]) * static_cast<uint64_t>(maxComputeWorkGroupSize[2])};
3086         const auto maxRayDispatchInvocationCount =
3087             de::min(static_cast<uint64_t>(rtPropertiesPtr->getMaxRayDispatchInvocationCount()),
3088 #if (DE_PTR_SIZE == 4)
3089                     static_cast<uint64_t>(1ULL << 27));
3090 #else
3091                     static_cast<uint64_t>(1ULL << 29));
3092 #endif
3093 
3094         m_gridSizeXYZ[0] =
3095             de::max(1u, static_cast<uint32_t>((maxRayDispatchInvocationCount) % maxGlobalRTWorkGroupSize[0]));
3096         m_gridSizeXYZ[1] = de::max(1u, static_cast<uint32_t>((maxRayDispatchInvocationCount / m_gridSizeXYZ[0]) %
3097                                                              maxGlobalRTWorkGroupSize[1]));
3098         m_gridSizeXYZ[2] =
3099             de::max(1u, static_cast<uint32_t>((maxRayDispatchInvocationCount / m_gridSizeXYZ[0] / m_gridSizeXYZ[1]) %
3100                                               maxGlobalRTWorkGroupSize[2]));
3101 
3102         /* TODO: The simple formulas above may need to be improved to handle your implementation correctly */
3103         DE_ASSERT(m_gridSizeXYZ[0] * m_gridSizeXYZ[1] * m_gridSizeXYZ[2] == maxRayDispatchInvocationCount);
3104 
3105         return true;
3106     }
3107 
initAS(vkt::Context & context,RayTracingProperties *,VkCommandBuffer commandBuffer)3108     void initAS(vkt::Context &context, RayTracingProperties * /* rtPropertiesPtr */,
3109                 VkCommandBuffer commandBuffer) final
3110     {
3111         std::unique_ptr<GridASProvider> asProviderPtr(
3112             new GridASProvider(tcu::Vec3(0, 0, 0),    /* gridStartXYZ          */
3113                                tcu::Vec3(1, 1, 1),    /* gridCellSizeXYZ       */
3114                                tcu::UVec3(512, 1, 1), /* gridSizeXYZ           */
3115                                tcu::Vec3(3, 0, 0),    /* gridInterCellDeltaXYZ */
3116                                m_geometryType));
3117 
3118         m_tlPtr = asProviderPtr->createTLAS(context, m_asStructureLayout, commandBuffer,
3119                                             VK_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION_BIT_KHR,
3120                                             this,  /* optASPropertyProviderPtr */
3121                                             this); /* optASFeedbackPtr            */
3122     }
3123 
initPrograms(SourceCollections & programCollection) const3124     void initPrograms(SourceCollections &programCollection) const final
3125     {
3126         const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4,
3127                                                   0u,    /* flags        */
3128                                                   true); /* allowSpirv14 */
3129 
3130         const char *hitPropsDefinition = "struct HitProps\n"
3131                                          "{\n"
3132                                          "    uint instanceCustomIndex;\n"
3133                                          "};\n";
3134 
3135         {
3136             std::stringstream css;
3137 
3138             css << "#version 460 core\n"
3139                    "\n"
3140                    "#extension GL_EXT_ray_tracing : require\n"
3141                    "\n"
3142                    "hitAttributeEXT vec3 unusedAttribute;\n"
3143                    "\n" +
3144                        de::toString(hitPropsDefinition) +
3145                        "\n"
3146                        "layout(location = 0) rayPayloadInEXT      uint   unusedPayload;\n"
3147                        "layout(set      = 0, binding = 0, std430) buffer result\n"
3148                        "{\n"
3149                        "    uint     nHitsRegistered;\n"
3150                        "    uint     nMissesRegistered;\n"
3151                        "    HitProps hits[];\n"
3152                        "};\n"
3153                        "\n"
3154                        "void main()\n"
3155                        "{\n"
3156                        "    atomicAdd(nHitsRegistered, 1);\n"
3157                        "\n"
3158                        "    uint nInvocation = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + "
3159                        "gl_LaunchIDEXT.y * gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
3160                        "\n"
3161                        "    hits[nInvocation].instanceCustomIndex = gl_InstanceCustomIndexEXT;\n"
3162                        "}\n";
3163 
3164             programCollection.glslSources.add("ahit") << glu::AnyHitSource(css.str()) << buildOptions;
3165         }
3166 
3167         {
3168             std::stringstream css;
3169 
3170             css << "#version 460 core\n"
3171                    "\n"
3172                    "#extension GL_EXT_ray_tracing : require\n"
3173                    "\n"
3174                    "hitAttributeEXT vec3 hitAttribute;\n"
3175                    "\n"
3176                    "void main()\n"
3177                    "{\n"
3178                    "    reportIntersectionEXT(0.95f, 0);\n"
3179                    "}\n";
3180 
3181             programCollection.glslSources.add("intersection") << glu::IntersectionSource(css.str()) << buildOptions;
3182         }
3183 
3184         {
3185             std::stringstream css;
3186 
3187             css << "#version 460 core\n"
3188                    "\n"
3189                    "#extension GL_EXT_ray_tracing : require\n"
3190                    "\n" +
3191                        de::toString(hitPropsDefinition) +
3192                        "\n"
3193                        "layout(set = 0, binding = 0, std430) buffer result\n"
3194                        "{\n"
3195                        "    uint     nHitsRegistered;\n"
3196                        "    uint     nMissesRegistered;\n"
3197                        "    HitProps hits[];\n"
3198                        "};\n"
3199                        "\n"
3200                        "layout(location = 0) rayPayloadInEXT uint rayIndex;\n"
3201                        "\n"
3202                        "void main()\n"
3203                        "{\n"
3204                        "    atomicAdd(nMissesRegistered, 1);\n"
3205                        "}\n";
3206 
3207             programCollection.glslSources.add("miss") << glu::MissSource(css.str()) << buildOptions;
3208         }
3209 
3210         {
3211             std::stringstream css;
3212 
3213             css << "#version 460 core\n"
3214                    "\n"
3215                    "#extension GL_EXT_ray_tracing : require\n"
3216                    "\n"
3217                    "layout(location = 0)              rayPayloadEXT uint               unusedPayload;\n"
3218                    "layout(set      = 0, binding = 1) uniform accelerationStructureEXT accelerationStructure;\n"
3219                    "\n"
3220                    "void main()\n"
3221                    "{\n"
3222                    "    uint  nInvocation  = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + "
3223                    "gl_LaunchIDEXT.y * gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
3224                    "    uint  rayFlags     = 0;\n"
3225                    "    float tmin         = 0.001;\n"
3226                    "    float tmax         = 2.1;\n"
3227                    "\n"
3228                    "    uint  cullMask     = 0xFF;\n"
3229                    "    vec3  cellStartXYZ = vec3( (nInvocation % " +
3230                        de::toString(m_nMaxCells) +
3231                        ") * 3, 0.0, 0.0);\n"
3232                        "    vec3  cellEndXYZ   = cellStartXYZ + vec3(1.0);\n"
3233                        "    vec3  target       = mix(cellStartXYZ, cellEndXYZ, vec3(0.5) );\n"
3234                        "    vec3  origin       = target - vec3(0, 2, 0);\n"
3235                        "    vec3  direct       = normalize(target - origin);\n"
3236                        "\n"
3237                        "    traceRayEXT(accelerationStructure, rayFlags, cullMask, 0, 0, 0, origin, tmin, direct, "
3238                        "tmax, 0);\n"
3239                        "}\n";
3240 
3241             programCollection.glslSources.add("rgen") << glu::RaygenSource(css.str()) << buildOptions;
3242         }
3243     }
3244 
resetTLAS()3245     void resetTLAS() final
3246     {
3247         m_tlPtr.reset();
3248     }
3249 
verifyResultBuffer(const void * resultDataPtr) const3250     bool verifyResultBuffer(const void *resultDataPtr) const final
3251     {
3252         const uint32_t *resultU32Ptr = reinterpret_cast<const uint32_t *>(resultDataPtr);
3253         bool result                  = false;
3254 
3255         typedef struct
3256         {
3257             uint32_t instanceCustomIndex;
3258         } HitProperties;
3259 
3260         const auto nHitsReported   = *resultU32Ptr;
3261         const auto nMissesReported = *(resultU32Ptr + 1);
3262 
3263         if (nHitsReported != m_gridSizeXYZ[0] * m_gridSizeXYZ[1] * m_gridSizeXYZ[2])
3264         {
3265             goto end;
3266         }
3267 
3268         if (nMissesReported != 0)
3269         {
3270             goto end;
3271         }
3272 
3273         for (uint32_t nRay = 0; nRay < nHitsReported; ++nRay)
3274         {
3275             // Touch watch dog every 100000 loops to avoid timeout issue.
3276             if (nRay > 0 && (nRay % 100000 == 0))
3277                 m_context->getTestContext().touchWatchdog();
3278             const HitProperties *hitPropsPtr =
3279                 reinterpret_cast<const HitProperties *>(resultU32Ptr + 2 /* preamble ints */) + nRay;
3280 
3281             if (m_nRayToInstanceIndexExpected.at(nRay % m_nMaxCells) != hitPropsPtr->instanceCustomIndex)
3282             {
3283                 goto end;
3284             }
3285         }
3286 
3287         result = true;
3288     end:
3289         return result;
3290     }
3291 
3292 private:
onCullMaskAssignedToCell(const tcu::UVec3 & cellLocation,const uint8_t & cullMaskAssigned)3293     void onCullMaskAssignedToCell(const tcu::UVec3 &cellLocation, const uint8_t &cullMaskAssigned)
3294     {
3295         /* Dont'care */
3296         DE_UNREF(cellLocation);
3297         DE_UNREF(cullMaskAssigned);
3298     }
3299 
onInstanceCustomIndexAssignedToCell(const tcu::UVec3 & cellLocation,const uint32_t & customIndexAssigned)3300     void onInstanceCustomIndexAssignedToCell(const tcu::UVec3 &cellLocation, const uint32_t &customIndexAssigned)
3301     {
3302         DE_ASSERT(cellLocation[1] == 0);
3303         DE_ASSERT(cellLocation[2] == 0);
3304 
3305         m_nRayToInstanceIndexExpected[cellLocation[0]] = customIndexAssigned;
3306     }
3307 
3308     vkt::Context *m_context;
3309     const AccelerationStructureLayout m_asStructureLayout;
3310     const GeometryType m_geometryType;
3311 
3312     tcu::UVec3 m_gridSizeXYZ;
3313     mutable uint32_t m_lastCustomInstanceIndexUsed;
3314     const uint32_t m_nMaxCells;
3315     std::unique_ptr<TopLevelAccelerationStructure> m_tlPtr;
3316 
3317     std::map<uint32_t, uint32_t> m_nRayToInstanceIndexExpected;
3318 };
3319 
3320 class NoDuplicateAnyHitTest : public TestBase
3321 {
3322 public:
NoDuplicateAnyHitTest(const AccelerationStructureLayout & asLayout,const GeometryType & geometryType)3323     NoDuplicateAnyHitTest(const AccelerationStructureLayout &asLayout, const GeometryType &geometryType)
3324         : m_asLayout(asLayout)
3325         , m_geometryType(geometryType)
3326         , m_gridSizeXYZ(tcu::UVec3(4, 4, 4))
3327         , m_nRaysToTrace(32)
3328     {
3329         /* Stub */
3330     }
3331 
~NoDuplicateAnyHitTest()3332     ~NoDuplicateAnyHitTest()
3333     {
3334         /* Stub */
3335     }
3336 
getCHitShaderCollectionShaderNames() const3337     std::vector<std::string> getCHitShaderCollectionShaderNames() const final
3338     {
3339         return {};
3340     }
3341 
getDispatchSize() const3342     tcu::UVec3 getDispatchSize() const final
3343     {
3344         return tcu::UVec3(4, 4, m_nRaysToTrace / (4 * 4) + 1);
3345     }
3346 
getResultBufferSize() const3347     uint32_t getResultBufferSize() const final
3348     {
3349         const auto nPrimitives = m_gridSizeXYZ[0] * m_gridSizeXYZ[1] * m_gridSizeXYZ[2];
3350 
3351         return static_cast<uint32_t>((2 /* nHits, nMisses */ + 3 * nPrimitives /* instancePrimitiveIDPairsUsed */) *
3352                                      sizeof(uint32_t) * m_nRaysToTrace);
3353     }
3354 
getTLASPtrVecToBind() const3355     std::vector<TopLevelAccelerationStructure *> getTLASPtrVecToBind() const final
3356     {
3357         return {m_tlPtr.get()};
3358     }
3359 
resetTLAS()3360     void resetTLAS() final
3361     {
3362         m_tlPtr.reset();
3363     }
3364 
initAS(vkt::Context & context,RayTracingProperties *,VkCommandBuffer commandBuffer)3365     void initAS(vkt::Context &context, RayTracingProperties * /* rtPropertiesPtr */,
3366                 VkCommandBuffer commandBuffer) final
3367     {
3368         m_asProviderPtr.reset(new GridASProvider(tcu::Vec3(0, 0, 0),                         /* gridStartXYZ          */
3369                                                  tcu::Vec3(1, 1, 1),                         /* gridCellSizeXYZ       */
3370                                                  m_gridSizeXYZ, tcu::Vec3(2.0f, 2.0f, 2.0f), /* gridInterCellDeltaXYZ */
3371                                                  m_geometryType));
3372 
3373         m_tlPtr = m_asProviderPtr->createTLAS(context, m_asLayout, commandBuffer,
3374                                               VK_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION_BIT_KHR,
3375                                               nullptr,  /* optASPropertyProviderPtr */
3376                                               nullptr); /* optASFedbackPtr          */
3377     }
3378 
initPrograms(SourceCollections & programCollection) const3379     void initPrograms(SourceCollections &programCollection) const final
3380     {
3381         const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4,
3382                                                   0u,    /* flags        */
3383                                                   true); /* allowSpirv14 */
3384 
3385         const auto nTotalPrimitives        = m_gridSizeXYZ[0] * m_gridSizeXYZ[1] * m_gridSizeXYZ[2];
3386         const auto hitPropertiesDefinition = "struct HitProperties\n"
3387                                              "{\n"
3388                                              "    uint nHitsRegistered;\n"
3389                                              "     uint nMissRegistered;\n"
3390                                              "    uint instancePrimitiveIDPairsUsed[3 * " +
3391                                              de::toString(nTotalPrimitives) +
3392                                              "];\n"
3393                                              "};\n";
3394 
3395         {
3396             std::stringstream css;
3397 
3398             css << "#version 460 core\n"
3399                    "\n"
3400                    "#extension GL_EXT_ray_tracing : require\n"
3401                    "\n"
3402                    "hitAttributeEXT vec3 unusedAttribute;\n"
3403                    "\n" +
3404                        hitPropertiesDefinition +
3405                        "\n"
3406                        "layout(location = 0) rayPayloadInEXT      vec3 unusedPayload;\n"
3407                        "layout(set      = 0, binding = 0, std430) buffer result\n"
3408                        "{\n"
3409                        "    HitProperties rayToHitProps["
3410                 << de::toString(m_nRaysToTrace)
3411                 << "];\n"
3412                    "};\n"
3413                    "\n"
3414                    "void main()\n"
3415                    "{\n"
3416                    "    uint nRay            = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + "
3417                    "gl_LaunchIDEXT.y * gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
3418                    "    uint nHitsRegistered = atomicAdd(rayToHitProps[nRay].nHitsRegistered, 1);\n"
3419                    "\n"
3420                    "    rayToHitProps[nRay].instancePrimitiveIDPairsUsed[3 * nHitsRegistered + 0] = 1 + "
3421                    "gl_InstanceID;\n"
3422                    "    rayToHitProps[nRay].instancePrimitiveIDPairsUsed[3 * nHitsRegistered + 1] = 1 + "
3423                    "gl_PrimitiveID;\n"
3424                    "    rayToHitProps[nRay].instancePrimitiveIDPairsUsed[3 * nHitsRegistered + 2] = 1 + "
3425                    "gl_GeometryIndexEXT;\n"
3426                    "}\n";
3427 
3428             programCollection.glslSources.add("ahit") << glu::AnyHitSource(css.str()) << buildOptions;
3429         }
3430 
3431         {
3432             std::stringstream css;
3433 
3434             css << "#version 460 core\n"
3435                    "\n"
3436                    "#extension GL_EXT_ray_tracing : require\n"
3437                    "\n"
3438                    "hitAttributeEXT vec3 hitAttribute;\n"
3439                    "\n"
3440                    "void main()\n"
3441                    "{\n"
3442                    "    reportIntersectionEXT(0.95f, 0);\n"
3443                    "}\n";
3444 
3445             programCollection.glslSources.add("intersection") << glu::IntersectionSource(css.str()) << buildOptions;
3446         }
3447 
3448         {
3449             std::stringstream css;
3450 
3451             css << "#version 460 core\n"
3452                    "\n"
3453                    "#extension GL_EXT_ray_tracing : require\n"
3454                    "\n" +
3455                        hitPropertiesDefinition +
3456                        "layout(location = 0) rayPayloadInEXT      vec3   unusedPayload;\n"
3457                        "layout(set      = 0, binding = 0, std430) buffer result\n"
3458                        "{\n"
3459                        "    HitProperties rayToHitProps["
3460                 << de::toString(m_nRaysToTrace)
3461                 << "];\n"
3462                    "};\n"
3463                    "\n"
3464                    "void main()\n"
3465                    "{\n"
3466                    "    uint nRay = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + gl_LaunchIDEXT.y * "
3467                    "gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
3468                    "\n"
3469                    "    atomicAdd(rayToHitProps[nRay].nMissRegistered, 1);\n"
3470                    "}\n";
3471 
3472             programCollection.glslSources.add("miss") << glu::MissSource(css.str()) << buildOptions;
3473         }
3474 
3475         {
3476             std::stringstream css;
3477 
3478             css << "#version 460 core\n"
3479                    "\n"
3480                    "#extension GL_EXT_ray_tracing : require\n"
3481                    "\n" +
3482                        hitPropertiesDefinition +
3483                        "layout(location = 0)              rayPayloadEXT vec3                     unusedPayload;\n"
3484                        "layout(set      = 0, binding = 1) uniform       accelerationStructureEXT topLevelAS;\n"
3485                        "\n"
3486                        "void main()\n"
3487                        "{\n"
3488                        "    uint  nInvocation = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + "
3489                        "gl_LaunchIDEXT.y * gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
3490                        "    uint  rayFlags    = 0;\n"
3491                        "    uint  cullMask    = 0xFF;\n"
3492                        "    float tmin        = 0.001;\n"
3493                        "    float tmax        = 9.0;\n"
3494                        "    vec3  origin      = vec3(4,                                  4,                            "
3495                        "      4);\n"
3496                        "    vec3  target      = vec3(float(gl_LaunchIDEXT.x * 2) + 0.5f, float(gl_LaunchIDEXT.y * 2) + "
3497                        "0.5f, float(gl_LaunchIDEXT.z * 2) + 0.5f);\n"
3498                        "    vec3  direct      = normalize(target - origin);\n"
3499                        "\n"
3500                        "    if (nInvocation >= "
3501                 << m_nRaysToTrace
3502                 << ")\n"
3503                    "    {\n"
3504                    "        return;\n"
3505                    "    }\n"
3506                    "\n"
3507                    "    traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, direct, tmax, 0);\n"
3508                    "}\n";
3509 
3510             programCollection.glslSources.add("rgen") << glu::RaygenSource(css.str()) << buildOptions;
3511         }
3512     }
3513 
verifyResultBuffer(const void * resultDataPtr) const3514     bool verifyResultBuffer(const void *resultDataPtr) const final
3515     {
3516         const auto nTotalPrimitives = m_gridSizeXYZ[0] * m_gridSizeXYZ[1] * m_gridSizeXYZ[2];
3517         bool result                 = true;
3518 
3519         for (uint32_t nRay = 0; nRay < m_nRaysToTrace; ++nRay)
3520         {
3521             std::vector<std::tuple<uint32_t, uint32_t, uint32_t>> tupleVec;
3522             const auto rayProps = reinterpret_cast<const uint32_t *>(resultDataPtr) + (2 + 3 * nTotalPrimitives) * nRay;
3523 
3524             // 1. At least one ahit invocation must have been made.
3525             if (rayProps[0] == 0)
3526             {
3527                 result = false;
3528 
3529                 goto end;
3530             }
3531 
3532             // 2. It's OK for each ray to intersect many AABBs, but no AABB should have had >1 ahit invocation fired.
3533             for (uint32_t nPrimitive = 0; nPrimitive < nTotalPrimitives; nPrimitive++)
3534             {
3535                 const auto instanceID    = rayProps[2 /* nHits, nMissesRegistered */ + 3 * nPrimitive + 0];
3536                 const auto primitiveID   = rayProps[2 /* nHits, nMissesRegistered */ + 3 * nPrimitive + 1];
3537                 const auto geometryIndex = rayProps[2 /* nHits, nMissesRegistered */ + 3 * nPrimitive + 2];
3538 
3539                 const auto currentTuple =
3540                     std::tuple<uint32_t, uint32_t, uint32_t>(instanceID, primitiveID, geometryIndex);
3541 
3542                 if (instanceID != 0 || primitiveID != 0 || geometryIndex != 0)
3543                 {
3544                     if (std::find(tupleVec.begin(), tupleVec.end(), currentTuple) != tupleVec.end())
3545                     {
3546                         result = false;
3547 
3548                         goto end;
3549                     }
3550 
3551                     tupleVec.push_back(currentTuple);
3552                 }
3553             }
3554 
3555             // 3. None of the traced rays should have triggered the miss shader invocation.
3556             if (rayProps[1] != 0)
3557             {
3558                 result = false;
3559 
3560                 goto end;
3561             }
3562         }
3563 
3564     end:
3565         return result;
3566     }
3567 
3568 private:
3569     const AccelerationStructureLayout m_asLayout;
3570     const GeometryType m_geometryType;
3571     const tcu::UVec3 m_gridSizeXYZ;
3572     const uint32_t m_nRaysToTrace;
3573 
3574     std::unique_ptr<GridASProvider> m_asProviderPtr;
3575     std::unique_ptr<TopLevelAccelerationStructure> m_tlPtr;
3576 };
3577 
3578 const std::vector<VariableType> g_ShaderRecordBlockTestVars1 = {
3579     VariableType::FLOAT, VariableType::VEC2,   VariableType::VEC3,   VariableType::VEC4,
3580 
3581     VariableType::MAT2,  VariableType::MAT2X2, VariableType::MAT2X3, VariableType::MAT2X4,
3582     VariableType::MAT3,  VariableType::MAT3X2, VariableType::MAT3X3, VariableType::MAT3X4,
3583     VariableType::MAT4,  VariableType::MAT4X2, VariableType::MAT4X3, VariableType::MAT4X4,
3584 
3585     VariableType::INT,   VariableType::IVEC2,  VariableType::IVEC3,  VariableType::IVEC4,
3586 
3587     VariableType::UINT,  VariableType::UVEC2,  VariableType::UVEC3,  VariableType::UVEC4,
3588 };
3589 
3590 const std::vector<VariableType> g_ShaderRecordBlockTestVars2 = {
3591     VariableType::DOUBLE, VariableType::DVEC2,   VariableType::DVEC3,   VariableType::DVEC4,
3592 
3593     VariableType::DMAT2,  VariableType::DMAT2X2, VariableType::DMAT2X3, VariableType::DMAT2X4, VariableType::DMAT3,
3594 };
3595 
3596 const std::vector<VariableType> g_ShaderRecordBlockTestVars3 = {
3597     VariableType::DMAT3X2, VariableType::DMAT3X3, VariableType::DMAT3X4, VariableType::DMAT4,
3598     VariableType::DMAT4X2, VariableType::DMAT4X3, VariableType::DMAT4X4,
3599 };
3600 
3601 const std::vector<VariableType> g_ShaderRecordBlockTestVars4 = {
3602     VariableType::VEC3,   VariableType::VEC4,
3603 
3604     VariableType::INT16,  VariableType::I16VEC2, VariableType::I16VEC3, VariableType::I16VEC4,
3605 
3606     VariableType::MAT3X3, VariableType::MAT3X4,  VariableType::MAT4X3,
3607 
3608     VariableType::UINT16, VariableType::U16VEC2, VariableType::U16VEC3, VariableType::U16VEC4,
3609 };
3610 
3611 const std::vector<VariableType> g_ShaderRecordBlockTestVars5 = {
3612     VariableType::VEC3,   VariableType::VEC4,
3613 
3614     VariableType::INT64,  VariableType::I64VEC2, VariableType::I64VEC3, VariableType::I64VEC4,
3615 
3616     VariableType::MAT3X3, VariableType::MAT3X4,  VariableType::MAT4X3,
3617 
3618     VariableType::UINT64, VariableType::U64VEC2, VariableType::U64VEC3, VariableType::U64VEC4,
3619 };
3620 
3621 const std::vector<VariableType> g_ShaderRecordBlockTestVars6 = {
3622     VariableType::VEC3,   VariableType::VEC4,
3623 
3624     VariableType::INT8,   VariableType::I8VEC2, VariableType::I8VEC3, VariableType::I8VEC4,
3625 
3626     VariableType::MAT3X3, VariableType::MAT3X4, VariableType::MAT4X3,
3627 
3628     VariableType::UINT8,  VariableType::U8VEC2, VariableType::U8VEC3, VariableType::U8VEC4,
3629 };
3630 
3631 class ShaderRecordBlockTest : public TestBase
3632 {
3633 public:
ShaderRecordBlockTest(const TestType & testType,const std::vector<VariableType> & varTypesToTest)3634     ShaderRecordBlockTest(const TestType &testType, const std::vector<VariableType> &varTypesToTest)
3635         : m_gridSizeXYZ(tcu::UVec3(2, 2, 2))
3636         , m_testType(testType)
3637         , m_varTypesToTest(varTypesToTest)
3638         , m_resultBufferSize(0)
3639         , m_shaderRecordSize(0)
3640     {
3641         initTestItems();
3642     }
3643 
// Nothing to release explicitly; members clean up after themselves.
~ShaderRecordBlockTest() = default;
3648 
getDispatchSize() const3649     tcu::UVec3 getDispatchSize() const final
3650     {
3651         return tcu::UVec3(3, 1, 1);
3652     }
3653 
getResultBufferSize() const3654     uint32_t getResultBufferSize() const final
3655     {
3656         return m_resultBufferSize;
3657     }
3658 
getShaderRecordData(const ShaderGroups & shaderGroup) const3659     const void *getShaderRecordData(const ShaderGroups &shaderGroup) const final
3660     {
3661         return (shaderGroup == ShaderGroups::HIT_GROUP)  ? m_shaderGroupToRecordDataMap.at(shaderGroup).data() :
3662                (shaderGroup == ShaderGroups::MISS_GROUP) ? m_shaderGroupToRecordDataMap.at(shaderGroup).data() :
3663                                                            nullptr;
3664     }
3665 
getShaderRecordSize(const ShaderGroups & shaderGroup) const3666     uint32_t getShaderRecordSize(const ShaderGroups &shaderGroup) const final
3667     {
3668         DE_ASSERT(m_shaderRecordSize != 0);
3669 
3670         return ((shaderGroup == ShaderGroups::HIT_GROUP) || (shaderGroup == ShaderGroups::MISS_GROUP)) ?
3671                    m_shaderRecordSize :
3672                    0;
3673     }
3674 
getTLASPtrVecToBind() const3675     std::vector<TopLevelAccelerationStructure *> getTLASPtrVecToBind() const final
3676     {
3677         return {m_tlPtr.get()};
3678     }
3679 
getVarsToTest(const TestType & testType)3680     static std::vector<VariableType> getVarsToTest(const TestType &testType)
3681     {
3682         return ((testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_1) ||
3683                 (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_1) ||
3684                 (testType == TestType::SHADER_RECORD_BLOCK_SCALAR_1) ||
3685                 (testType == TestType::SHADER_RECORD_BLOCK_STD430_1)) ?
3686                    g_ShaderRecordBlockTestVars1 :
3687                ((testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_2) ||
3688                 (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_2) ||
3689                 (testType == TestType::SHADER_RECORD_BLOCK_SCALAR_2) ||
3690                 (testType == TestType::SHADER_RECORD_BLOCK_STD430_2)) ?
3691                    g_ShaderRecordBlockTestVars2 :
3692                ((testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_3) ||
3693                 (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_3) ||
3694                 (testType == TestType::SHADER_RECORD_BLOCK_SCALAR_3) ||
3695                 (testType == TestType::SHADER_RECORD_BLOCK_STD430_3)) ?
3696                    g_ShaderRecordBlockTestVars3 :
3697                ((testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_4) ||
3698                 (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_4) ||
3699                 (testType == TestType::SHADER_RECORD_BLOCK_SCALAR_4) ||
3700                 (testType == TestType::SHADER_RECORD_BLOCK_STD430_4)) ?
3701                    g_ShaderRecordBlockTestVars4 :
3702                ((testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_5) ||
3703                 (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_5) ||
3704                 (testType == TestType::SHADER_RECORD_BLOCK_SCALAR_5) ||
3705                 (testType == TestType::SHADER_RECORD_BLOCK_STD430_5)) ?
3706                    g_ShaderRecordBlockTestVars5 :
3707                    g_ShaderRecordBlockTestVars6;
3708     }
3709 
resetTLAS()3710     void resetTLAS() final
3711     {
3712         m_tlPtr.reset();
3713     }
3714 
init(vkt::Context &,RayTracingProperties *)3715     bool init(vkt::Context & /* context */, RayTracingProperties * /* rtPropsPtr */) final
3716     {
3717         // Cache required result buffer size.
3718         {
3719             uint32_t largestBaseTypeSizeUsed = 0;
3720             const auto &lastItem             = m_testItems.items.back();
3721             const uint32_t nResultBytesPerShaderStage =
3722                 lastItem.resultBufferProps.bufferOffset + lastItem.arraySize * lastItem.resultBufferProps.arrayStride;
3723             const VkShaderStageFlagBits shaderStages[] = {
3724                 VK_SHADER_STAGE_MISS_BIT_KHR,
3725                 VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,
3726                 VK_SHADER_STAGE_INTERSECTION_BIT_KHR,
3727                 VK_SHADER_STAGE_ANY_HIT_BIT_KHR,
3728             };
3729 
3730             m_shaderRecordSize =
3731                 lastItem.inputBufferProps.bufferOffset + lastItem.arraySize * lastItem.inputBufferProps.arrayStride;
3732 
3733             for (const auto &currentTestItem : m_testItems.items)
3734             {
3735                 const auto baseType      = getBaseType(currentTestItem.type);
3736                 const auto componentSize = getComponentSizeBytes(baseType);
3737 
3738                 largestBaseTypeSizeUsed = de::max(componentSize, largestBaseTypeSizeUsed);
3739             }
3740 
3741             for (const auto &currentShaderStage : shaderStages)
3742             {
3743                 m_shaderStageToResultBufferOffset[currentShaderStage] = m_resultBufferSize;
3744 
3745                 m_resultBufferSize =
3746                     de::roundUp(m_resultBufferSize, static_cast<uint32_t>(sizeof(largestBaseTypeSizeUsed)));
3747                 m_resultBufferSize += nResultBytesPerShaderStage;
3748             }
3749         }
3750 
3751         return true;
3752     }
3753 
initAS(vkt::Context & context,RayTracingProperties *,VkCommandBuffer commandBuffer)3754     void initAS(vkt::Context &context, RayTracingProperties * /* rtPropertiesPtr */,
3755                 VkCommandBuffer commandBuffer) final
3756     {
3757         m_asProviderPtr.reset(new GridASProvider(tcu::Vec3(0, 0, 0),                         /* gridStartXYZ          */
3758                                                  tcu::Vec3(1, 1, 1),                         /* gridCellSizeXYZ       */
3759                                                  m_gridSizeXYZ, tcu::Vec3(2.0f, 2.0f, 2.0f), /* gridInterCellDeltaXYZ */
3760                                                  GeometryType::AABB));
3761 
3762         m_tlPtr = m_asProviderPtr->createTLAS(context, AccelerationStructureLayout::ONE_TL_MANY_BLS_MANY_GEOMETRIES,
3763                                               commandBuffer, VK_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION_BIT_KHR,
3764                                               nullptr,  /* optASPropertyProviderPtr */
3765                                               nullptr); /* optASFedbackPtr          */
3766     }
3767 
initPrograms(SourceCollections & programCollection) const3768     void initPrograms(SourceCollections &programCollection) const final
3769     {
3770         const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4,
3771                                                   0u,    /* flags        */
3772                                                   true); /* allowSpirv14 */
3773 
3774         const bool isSTD430Test         = isExplicitSTD430OffsetTest(m_testType) || isSTD430LayoutTest(m_testType);
3775         const bool requires16BitStorage = usesI16(m_testType) || usesU16(m_testType);
3776         const bool requires8BitStorage  = usesI8(m_testType) || usesU8(m_testType);
3777         const bool requiresInt64        = usesI64(m_testType) || usesU64(m_testType);
3778         const bool usesExplicitOffsets =
3779             isExplicitScalarOffsetTest(m_testType) || isExplicitSTD430OffsetTest(m_testType);
3780         const auto inputBlockVariablesGLSL =
3781             getGLSLForStructItem(m_testItems, usesExplicitOffsets, true /* targetsInputBuffer            */);
3782         const auto outputStructVariablesGLSL =
3783             getGLSLForStructItem(m_testItems, false, /* includeOffsetLayoutQualifier */
3784                                  false /* targetsInputBuffer            */);
3785 
3786         const auto inputBufferGLSL = "layout (" + std::string((!isSTD430Test) ? "scalar, " : "std430, ") +
3787                                      "shaderRecordEXT) buffer ib\n"
3788                                      "{\n" +
3789                                      inputBlockVariablesGLSL + "} inputBuffer;\n";
3790         const auto outputBufferGLSL = "struct OutputData\n"
3791                                       "{\n" +
3792                                       outputStructVariablesGLSL +
3793                                       "};\n"
3794                                       "\n"
3795                                       "layout (std430, set = 0, binding = 0) buffer ob\n"
3796                                       "{\n"
3797                                       "    OutputData results[4];\n"
3798                                       "};\n";
3799 
3800         std::string preamble;
3801 
3802         {
3803             std::stringstream css;
3804 
3805             css << "#version 460 core\n"
3806                    "\n"
3807                    "#extension GL_EXT_ray_tracing : require\n";
3808 
3809             if (!isSTD430Test)
3810             {
3811                 css << "#extension GL_EXT_scalar_block_layout : require\n";
3812             }
3813 
3814             if (requires16BitStorage)
3815             {
3816                 css << "#extension GL_EXT_shader_16bit_storage : require\n";
3817             }
3818 
3819             if (requires8BitStorage)
3820             {
3821                 css << "#extension GL_EXT_shader_8bit_storage : require\n";
3822             }
3823 
3824             if (requiresInt64)
3825             {
3826                 css << "#extension GL_ARB_gpu_shader_int64 : require\n";
3827             }
3828 
3829             preamble = css.str();
3830         }
3831 
3832         {
3833             std::stringstream css;
3834 
3835             css << preamble
3836                 << "\n"
3837                    "                     hitAttributeEXT vec3 unusedAttribute;\n"
3838                    "layout(location = 0) rayPayloadInEXT vec3 unusedPayload;\n"
3839                    "\n" +
3840                        inputBufferGLSL + outputBufferGLSL +
3841                        "\n"
3842                        "void main()\n"
3843                        "{\n" +
3844                        getGLSLForSetters(m_testItems, 3) + "}\n";
3845 
3846             programCollection.glslSources.add("ahit") << glu::AnyHitSource(css.str()) << buildOptions;
3847         }
3848 
3849         {
3850             std::stringstream css;
3851 
3852             css << preamble
3853                 << "\n"
3854                    "layout(location = 0) rayPayloadInEXT vec3 unusedPayload;\n" +
3855                        inputBufferGLSL + outputBufferGLSL +
3856                        "\n"
3857                        "void main()\n"
3858                        "{\n" +
3859                        getGLSLForSetters(m_testItems, 1) + "}\n";
3860 
3861             programCollection.glslSources.add("chit") << glu::ClosestHitSource(css.str()) << buildOptions;
3862         }
3863 
3864         {
3865             std::stringstream css;
3866 
3867             css << preamble
3868                 << "\n"
3869                    "hitAttributeEXT vec3 hitAttribute;\n"
3870                    "\n" +
3871                        inputBufferGLSL + outputBufferGLSL +
3872                        "\n"
3873                        "void main()\n"
3874                        "{\n" +
3875                        getGLSLForSetters(m_testItems, 2) +
3876                        "\n"
3877                        "    reportIntersectionEXT(0.95f, 0);\n"
3878                        "}\n";
3879 
3880             programCollection.glslSources.add("intersection") << glu::IntersectionSource(css.str()) << buildOptions;
3881         }
3882 
3883         {
3884             std::stringstream css;
3885 
3886             css << preamble
3887                 << "\n"
3888                    "layout(location = 0) rayPayloadInEXT vec3 unusedPayload;\n"
3889                    "\n" +
3890                        inputBufferGLSL + outputBufferGLSL +
3891                        "\n"
3892                        "void main()\n"
3893                        "{\n"
3894                        "    uint nRay = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + gl_LaunchIDEXT.y "
3895                        "* gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
3896                        "\n" +
3897                        getGLSLForSetters(m_testItems, 0) + "}\n";
3898 
3899             programCollection.glslSources.add("miss") << glu::MissSource(css.str()) << buildOptions;
3900         }
3901 
3902         {
3903             std::stringstream css;
3904 
3905             css << preamble
3906                 << "layout(location = 0)                      rayPayloadEXT vec3       unusedPayload;\n"
3907                    "layout(set      = 0, binding = 1) uniform accelerationStructureEXT accelerationStructure;\n"
3908                    "\n"
3909                    "void main()\n"
3910                    "{\n"
3911                    "    uint  nInvocation  = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + "
3912                    "gl_LaunchIDEXT.y * gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
3913                    "    uint  rayFlags     = 0;\n"
3914                    "    float tmin         = 0.001;\n"
3915                    "    float tmax         = 9.0;\n"
3916                    "\n"
3917                    "    uint  cullMask     = 0xFF;\n"
3918                    "    vec3  cellStartXYZ = vec3(nInvocation * 2.0, 0.0, 0.0);\n"
3919                    "    vec3  cellEndXYZ   = cellStartXYZ + vec3(1.0);\n"
3920                    "    vec3  target       = mix(cellStartXYZ, cellEndXYZ, vec3(0.5) );\n"
3921                    "    vec3  origin       = target - vec3(0, 2, 0);\n"
3922                    "    vec3  direct       = normalize(target - origin);\n"
3923                    "\n"
3924                    "    traceRayEXT(accelerationStructure, rayFlags, cullMask, 0, 0, 0, origin, tmin, direct, tmax, "
3925                    "0);\n"
3926                    "}\n";
3927 
3928             programCollection.glslSources.add("rgen") << glu::RaygenSource(css.str()) << buildOptions;
3929         }
3930     }
3931 
isExplicitScalarOffsetTest(const TestType & testType)3932     static bool isExplicitScalarOffsetTest(const TestType &testType)
3933     {
3934         return (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_1) ||
3935                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_2) ||
3936                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_3) ||
3937                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_4) ||
3938                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_5) ||
3939                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_6);
3940     }
3941 
isExplicitSTD430OffsetTest(const TestType & testType)3942     static bool isExplicitSTD430OffsetTest(const TestType &testType)
3943     {
3944         return (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_1) ||
3945                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_2) ||
3946                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_3) ||
3947                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_4) ||
3948                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_5) ||
3949                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_6);
3950     }
3951 
isScalarLayoutTest(const TestType & testType)3952     static bool isScalarLayoutTest(const TestType &testType)
3953     {
3954         return (testType == TestType::SHADER_RECORD_BLOCK_SCALAR_1) ||
3955                (testType == TestType::SHADER_RECORD_BLOCK_SCALAR_2) ||
3956                (testType == TestType::SHADER_RECORD_BLOCK_SCALAR_3) ||
3957                (testType == TestType::SHADER_RECORD_BLOCK_SCALAR_4) ||
3958                (testType == TestType::SHADER_RECORD_BLOCK_SCALAR_5) ||
3959                (testType == TestType::SHADER_RECORD_BLOCK_SCALAR_6);
3960     }
3961 
isSTD430LayoutTest(const TestType & testType)3962     static bool isSTD430LayoutTest(const TestType &testType)
3963     {
3964         return (testType == TestType::SHADER_RECORD_BLOCK_STD430_1) ||
3965                (testType == TestType::SHADER_RECORD_BLOCK_STD430_2) ||
3966                (testType == TestType::SHADER_RECORD_BLOCK_STD430_3) ||
3967                (testType == TestType::SHADER_RECORD_BLOCK_STD430_4) ||
3968                (testType == TestType::SHADER_RECORD_BLOCK_STD430_5) ||
3969                (testType == TestType::SHADER_RECORD_BLOCK_STD430_6);
3970     }
3971 
isTest(const TestType & testType)3972     static bool isTest(const TestType &testType)
3973     {
3974         return (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_1) ||
3975                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_2) ||
3976                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_3) ||
3977                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_4) ||
3978                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_5) ||
3979                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_6) ||
3980                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_1) ||
3981                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_2) ||
3982                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_3) ||
3983                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_4) ||
3984                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_5) ||
3985                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_6) ||
3986                (testType == TestType::SHADER_RECORD_BLOCK_SCALAR_1) ||
3987                (testType == TestType::SHADER_RECORD_BLOCK_SCALAR_2) ||
3988                (testType == TestType::SHADER_RECORD_BLOCK_SCALAR_3) ||
3989                (testType == TestType::SHADER_RECORD_BLOCK_SCALAR_4) ||
3990                (testType == TestType::SHADER_RECORD_BLOCK_SCALAR_5) ||
3991                (testType == TestType::SHADER_RECORD_BLOCK_SCALAR_6) ||
3992                (testType == TestType::SHADER_RECORD_BLOCK_STD430_1) ||
3993                (testType == TestType::SHADER_RECORD_BLOCK_STD430_2) ||
3994                (testType == TestType::SHADER_RECORD_BLOCK_STD430_3) ||
3995                (testType == TestType::SHADER_RECORD_BLOCK_STD430_4) ||
3996                (testType == TestType::SHADER_RECORD_BLOCK_STD430_5) ||
3997                (testType == TestType::SHADER_RECORD_BLOCK_STD430_6);
3998     }
3999 
usesF64(const TestType & testType)4000     static bool usesF64(const TestType &testType)
4001     {
4002         const auto tested_var_types = getVarsToTest(testType);
4003         const bool has_f64 =
4004             std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::DOUBLE) != tested_var_types.end();
4005         const bool has_f64vec2 =
4006             std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::DVEC2) != tested_var_types.end();
4007         const bool has_f64vec3 =
4008             std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::DVEC3) != tested_var_types.end();
4009         const bool has_f64vec4 =
4010             std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::DVEC4) != tested_var_types.end();
4011         const bool has_f64mat2 =
4012             std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::DMAT2) != tested_var_types.end();
4013         const bool has_f64mat3 =
4014             std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::DMAT3) != tested_var_types.end();
4015         const bool has_f64mat4 =
4016             std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::DMAT4) != tested_var_types.end();
4017 
4018         return (has_f64 || has_f64vec2 || has_f64vec3 || has_f64vec4 || has_f64mat2 || has_f64mat3 || has_f64mat4);
4019     }
4020 
usesI8(const TestType & testType)4021     static bool usesI8(const TestType &testType)
4022     {
4023         const auto tested_var_types = getVarsToTest(testType);
4024         const bool has_i8 =
4025             std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::INT8) != tested_var_types.end();
4026         const bool has_i8vec2 =
4027             std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::I8VEC2) != tested_var_types.end();
4028         const bool has_i8vec3 =
4029             std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::I8VEC3) != tested_var_types.end();
4030         const bool has_i8vec4 =
4031             std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::I8VEC4) != tested_var_types.end();
4032 
4033         return (has_i8 || has_i8vec2 || has_i8vec3 || has_i8vec4);
4034     }
4035 
usesI16(const TestType & testType)4036     static bool usesI16(const TestType &testType)
4037     {
4038         const auto tested_var_types = getVarsToTest(testType);
4039         const bool has_i16 =
4040             std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::INT16) != tested_var_types.end();
4041         const bool has_i16vec2 = std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::I16VEC2) !=
4042                                  tested_var_types.end();
4043         const bool has_i16vec3 = std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::I16VEC3) !=
4044                                  tested_var_types.end();
4045         const bool has_i16vec4 = std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::I16VEC4) !=
4046                                  tested_var_types.end();
4047 
4048         return (has_i16 || has_i16vec2 || has_i16vec3 || has_i16vec4);
4049     }
4050 
usesI64(const TestType & testType)4051     static bool usesI64(const TestType &testType)
4052     {
4053         const auto tested_var_types = getVarsToTest(testType);
4054         const bool has_i64 =
4055             std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::INT64) != tested_var_types.end();
4056         const bool has_i64vec2 = std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::I64VEC2) !=
4057                                  tested_var_types.end();
4058         const bool has_i64vec3 = std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::I64VEC3) !=
4059                                  tested_var_types.end();
4060         const bool has_i64vec4 = std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::I64VEC4) !=
4061                                  tested_var_types.end();
4062 
4063         return (has_i64 || has_i64vec2 || has_i64vec3 || has_i64vec4);
4064     }
4065 
usesU8(const TestType & testType)4066     static bool usesU8(const TestType &testType)
4067     {
4068         const auto tested_var_types = getVarsToTest(testType);
4069         const bool has_u8 =
4070             std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::UINT8) != tested_var_types.end();
4071         const bool has_u8vec2 =
4072             std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::U8VEC2) != tested_var_types.end();
4073         const bool has_u8vec3 =
4074             std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::U8VEC3) != tested_var_types.end();
4075         const bool has_u8vec4 =
4076             std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::U8VEC4) != tested_var_types.end();
4077 
4078         return (has_u8 || has_u8vec2 || has_u8vec3 || has_u8vec4);
4079     }
4080 
usesU16(const TestType & testType)4081     static bool usesU16(const TestType &testType)
4082     {
4083         const auto tested_var_types = getVarsToTest(testType);
4084         const bool has_u16 =
4085             std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::UINT16) != tested_var_types.end();
4086         const bool has_u16vec2 = std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::U16VEC2) !=
4087                                  tested_var_types.end();
4088         const bool has_u16vec3 = std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::U16VEC3) !=
4089                                  tested_var_types.end();
4090         const bool has_u16vec4 = std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::U16VEC4) !=
4091                                  tested_var_types.end();
4092 
4093         return (has_u16 || has_u16vec2 || has_u16vec3 || has_u16vec4);
4094     }
4095 
usesU64(const TestType & testType)4096     static bool usesU64(const TestType &testType)
4097     {
4098         const auto tested_var_types = getVarsToTest(testType);
4099         const bool has_u64 =
4100             std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::UINT64) != tested_var_types.end();
4101         const bool has_u64vec2 = std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::U64VEC2) !=
4102                                  tested_var_types.end();
4103         const bool has_u64vec3 = std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::U64VEC3) !=
4104                                  tested_var_types.end();
4105         const bool has_u64vec4 = std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::U64VEC4) !=
4106                                  tested_var_types.end();
4107 
4108         return (has_u64 || has_u64vec2 || has_u64vec3 || has_u64vec4);
4109     }
4110 
verifyResultBuffer(const void * resultBufferDataPtr) const4111     bool verifyResultBuffer(const void *resultBufferDataPtr) const final
4112     {
4113         bool result = false;
4114 
4115         for (const auto &iterator : m_shaderStageToResultBufferOffset)
4116         {
4117             const auto currentShaderStage = iterator.first;
4118             const auto shaderGroup        = ((currentShaderStage == VK_SHADER_STAGE_ANY_HIT_BIT_KHR) ||
4119                                       (currentShaderStage == VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR) ||
4120                                       (currentShaderStage == VK_SHADER_STAGE_INTERSECTION_BIT_KHR)) ?
4121                                                 ShaderGroups::HIT_GROUP :
4122                                                 ShaderGroups::MISS_GROUP;
4123             const auto resultStartOffset  = iterator.second;
4124 
4125             if (currentShaderStage != VK_SHADER_STAGE_MISS_BIT_KHR)
4126                 continue;
4127 
4128             for (const auto &currentItem : m_testItems.items)
4129             {
4130                 const auto baseDataType      = getBaseType(currentItem.type);
4131                 const auto componentSize     = getComponentSizeBytes(baseDataType);
4132                 const auto &expectedDataVec  = currentItem.shaderGroupToRecordDataMap.at(shaderGroup);
4133                 auto expectedDataPtr         = reinterpret_cast<const uint8_t *>(expectedDataVec.data());
4134                 const auto isMatrixType      = isMatrix(currentItem.type);
4135                 const auto nComponents       = getNComponents(currentItem.type);
4136                 const uint8_t *resultDataPtr = reinterpret_cast<const uint8_t *>(resultBufferDataPtr) +
4137                                                resultStartOffset + currentItem.resultBufferProps.bufferOffset;
4138 
4139                 for (uint32_t nArrayItem = 0; nArrayItem < currentItem.arraySize; ++nArrayItem)
4140                 {
4141                     for (uint32_t nComponent = 0; nComponent < nComponents; ++nComponent)
4142                     {
4143                         const auto expectedComponentDataPtr =
4144                             expectedDataPtr +
4145                             ((!isMatrixType) ? componentSize * nComponent :
4146                                                currentItem.inputBufferProps.matrixElementStartOffsets.at(nComponent));
4147                         const auto resultComponentDataPtr =
4148                             resultDataPtr +
4149                             ((!isMatrixType) ? componentSize * nComponent :
4150                                                currentItem.resultBufferProps.matrixElementStartOffsets.at(nComponent));
4151 
4152                         switch (baseDataType)
4153                         {
4154                         case BaseType::F32:
4155                         {
4156                             if (fabs(*reinterpret_cast<const float *>(resultComponentDataPtr) -
4157                                      *reinterpret_cast<const float *>(expectedComponentDataPtr)) > 1e-3f)
4158                             {
4159                                 goto end;
4160                             }
4161 
4162                             break;
4163                         }
4164 
4165                         case BaseType::F64:
4166                         {
4167                             if (fabs(*reinterpret_cast<const double *>(resultComponentDataPtr) -
4168                                      *reinterpret_cast<const double *>(expectedComponentDataPtr)) > 1e-3)
4169                             {
4170                                 goto end;
4171                             }
4172 
4173                             break;
4174                         }
4175 
4176                         case BaseType::I8:
4177                         {
4178                             if (*reinterpret_cast<const int8_t *>(resultComponentDataPtr) !=
4179                                 *reinterpret_cast<const int8_t *>(expectedComponentDataPtr))
4180                             {
4181                                 goto end;
4182                             }
4183 
4184                             break;
4185                         }
4186 
4187                         case BaseType::I16:
4188                         {
4189                             if (*reinterpret_cast<const int16_t *>(resultComponentDataPtr) !=
4190                                 *reinterpret_cast<const int16_t *>(expectedComponentDataPtr))
4191                             {
4192                                 goto end;
4193                             }
4194 
4195                             break;
4196                         }
4197 
4198                         case BaseType::I32:
4199                         {
4200                             if (*reinterpret_cast<const int32_t *>(resultComponentDataPtr) !=
4201                                 *reinterpret_cast<const int32_t *>(expectedComponentDataPtr))
4202                             {
4203                                 goto end;
4204                             }
4205 
4206                             break;
4207                         }
4208 
4209                         case BaseType::I64:
4210                         {
4211                             if (*reinterpret_cast<const int64_t *>(resultComponentDataPtr) !=
4212                                 *reinterpret_cast<const int64_t *>(expectedComponentDataPtr))
4213                             {
4214                                 goto end;
4215                             }
4216 
4217                             break;
4218                         }
4219 
4220                         case BaseType::U8:
4221                         {
4222                             if (*reinterpret_cast<const uint8_t *>(resultComponentDataPtr) !=
4223                                 *reinterpret_cast<const uint8_t *>(expectedComponentDataPtr))
4224                             {
4225                                 goto end;
4226                             }
4227 
4228                             break;
4229                         }
4230 
4231                         case BaseType::U16:
4232                         {
4233                             if (*reinterpret_cast<const uint16_t *>(resultComponentDataPtr) !=
4234                                 *reinterpret_cast<const uint16_t *>(expectedComponentDataPtr))
4235                             {
4236                                 goto end;
4237                             }
4238 
4239                             break;
4240                         }
4241 
4242                         case BaseType::U32:
4243                         {
4244                             if (*reinterpret_cast<const uint32_t *>(resultComponentDataPtr) !=
4245                                 *reinterpret_cast<const uint32_t *>(expectedComponentDataPtr))
4246                             {
4247                                 goto end;
4248                             }
4249 
4250                             break;
4251                         }
4252 
4253                         case BaseType::U64:
4254                         {
4255                             if (*reinterpret_cast<const uint64_t *>(resultComponentDataPtr) !=
4256                                 *reinterpret_cast<const uint64_t *>(expectedComponentDataPtr))
4257                             {
4258                                 goto end;
4259                             }
4260 
4261                             break;
4262                         }
4263 
4264                         default:
4265                         {
4266                             DE_ASSERT(false);
4267                         }
4268                         }
4269                     }
4270 
4271                     expectedDataPtr += currentItem.inputBufferProps.arrayStride;
4272                     resultDataPtr += currentItem.resultBufferProps.arrayStride;
4273                 }
4274             }
4275         }
4276 
4277         result = true;
4278     end:
4279         return result;
4280     }
4281 
4282 private:
4283     typedef struct Item
4284     {
4285         struct BufferProps
4286         {
4287             uint32_t arrayStride;
4288             uint32_t bufferOffset;
4289             std::vector<uint32_t> matrixElementStartOffsets; //< Holds offsets to consecutive matrix element values.
4290 
BufferPropsvkt::RayTracing::__anon87081bdf0111::ShaderRecordBlockTest::Item::BufferProps4291             BufferProps() : arrayStride(0), bufferOffset(0xFFFFFFFF)
4292             {
4293                 /* Stub */
4294             }
4295         };
4296 
4297         BufferProps inputBufferProps;
4298         BufferProps resultBufferProps;
4299 
4300         uint32_t arraySize;
4301         MatrixMajorOrder matrixOrder;
4302         std::string name;
4303         VariableType type;
4304 
4305         std::map<ShaderGroups, std::vector<uint8_t>> shaderGroupToRecordDataMap;
4306 
Itemvkt::RayTracing::__anon87081bdf0111::ShaderRecordBlockTest::Item4307         Item() : arraySize(0), matrixOrder(MatrixMajorOrder::UNKNOWN), type(VariableType::UNKNOWN)
4308         {
4309             /* Stub */
4310         }
4311     } Item;
4312 
4313     struct StructItem
4314     {
4315         std::vector<Item> items;
4316     };
4317 
4318     // Private functions
getBaseType(const VariableType & type) const4319     BaseType getBaseType(const VariableType &type) const
4320     {
4321         auto result = BaseType::UNKNOWN;
4322 
4323         switch (type)
4324         {
4325         case VariableType::FLOAT:
4326         case VariableType::MAT2:
4327         case VariableType::MAT2X2:
4328         case VariableType::MAT2X3:
4329         case VariableType::MAT2X4:
4330         case VariableType::MAT3:
4331         case VariableType::MAT3X2:
4332         case VariableType::MAT3X3:
4333         case VariableType::MAT3X4:
4334         case VariableType::MAT4:
4335         case VariableType::MAT4X2:
4336         case VariableType::MAT4X3:
4337         case VariableType::MAT4X4:
4338         case VariableType::VEC2:
4339         case VariableType::VEC3:
4340         case VariableType::VEC4:
4341         {
4342             result = BaseType::F32;
4343 
4344             break;
4345         }
4346 
4347         case VariableType::DOUBLE:
4348         case VariableType::DMAT2:
4349         case VariableType::DMAT2X2:
4350         case VariableType::DMAT2X3:
4351         case VariableType::DMAT2X4:
4352         case VariableType::DMAT3:
4353         case VariableType::DMAT3X2:
4354         case VariableType::DMAT3X3:
4355         case VariableType::DMAT3X4:
4356         case VariableType::DMAT4:
4357         case VariableType::DMAT4X2:
4358         case VariableType::DMAT4X3:
4359         case VariableType::DMAT4X4:
4360         case VariableType::DVEC2:
4361         case VariableType::DVEC3:
4362         case VariableType::DVEC4:
4363         {
4364             result = BaseType::F64;
4365 
4366             break;
4367         }
4368 
4369         case VariableType::INT16:
4370         case VariableType::I16VEC2:
4371         case VariableType::I16VEC3:
4372         case VariableType::I16VEC4:
4373         {
4374             result = BaseType::I16;
4375 
4376             break;
4377         }
4378 
4379         case VariableType::INT:
4380         case VariableType::IVEC2:
4381         case VariableType::IVEC3:
4382         case VariableType::IVEC4:
4383         {
4384             result = BaseType::I32;
4385 
4386             break;
4387         }
4388 
4389         case VariableType::INT64:
4390         case VariableType::I64VEC2:
4391         case VariableType::I64VEC3:
4392         case VariableType::I64VEC4:
4393         {
4394             result = BaseType::I64;
4395 
4396             break;
4397         }
4398 
4399         case VariableType::INT8:
4400         case VariableType::I8VEC2:
4401         case VariableType::I8VEC3:
4402         case VariableType::I8VEC4:
4403         {
4404             result = BaseType::I8;
4405 
4406             break;
4407         }
4408 
4409         case VariableType::UINT16:
4410         case VariableType::U16VEC2:
4411         case VariableType::U16VEC3:
4412         case VariableType::U16VEC4:
4413         {
4414             result = BaseType::U16;
4415 
4416             break;
4417         }
4418 
4419         case VariableType::UINT:
4420         case VariableType::UVEC2:
4421         case VariableType::UVEC3:
4422         case VariableType::UVEC4:
4423         {
4424             result = BaseType::U32;
4425 
4426             break;
4427         }
4428 
4429         case VariableType::UINT64:
4430         case VariableType::U64VEC2:
4431         case VariableType::U64VEC3:
4432         case VariableType::U64VEC4:
4433         {
4434             result = BaseType::U64;
4435 
4436             break;
4437         }
4438 
4439         case VariableType::UINT8:
4440         case VariableType::U8VEC2:
4441         case VariableType::U8VEC3:
4442         case VariableType::U8VEC4:
4443         {
4444             result = BaseType::U8;
4445 
4446             break;
4447         }
4448 
4449         default:
4450         {
4451             DE_ASSERT(false);
4452         }
4453         }
4454 
4455         return result;
4456     }
4457 
getComponentSizeBytes(const BaseType & type) const4458     uint32_t getComponentSizeBytes(const BaseType &type) const
4459     {
4460         uint32_t result = 0;
4461 
4462         switch (type)
4463         {
4464         case BaseType::I8:
4465         case BaseType::U8:
4466         {
4467             result = 1;
4468 
4469             break;
4470         }
4471 
4472         case BaseType::I16:
4473         case BaseType::U16:
4474         {
4475             result = 2;
4476 
4477             break;
4478         }
4479 
4480         case BaseType::F32:
4481         case BaseType::I32:
4482         case BaseType::U32:
4483         {
4484             result = 4;
4485 
4486             break;
4487         }
4488 
4489         case BaseType::F64:
4490         case BaseType::I64:
4491         case BaseType::U64:
4492         {
4493             result = 8;
4494 
4495             break;
4496         }
4497 
4498         default:
4499         {
4500             DE_ASSERT(false);
4501         }
4502         }
4503 
4504         return result;
4505     }
4506 
getGLSLForSetters(const StructItem & item,const uint32_t & nResultArrayItem) const4507     std::string getGLSLForSetters(const StructItem &item, const uint32_t &nResultArrayItem) const
4508     {
4509         std::string result;
4510 
4511         for (const auto &currentItem : item.items)
4512         {
4513             if (currentItem.arraySize > 1)
4514             {
4515                 result += "for (uint nArrayItem = 0; nArrayItem < " + de::toString(currentItem.arraySize) +
4516                           "; ++nArrayItem)\n"
4517                           "{\n";
4518             }
4519 
4520             result += "results[" + de::toString(nResultArrayItem) + "]." + currentItem.name;
4521 
4522             if (currentItem.arraySize > 1)
4523             {
4524                 result += "[nArrayItem]";
4525             }
4526 
4527             result += " = inputBuffer." + currentItem.name;
4528 
4529             if (currentItem.arraySize > 1)
4530             {
4531                 result += "[nArrayItem]";
4532             }
4533 
4534             result += ";\n";
4535 
4536             if (currentItem.arraySize > 1)
4537             {
4538                 result += "}\n";
4539             }
4540         }
4541 
4542         return result;
4543     }
4544 
getGLSLForStructItem(const StructItem & item,const bool & includeOffsetLayoutQualifier,const bool & targetsInputBuffer) const4545     std::string getGLSLForStructItem(const StructItem &item, const bool &includeOffsetLayoutQualifier,
4546                                      const bool &targetsInputBuffer) const
4547     {
4548         std::string result;
4549 
4550         for (const auto &currentItem : item.items)
4551         {
4552             const bool needsMatrixOrderQualifier = (currentItem.matrixOrder == MatrixMajorOrder::ROW_MAJOR);
4553             const auto variableTypeGLSL          = getVariableTypeGLSLType(currentItem.type);
4554             uint32_t nLayoutQualifiersUsed       = 0;
4555             const uint32_t nLayoutQualifierUses =
4556                 ((includeOffsetLayoutQualifier) ? 1 : 0) + ((needsMatrixOrderQualifier) ? 1 : 0);
4557             const bool usesLayoutQualifiers = (nLayoutQualifierUses > 0);
4558 
4559             if (usesLayoutQualifiers)
4560             {
4561                 result += "layout(";
4562             }
4563 
4564             if (includeOffsetLayoutQualifier)
4565             {
4566                 result += "offset = " + de::toString((targetsInputBuffer) ? currentItem.inputBufferProps.bufferOffset :
4567                                                                             currentItem.resultBufferProps.bufferOffset);
4568 
4569                 if ((++nLayoutQualifiersUsed) != nLayoutQualifierUses)
4570                 {
4571                     result += ", ";
4572                 }
4573             }
4574 
4575             if (needsMatrixOrderQualifier)
4576             {
4577                 result += ((currentItem.matrixOrder == MatrixMajorOrder::COLUMN_MAJOR) ? "column_major" : "row_major");
4578 
4579                 if ((++nLayoutQualifiersUsed) != nLayoutQualifierUses)
4580                 {
4581                     result += ", ";
4582                 }
4583             }
4584 
4585             if (usesLayoutQualifiers)
4586             {
4587                 result += ") ";
4588             }
4589 
4590             result += variableTypeGLSL + std::string(" ") + currentItem.name;
4591 
4592             if (currentItem.arraySize != 1)
4593             {
4594                 result += "[" + de::toString(currentItem.arraySize) + "]";
4595             }
4596 
4597             result += ";\n";
4598         }
4599 
4600         return result;
4601     }
4602 
getMatrixSize(const VariableType & type) const4603     tcu::UVec2 getMatrixSize(const VariableType &type) const
4604     {
4605         auto result = tcu::UVec2();
4606 
4607         switch (type)
4608         {
4609         case VariableType::DMAT2:
4610         case VariableType::DMAT2X2:
4611         case VariableType::MAT2:
4612         case VariableType::MAT2X2:
4613         {
4614             result = tcu::UVec2(2, 2);
4615 
4616             break;
4617         }
4618 
4619         case VariableType::DMAT2X3:
4620         case VariableType::MAT2X3:
4621         {
4622             result = tcu::UVec2(2, 3);
4623 
4624             break;
4625         }
4626 
4627         case VariableType::DMAT2X4:
4628         case VariableType::MAT2X4:
4629         {
4630             result = tcu::UVec2(2, 4);
4631 
4632             break;
4633         }
4634 
4635         case VariableType::DMAT3:
4636         case VariableType::DMAT3X3:
4637         case VariableType::MAT3:
4638         case VariableType::MAT3X3:
4639         {
4640             result = tcu::UVec2(3, 3);
4641 
4642             break;
4643         }
4644 
4645         case VariableType::DMAT3X2:
4646         case VariableType::MAT3X2:
4647         {
4648             result = tcu::UVec2(3, 2);
4649 
4650             break;
4651         }
4652 
4653         case VariableType::DMAT3X4:
4654         case VariableType::MAT3X4:
4655         {
4656             result = tcu::UVec2(3, 4);
4657 
4658             break;
4659         }
4660 
4661         case VariableType::DMAT4:
4662         case VariableType::DMAT4X4:
4663         case VariableType::MAT4:
4664         case VariableType::MAT4X4:
4665         {
4666             result = tcu::UVec2(4, 4);
4667 
4668             break;
4669         }
4670 
4671         case VariableType::DMAT4X2:
4672         case VariableType::MAT4X2:
4673         {
4674             result = tcu::UVec2(4, 2);
4675 
4676             break;
4677         }
4678 
4679         case VariableType::DMAT4X3:
4680         case VariableType::MAT4X3:
4681         {
4682             result = tcu::UVec2(4, 3);
4683 
4684             break;
4685         }
4686 
4687         default:
4688         {
4689             DE_ASSERT(false);
4690 
4691             break;
4692         }
4693         }
4694 
4695         return result;
4696     }
4697 
getNComponents(const VariableType & type) const4698     uint32_t getNComponents(const VariableType &type) const
4699     {
4700         uint32_t result = 0;
4701 
4702         switch (type)
4703         {
4704         case VariableType::DOUBLE:
4705         case VariableType::FLOAT:
4706         case VariableType::INT8:
4707         case VariableType::INT16:
4708         case VariableType::INT64:
4709         case VariableType::INT:
4710         case VariableType::UINT:
4711         case VariableType::UINT8:
4712         case VariableType::UINT16:
4713         case VariableType::UINT64:
4714         {
4715             result = 1;
4716 
4717             break;
4718         }
4719 
4720         case VariableType::DVEC2:
4721         case VariableType::I8VEC2:
4722         case VariableType::I16VEC2:
4723         case VariableType::I64VEC2:
4724         case VariableType::IVEC2:
4725         case VariableType::U8VEC2:
4726         case VariableType::U16VEC2:
4727         case VariableType::U64VEC2:
4728         case VariableType::UVEC2:
4729         case VariableType::VEC2:
4730         {
4731             result = 2;
4732 
4733             break;
4734         }
4735 
4736         case VariableType::DVEC3:
4737         case VariableType::I8VEC3:
4738         case VariableType::I16VEC3:
4739         case VariableType::I64VEC3:
4740         case VariableType::IVEC3:
4741         case VariableType::U8VEC3:
4742         case VariableType::U16VEC3:
4743         case VariableType::U64VEC3:
4744         case VariableType::UVEC3:
4745         case VariableType::VEC3:
4746         {
4747             result = 3;
4748 
4749             break;
4750         }
4751 
4752         case VariableType::DMAT2:
4753         case VariableType::DMAT2X2:
4754         case VariableType::DVEC4:
4755         case VariableType::I8VEC4:
4756         case VariableType::I16VEC4:
4757         case VariableType::I64VEC4:
4758         case VariableType::IVEC4:
4759         case VariableType::MAT2:
4760         case VariableType::MAT2X2:
4761         case VariableType::U8VEC4:
4762         case VariableType::U16VEC4:
4763         case VariableType::U64VEC4:
4764         case VariableType::UVEC4:
4765         case VariableType::VEC4:
4766         {
4767             result = 4;
4768 
4769             break;
4770         }
4771 
4772         case VariableType::DMAT2X3:
4773         case VariableType::DMAT3X2:
4774         case VariableType::MAT2X3:
4775         case VariableType::MAT3X2:
4776         {
4777             result = 6;
4778 
4779             break;
4780         }
4781 
4782         case VariableType::DMAT2X4:
4783         case VariableType::DMAT4X2:
4784         case VariableType::MAT2X4:
4785         case VariableType::MAT4X2:
4786         {
4787             result = 8;
4788 
4789             break;
4790         }
4791 
4792         case VariableType::DMAT3:
4793         case VariableType::DMAT3X3:
4794         case VariableType::MAT3:
4795         case VariableType::MAT3X3:
4796         {
4797             result = 9;
4798 
4799             break;
4800         }
4801 
4802         case VariableType::DMAT3X4:
4803         case VariableType::DMAT4X3:
4804         case VariableType::MAT3X4:
4805         case VariableType::MAT4X3:
4806         {
4807             result = 12;
4808 
4809             break;
4810         }
4811 
4812         case VariableType::DMAT4:
4813         case VariableType::DMAT4X4:
4814         case VariableType::MAT4:
4815         case VariableType::MAT4X4:
4816         {
4817             result = 16;
4818 
4819             break;
4820         }
4821 
4822         default:
4823         {
4824             DE_ASSERT(false);
4825         }
4826         }
4827 
4828         return result;
4829     }
4830 
getNMatrixColumns(const VariableType & type) const4831     uint32_t getNMatrixColumns(const VariableType &type) const
4832     {
4833         uint32_t result = 0;
4834 
4835         switch (type)
4836         {
4837         case VariableType::DMAT2:
4838         case VariableType::DMAT2X2:
4839         case VariableType::DMAT2X3:
4840         case VariableType::DMAT2X4:
4841         case VariableType::MAT2:
4842         case VariableType::MAT2X2:
4843         case VariableType::MAT2X3:
4844         case VariableType::MAT2X4:
4845         {
4846             result = 2;
4847 
4848             break;
4849         }
4850 
4851         case VariableType::DMAT3:
4852         case VariableType::DMAT3X2:
4853         case VariableType::DMAT3X3:
4854         case VariableType::DMAT3X4:
4855         case VariableType::MAT3:
4856         case VariableType::MAT3X2:
4857         case VariableType::MAT3X4:
4858         case VariableType::MAT3X3:
4859         {
4860             result = 3;
4861 
4862             break;
4863         }
4864 
4865         case VariableType::DMAT4X2:
4866         case VariableType::MAT4X2:
4867         case VariableType::DMAT4X3:
4868         case VariableType::MAT4X3:
4869         case VariableType::DMAT4X4:
4870         case VariableType::DMAT4:
4871         case VariableType::MAT4X4:
4872         case VariableType::MAT4:
4873         {
4874             result = 4;
4875 
4876             break;
4877         }
4878 
4879         default:
4880         {
4881             DE_ASSERT(false);
4882         }
4883         }
4884 
4885         return result;
4886     }
4887 
getNMatrixRows(const VariableType & type) const4888     uint32_t getNMatrixRows(const VariableType &type) const
4889     {
4890         uint32_t result = 0;
4891 
4892         switch (type)
4893         {
4894         case VariableType::DMAT2:
4895         case VariableType::DMAT2X2:
4896         case VariableType::DMAT3X2:
4897         case VariableType::DMAT4X2:
4898         case VariableType::MAT2:
4899         case VariableType::MAT2X2:
4900         case VariableType::MAT3X2:
4901         case VariableType::MAT4X2:
4902         {
4903             result = 2;
4904 
4905             break;
4906         }
4907 
4908         case VariableType::DMAT2X3:
4909         case VariableType::DMAT3:
4910         case VariableType::DMAT3X3:
4911         case VariableType::DMAT4X3:
4912         case VariableType::MAT2X3:
4913         case VariableType::MAT3:
4914         case VariableType::MAT3X3:
4915         case VariableType::MAT4X3:
4916         {
4917             result = 3;
4918 
4919             break;
4920         }
4921 
4922         case VariableType::DMAT2X4:
4923         case VariableType::DMAT3X4:
4924         case VariableType::DMAT4:
4925         case VariableType::DMAT4X4:
4926         case VariableType::MAT2X4:
4927         case VariableType::MAT3X4:
4928         case VariableType::MAT4:
4929         case VariableType::MAT4X4:
4930         {
4931             result = 4;
4932 
4933             break;
4934         }
4935 
4936         default:
4937         {
4938             DE_ASSERT(false);
4939         }
4940         }
4941 
4942         return result;
4943     }
4944 
getVariableTypeGLSLType(const VariableType & type) const4945     const char *getVariableTypeGLSLType(const VariableType &type) const
4946     {
4947         const char *resultPtr = "!?";
4948 
4949         switch (type)
4950         {
4951         case VariableType::DOUBLE:
4952             resultPtr = "double";
4953             break;
4954         case VariableType::DMAT2:
4955             resultPtr = "dmat2";
4956             break;
4957         case VariableType::DMAT2X2:
4958             resultPtr = "dmat2x2";
4959             break;
4960         case VariableType::DMAT2X3:
4961             resultPtr = "dmat2x3";
4962             break;
4963         case VariableType::DMAT2X4:
4964             resultPtr = "dmat2x4";
4965             break;
4966         case VariableType::DMAT3:
4967             resultPtr = "dmat3";
4968             break;
4969         case VariableType::DMAT3X2:
4970             resultPtr = "dmat3x2";
4971             break;
4972         case VariableType::DMAT3X3:
4973             resultPtr = "dmat3x3";
4974             break;
4975         case VariableType::DMAT3X4:
4976             resultPtr = "dmat3x4";
4977             break;
4978         case VariableType::DMAT4:
4979             resultPtr = "dmat4";
4980             break;
4981         case VariableType::DMAT4X2:
4982             resultPtr = "dmat4x2";
4983             break;
4984         case VariableType::DMAT4X3:
4985             resultPtr = "dmat4x3";
4986             break;
4987         case VariableType::DMAT4X4:
4988             resultPtr = "dmat4x4";
4989             break;
4990         case VariableType::DVEC2:
4991             resultPtr = "dvec2";
4992             break;
4993         case VariableType::DVEC3:
4994             resultPtr = "dvec3";
4995             break;
4996         case VariableType::DVEC4:
4997             resultPtr = "dvec4";
4998             break;
4999         case VariableType::FLOAT:
5000             resultPtr = "float";
5001             break;
5002         case VariableType::INT16:
5003             resultPtr = "int16_t";
5004             break;
5005         case VariableType::INT64:
5006             resultPtr = "int64_t";
5007             break;
5008         case VariableType::INT8:
5009             resultPtr = "int8_t";
5010             break;
5011         case VariableType::INT:
5012             resultPtr = "int";
5013             break;
5014         case VariableType::I16VEC2:
5015             resultPtr = "i16vec2";
5016             break;
5017         case VariableType::I16VEC3:
5018             resultPtr = "i16vec3";
5019             break;
5020         case VariableType::I16VEC4:
5021             resultPtr = "i16vec4";
5022             break;
5023         case VariableType::I64VEC2:
5024             resultPtr = "i64vec2";
5025             break;
5026         case VariableType::I64VEC3:
5027             resultPtr = "i64vec3";
5028             break;
5029         case VariableType::I64VEC4:
5030             resultPtr = "i64vec4";
5031             break;
5032         case VariableType::I8VEC2:
5033             resultPtr = "i8vec2";
5034             break;
5035         case VariableType::I8VEC3:
5036             resultPtr = "i8vec3";
5037             break;
5038         case VariableType::I8VEC4:
5039             resultPtr = "i8vec4";
5040             break;
5041         case VariableType::IVEC2:
5042             resultPtr = "ivec2";
5043             break;
5044         case VariableType::IVEC3:
5045             resultPtr = "ivec3";
5046             break;
5047         case VariableType::IVEC4:
5048             resultPtr = "ivec4";
5049             break;
5050         case VariableType::MAT2:
5051             resultPtr = "mat2";
5052             break;
5053         case VariableType::MAT2X2:
5054             resultPtr = "mat2x2";
5055             break;
5056         case VariableType::MAT2X3:
5057             resultPtr = "mat2x3";
5058             break;
5059         case VariableType::MAT2X4:
5060             resultPtr = "mat2x4";
5061             break;
5062         case VariableType::MAT3:
5063             resultPtr = "mat3";
5064             break;
5065         case VariableType::MAT3X2:
5066             resultPtr = "mat3x2";
5067             break;
5068         case VariableType::MAT3X3:
5069             resultPtr = "mat3x3";
5070             break;
5071         case VariableType::MAT3X4:
5072             resultPtr = "mat3x4";
5073             break;
5074         case VariableType::MAT4:
5075             resultPtr = "mat4";
5076             break;
5077         case VariableType::MAT4X2:
5078             resultPtr = "mat4x2";
5079             break;
5080         case VariableType::MAT4X3:
5081             resultPtr = "mat4x3";
5082             break;
5083         case VariableType::MAT4X4:
5084             resultPtr = "mat4x4";
5085             break;
5086         case VariableType::UINT16:
5087             resultPtr = "uint16_t";
5088             break;
5089         case VariableType::UINT64:
5090             resultPtr = "uint64_t";
5091             break;
5092         case VariableType::UINT8:
5093             resultPtr = "uint8_t";
5094             break;
5095         case VariableType::UINT:
5096             resultPtr = "uint";
5097             break;
5098         case VariableType::U16VEC2:
5099             resultPtr = "u16vec2";
5100             break;
5101         case VariableType::U16VEC3:
5102             resultPtr = "u16vec3";
5103             break;
5104         case VariableType::U16VEC4:
5105             resultPtr = "u16vec4";
5106             break;
5107         case VariableType::U64VEC2:
5108             resultPtr = "u64vec2";
5109             break;
5110         case VariableType::U64VEC3:
5111             resultPtr = "u64vec3";
5112             break;
5113         case VariableType::U64VEC4:
5114             resultPtr = "u64vec4";
5115             break;
5116         case VariableType::U8VEC2:
5117             resultPtr = "u8vec2";
5118             break;
5119         case VariableType::U8VEC3:
5120             resultPtr = "u8vec3";
5121             break;
5122         case VariableType::U8VEC4:
5123             resultPtr = "u8vec4";
5124             break;
5125         case VariableType::UVEC2:
5126             resultPtr = "uvec2";
5127             break;
5128         case VariableType::UVEC3:
5129             resultPtr = "uvec3";
5130             break;
5131         case VariableType::UVEC4:
5132             resultPtr = "uvec4";
5133             break;
5134         case VariableType::VEC2:
5135             resultPtr = "vec2";
5136             break;
5137         case VariableType::VEC3:
5138             resultPtr = "vec3";
5139             break;
5140         case VariableType::VEC4:
5141             resultPtr = "vec4";
5142             break;
5143 
5144         default:
5145         {
5146             DE_ASSERT(false);
5147         }
5148         }
5149 
5150         return resultPtr;
5151     }
5152 
initTestItems()5153     void initTestItems()
5154     {
5155         de::Random randomNumberGenerator(13567);
5156         const uint32_t testArraySizes[] = {3, 7, 5};
5157 
5158         const ShaderGroups shaderGroups[] = {
5159             ShaderGroups::HIT_GROUP,
5160             ShaderGroups::MISS_GROUP,
5161         };
5162 
5163         const auto nTestArraySizes = sizeof(testArraySizes) / sizeof(testArraySizes[0]);
5164 
5165         for (const auto &currentVariableType : m_varTypesToTest)
5166         {
5167             const auto currentArraySize =
5168                 testArraySizes[static_cast<uint32_t>(m_testItems.items.size()) % nTestArraySizes];
5169             Item newItem;
5170 
5171             newItem.arraySize = currentArraySize;
5172             newItem.name      = "var" + de::toString(m_testItems.items.size());
5173             newItem.type      = currentVariableType;
5174 
5175             // TODO: glslang issue.
5176             // newItem.matrixOrder = static_cast<MatrixMajorOrder>(static_cast<uint32_t>(m_testItems.items.size() ) % static_cast<uint32_t>(MatrixMajorOrder::UNKNOWN) );
5177 
5178             newItem.matrixOrder = MatrixMajorOrder::COLUMN_MAJOR;
5179 
5180             m_testItems.items.push_back(newItem);
5181         }
5182 
5183         // Determine start offsets for matrix elements.
5184         //
5185         // Note: result buffer aways uses std430 layout.
5186         setSTD430MatrixElementOffsets(m_testItems, false /* updateInputBufferProps */);
5187         setSTD430ArrayStrides(m_testItems, false /* updateInputBufferProps */);
5188         setSTD430BufferOffsets(m_testItems, false /* updateInputBufferProps */);
5189 
5190         switch (m_testType)
5191         {
5192         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_1:
5193         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_2:
5194         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_3:
5195         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_4:
5196         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_5:
5197         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_6:
5198         {
5199             setExplicitScalarOffsetMatrixElementOffsets(m_testItems, true /* updateInputBufferProps */);
5200 
5201             break;
5202         }
5203 
5204         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_1:
5205         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_2:
5206         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_3:
5207         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_4:
5208         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_5:
5209         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_6:
5210         {
5211             setExplicitSTD430OffsetMatrixElementOffsets(m_testItems, true /* updateInputBufferProps */);
5212 
5213             break;
5214         }
5215 
5216         case TestType::SHADER_RECORD_BLOCK_SCALAR_1:
5217         case TestType::SHADER_RECORD_BLOCK_SCALAR_2:
5218         case TestType::SHADER_RECORD_BLOCK_SCALAR_3:
5219         case TestType::SHADER_RECORD_BLOCK_SCALAR_4:
5220         case TestType::SHADER_RECORD_BLOCK_SCALAR_5:
5221         case TestType::SHADER_RECORD_BLOCK_SCALAR_6:
5222         {
5223             setScalarMatrixElementOffsets(m_testItems, true /* updateInputBufferProps */);
5224 
5225             break;
5226         }
5227 
5228         case TestType::SHADER_RECORD_BLOCK_STD430_1:
5229         case TestType::SHADER_RECORD_BLOCK_STD430_2:
5230         case TestType::SHADER_RECORD_BLOCK_STD430_3:
5231         case TestType::SHADER_RECORD_BLOCK_STD430_4:
5232         case TestType::SHADER_RECORD_BLOCK_STD430_5:
5233         case TestType::SHADER_RECORD_BLOCK_STD430_6:
5234         {
5235             setSTD430MatrixElementOffsets(m_testItems, true /* updateInputBufferProps */);
5236 
5237             break;
5238         }
5239 
5240         default:
5241         {
5242             DE_ASSERT(false);
5243         }
5244         }
5245 
5246         // Configure array strides for the variables.
5247         switch (m_testType)
5248         {
5249         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_1:
5250         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_2:
5251         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_3:
5252         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_4:
5253         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_5:
5254         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_6:
5255         {
5256             setExplicitScalarOffsetArrayStrides(m_testItems, true /* updateInputBufferProps */);
5257 
5258             break;
5259         }
5260 
5261         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_1:
5262         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_2:
5263         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_3:
5264         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_4:
5265         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_5:
5266         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_6:
5267         {
5268             setExplicitSTD430OffsetArrayStrides(m_testItems, true /* updateInputBufferProps */);
5269 
5270             break;
5271         }
5272 
5273         case TestType::SHADER_RECORD_BLOCK_SCALAR_1:
5274         case TestType::SHADER_RECORD_BLOCK_SCALAR_2:
5275         case TestType::SHADER_RECORD_BLOCK_SCALAR_3:
5276         case TestType::SHADER_RECORD_BLOCK_SCALAR_4:
5277         case TestType::SHADER_RECORD_BLOCK_SCALAR_5:
5278         case TestType::SHADER_RECORD_BLOCK_SCALAR_6:
5279         {
5280             setScalarArrayStrides(m_testItems, true /* updateInputBufferProps */);
5281 
5282             break;
5283         }
5284 
5285         case TestType::SHADER_RECORD_BLOCK_STD430_1:
5286         case TestType::SHADER_RECORD_BLOCK_STD430_2:
5287         case TestType::SHADER_RECORD_BLOCK_STD430_3:
5288         case TestType::SHADER_RECORD_BLOCK_STD430_4:
5289         case TestType::SHADER_RECORD_BLOCK_STD430_5:
5290         case TestType::SHADER_RECORD_BLOCK_STD430_6:
5291         {
5292             setSTD430ArrayStrides(m_testItems, true /* updateInputBufferProps */);
5293 
5294             break;
5295         }
5296 
5297         default:
5298         {
5299             DE_ASSERT(false);
5300         }
5301         }
5302 
5303         // Configure buffer offsets for the variables.
5304         switch (m_testType)
5305         {
5306         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_1:
5307         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_2:
5308         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_3:
5309         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_4:
5310         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_5:
5311         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_6:
5312         {
5313             setExplicitScalarOffsetBufferOffsets(m_testItems, true /* updateInputBufferProps */);
5314 
5315             break;
5316         }
5317 
5318         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_1:
5319         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_2:
5320         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_3:
5321         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_4:
5322         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_5:
5323         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_6:
5324         {
5325             setExplicitSTD430OffsetBufferOffsets(m_testItems, true /* updateInputBufferProps */);
5326 
5327             break;
5328         }
5329 
5330         case TestType::SHADER_RECORD_BLOCK_SCALAR_1:
5331         case TestType::SHADER_RECORD_BLOCK_SCALAR_2:
5332         case TestType::SHADER_RECORD_BLOCK_SCALAR_3:
5333         case TestType::SHADER_RECORD_BLOCK_SCALAR_4:
5334         case TestType::SHADER_RECORD_BLOCK_SCALAR_5:
5335         case TestType::SHADER_RECORD_BLOCK_SCALAR_6:
5336         {
5337             setScalarBufferOffsets(m_testItems, true /* updateInputBufferProps */);
5338 
5339             break;
5340         }
5341 
5342         case TestType::SHADER_RECORD_BLOCK_STD430_1:
5343         case TestType::SHADER_RECORD_BLOCK_STD430_2:
5344         case TestType::SHADER_RECORD_BLOCK_STD430_3:
5345         case TestType::SHADER_RECORD_BLOCK_STD430_4:
5346         case TestType::SHADER_RECORD_BLOCK_STD430_5:
5347         case TestType::SHADER_RECORD_BLOCK_STD430_6:
5348         {
5349             setSTD430BufferOffsets(m_testItems, true /* updateInputBufferProps */);
5350 
5351             break;
5352         }
5353 
5354         default:
5355         {
5356             DE_ASSERT(false);
5357         }
5358         }
5359 
5360         // Bake data to be used in the tested buffer.
5361         for (auto &currentTestItem : m_testItems.items)
5362         {
5363             const auto baseType           = getBaseType(currentTestItem.type);
5364             const auto componentSizeBytes = getComponentSizeBytes(baseType);
5365             const bool isMatrixType       = isMatrix(currentTestItem.type);
5366             const auto nComponents        = getNComponents(currentTestItem.type);
5367             const auto nBytesNeeded       = currentTestItem.arraySize * currentTestItem.inputBufferProps.arrayStride;
5368 
5369             for (const auto &currentShaderGroup : shaderGroups)
5370             {
5371                 auto &currentDataVec = currentTestItem.shaderGroupToRecordDataMap[currentShaderGroup];
5372 
5373                 currentDataVec.resize(nBytesNeeded);
5374 
5375                 for (uint32_t nArrayItem = 0; nArrayItem < currentTestItem.arraySize; ++nArrayItem)
5376                 {
5377                     uint8_t *currentItemDataPtr =
5378                         currentDataVec.data() + nArrayItem * currentTestItem.inputBufferProps.arrayStride;
5379 
5380                     for (uint32_t nComponent = 0; nComponent < nComponents; ++nComponent)
5381                     {
5382                         switch (baseType)
5383                         {
5384                         case BaseType::F32:
5385                         {
5386                             DE_ASSERT(currentItemDataPtr + sizeof(float) <=
5387                                       currentDataVec.data() + currentDataVec.size());
5388 
5389                             *reinterpret_cast<float *>(currentItemDataPtr) = randomNumberGenerator.getFloat();
5390 
5391                             break;
5392                         }
5393 
5394                         case BaseType::F64:
5395                         {
5396                             DE_ASSERT(currentItemDataPtr + sizeof(double) <=
5397                                       currentDataVec.data() + currentDataVec.size());
5398 
5399                             *reinterpret_cast<double *>(currentItemDataPtr) = randomNumberGenerator.getDouble();
5400 
5401                             break;
5402                         }
5403 
5404                         case BaseType::I8:
5405                         {
5406                             DE_ASSERT(currentItemDataPtr + sizeof(int8_t) <=
5407                                       currentDataVec.data() + currentDataVec.size());
5408 
5409                             *reinterpret_cast<int8_t *>(currentItemDataPtr) =
5410                                 static_cast<int8_t>(randomNumberGenerator.getInt(-128, 127));
5411 
5412                             break;
5413                         }
5414 
5415                         case BaseType::I16:
5416                         {
5417                             DE_ASSERT(currentItemDataPtr + sizeof(int16_t) <=
5418                                       currentDataVec.data() + currentDataVec.size());
5419 
5420                             *reinterpret_cast<int16_t *>(currentItemDataPtr) =
5421                                 static_cast<int16_t>(randomNumberGenerator.getInt(-32768, 32767));
5422 
5423                             break;
5424                         }
5425 
5426                         case BaseType::I32:
5427                         {
5428                             DE_ASSERT(currentItemDataPtr + sizeof(int32_t) <=
5429                                       currentDataVec.data() + currentDataVec.size());
5430 
5431                             *reinterpret_cast<int32_t *>(currentItemDataPtr) = randomNumberGenerator.getInt(
5432                                 static_cast<int>(-2147483648LL), static_cast<int>(2147483647LL));
5433 
5434                             break;
5435                         }
5436 
5437                         case BaseType::I64:
5438                         {
5439                             DE_ASSERT(currentItemDataPtr + sizeof(int64_t) <=
5440                                       currentDataVec.data() + currentDataVec.size());
5441 
5442                             *reinterpret_cast<int64_t *>(currentItemDataPtr) = randomNumberGenerator.getInt64();
5443 
5444                             break;
5445                         }
5446 
5447                         case BaseType::U8:
5448                         {
5449                             DE_ASSERT(currentItemDataPtr + sizeof(uint8_t) <=
5450                                       currentDataVec.data() + currentDataVec.size());
5451 
5452                             *reinterpret_cast<uint8_t *>(currentItemDataPtr) = randomNumberGenerator.getUint8();
5453 
5454                             break;
5455                         }
5456 
5457                         case BaseType::U16:
5458                         {
5459                             DE_ASSERT(currentItemDataPtr + sizeof(uint16_t) <=
5460                                       currentDataVec.data() + currentDataVec.size());
5461 
5462                             *reinterpret_cast<uint16_t *>(currentItemDataPtr) = randomNumberGenerator.getUint16();
5463 
5464                             break;
5465                         }
5466 
5467                         case BaseType::U32:
5468                         {
5469                             DE_ASSERT(currentItemDataPtr + sizeof(uint32_t) <=
5470                                       currentDataVec.data() + currentDataVec.size());
5471 
5472                             *reinterpret_cast<uint32_t *>(currentItemDataPtr) = randomNumberGenerator.getUint32();
5473 
5474                             break;
5475                         }
5476 
5477                         case BaseType::U64:
5478                         {
5479                             DE_ASSERT(currentItemDataPtr + sizeof(uint64_t) <=
5480                                       currentDataVec.data() + currentDataVec.size());
5481 
5482                             *reinterpret_cast<uint64_t *>(currentItemDataPtr) = randomNumberGenerator.getUint64();
5483 
5484                             break;
5485                         }
5486 
5487                         default:
5488                         {
5489                             DE_ASSERT(false);
5490                         }
5491                         }
5492 
5493                         if (isMatrixType)
5494                         {
5495                             if (nComponent != (nComponents - 1))
5496                             {
5497                                 const auto delta =
5498                                     currentTestItem.inputBufferProps.matrixElementStartOffsets.at(nComponent + 1) -
5499                                     currentTestItem.inputBufferProps.matrixElementStartOffsets.at(nComponent + 0);
5500 
5501                                 DE_ASSERT(delta >= componentSizeBytes);
5502 
5503                                 currentItemDataPtr += delta;
5504                             }
5505                         }
5506                         else
5507                         {
5508                             currentItemDataPtr += componentSizeBytes;
5509                         }
5510                     }
5511                 }
5512             }
5513         }
5514 
5515         // Merge individual member data into coalesced buffers.
5516         for (const auto &currentShaderGroup : shaderGroups)
5517         {
5518             auto &resultVec = m_shaderGroupToRecordDataMap[currentShaderGroup];
5519 
5520             {
5521                 const auto &lastItem = m_testItems.items.back();
5522 
5523                 resultVec.resize(lastItem.inputBufferProps.bufferOffset +
5524                                  lastItem.shaderGroupToRecordDataMap.at(currentShaderGroup).size());
5525             }
5526 
5527             for (const auto &currentVariable : m_testItems.items)
5528             {
5529                 const auto &currentVariableDataVec = currentVariable.shaderGroupToRecordDataMap.at(currentShaderGroup);
5530 
5531                 DE_ASSERT(resultVec.size() >=
5532                           currentVariable.inputBufferProps.bufferOffset + currentVariableDataVec.size());
5533 
5534                 memcpy(resultVec.data() + currentVariable.inputBufferProps.bufferOffset, currentVariableDataVec.data(),
5535                        currentVariableDataVec.size());
5536             }
5537         }
5538     }
5539 
isMatrix(const VariableType & type) const5540     bool isMatrix(const VariableType &type) const
5541     {
5542         bool result = false;
5543 
5544         switch (type)
5545         {
5546         case VariableType::DMAT2:
5547         case VariableType::DMAT2X2:
5548         case VariableType::DMAT2X3:
5549         case VariableType::DMAT2X4:
5550         case VariableType::DMAT3:
5551         case VariableType::DMAT3X2:
5552         case VariableType::DMAT3X3:
5553         case VariableType::DMAT3X4:
5554         case VariableType::DMAT4:
5555         case VariableType::DMAT4X2:
5556         case VariableType::DMAT4X3:
5557         case VariableType::DMAT4X4:
5558         case VariableType::MAT2:
5559         case VariableType::MAT2X2:
5560         case VariableType::MAT2X3:
5561         case VariableType::MAT2X4:
5562         case VariableType::MAT3:
5563         case VariableType::MAT3X2:
5564         case VariableType::MAT3X3:
5565         case VariableType::MAT3X4:
5566         case VariableType::MAT4:
5567         case VariableType::MAT4X2:
5568         case VariableType::MAT4X3:
5569         case VariableType::MAT4X4:
5570         {
5571             result = true;
5572 
5573             break;
5574         }
5575 
5576         case VariableType::DOUBLE:
5577         case VariableType::DVEC2:
5578         case VariableType::DVEC3:
5579         case VariableType::DVEC4:
5580         case VariableType::FLOAT:
5581         case VariableType::INT8:
5582         case VariableType::INT64:
5583         case VariableType::INT16:
5584         case VariableType::INT:
5585         case VariableType::I16VEC2:
5586         case VariableType::I16VEC3:
5587         case VariableType::I16VEC4:
5588         case VariableType::I64VEC2:
5589         case VariableType::I64VEC3:
5590         case VariableType::I64VEC4:
5591         case VariableType::I8VEC2:
5592         case VariableType::I8VEC3:
5593         case VariableType::I8VEC4:
5594         case VariableType::IVEC2:
5595         case VariableType::IVEC3:
5596         case VariableType::IVEC4:
5597         case VariableType::UINT8:
5598         case VariableType::UINT64:
5599         case VariableType::UINT16:
5600         case VariableType::UINT:
5601         case VariableType::U16VEC2:
5602         case VariableType::U16VEC3:
5603         case VariableType::U16VEC4:
5604         case VariableType::U64VEC2:
5605         case VariableType::U64VEC3:
5606         case VariableType::U64VEC4:
5607         case VariableType::U8VEC2:
5608         case VariableType::U8VEC3:
5609         case VariableType::U8VEC4:
5610         case VariableType::UVEC2:
5611         case VariableType::UVEC3:
5612         case VariableType::UVEC4:
5613         case VariableType::VEC2:
5614         case VariableType::VEC3:
5615         case VariableType::VEC4:
5616         {
5617             result = false;
5618 
5619             break;
5620         }
5621 
5622         default:
5623         {
5624             DE_ASSERT(false);
5625         }
5626         }
5627 
5628         return result;
5629     }
5630 
setExplicitScalarOffsetArrayStrides(StructItem & inputStruct,const bool & updateInputBufferProps)5631     void setExplicitScalarOffsetArrayStrides(StructItem &inputStruct, const bool &updateInputBufferProps)
5632     {
5633         return setScalarArrayStrides(inputStruct, updateInputBufferProps);
5634     }
5635 
setExplicitScalarOffsetBufferOffsets(StructItem & inputStruct,const bool & updateInputBufferProps)5636     void setExplicitScalarOffsetBufferOffsets(StructItem &inputStruct, const bool &updateInputBufferProps)
5637     {
5638         uint32_t nBytesConsumed = 0;
5639 
5640         for (auto &currentItem : inputStruct.items)
5641         {
5642             const auto baseType = getBaseType(currentItem.type);
5643             auto &bufferProps = (updateInputBufferProps) ? currentItem.inputBufferProps : currentItem.resultBufferProps;
5644             const auto componentSizeBytes = getComponentSizeBytes(baseType);
5645             const auto isMatrixVariable   = isMatrix(currentItem.type);
5646             const auto nComponents        = getNComponents(currentItem.type);
5647 
5648             bufferProps.bufferOffset = de::roundUp(nBytesConsumed, componentSizeBytes * 2);
5649 
5650             if (isMatrixVariable)
5651             {
5652                 nBytesConsumed = bufferProps.bufferOffset + currentItem.arraySize * bufferProps.arrayStride;
5653             }
5654             else
5655             {
5656                 nBytesConsumed = bufferProps.bufferOffset + currentItem.arraySize * componentSizeBytes * nComponents;
5657             }
5658         }
5659     }
5660 
setExplicitScalarOffsetElementOffsets(StructItem & inputStruct,const bool & updateInputBufferProps)5661     void setExplicitScalarOffsetElementOffsets(StructItem &inputStruct, const bool &updateInputBufferProps)
5662     {
5663         return setScalarMatrixElementOffsets(inputStruct, updateInputBufferProps);
5664     }
5665 
setExplicitScalarOffsetMatrixElementOffsets(StructItem & inputStruct,const bool & updateInputBufferProps)5666     void setExplicitScalarOffsetMatrixElementOffsets(StructItem &inputStruct, const bool &updateInputBufferProps)
5667     {
5668         return setScalarMatrixElementOffsets(inputStruct, updateInputBufferProps);
5669     }
5670 
setExplicitSTD430OffsetArrayStrides(StructItem & inputStruct,const bool & updateInputBufferProps)5671     void setExplicitSTD430OffsetArrayStrides(StructItem &inputStruct, const bool &updateInputBufferProps)
5672     {
5673         return setSTD430ArrayStrides(inputStruct, updateInputBufferProps);
5674     }
5675 
setExplicitSTD430OffsetBufferOffsets(StructItem & inputStruct,const bool & updateInputBufferProps)5676     void setExplicitSTD430OffsetBufferOffsets(StructItem &inputStruct, const bool &updateInputBufferProps)
5677     {
5678         uint32_t nBytesConsumed = 0;
5679 
5680         for (auto &currentItem : inputStruct.items)
5681         {
5682             const auto baseType = getBaseType(currentItem.type);
5683             auto &bufferProps = (updateInputBufferProps) ? currentItem.inputBufferProps : currentItem.resultBufferProps;
5684             const auto componentSizeBytes = getComponentSizeBytes(baseType);
5685             const auto isMatrixVariable   = isMatrix(currentItem.type);
5686             const auto nComponents        = getNComponents(currentItem.type);
5687             uint32_t requiredAlignment    = 0;
5688 
5689             uint32_t nMatrixRows = 0;
5690 
5691             if (isMatrixVariable)
5692             {
5693                 nMatrixRows = getNMatrixRows(currentItem.type);
5694 
5695                 if (nMatrixRows == 3)
5696                 {
5697                     nMatrixRows = 4;
5698                 }
5699 
5700                 requiredAlignment = nMatrixRows * componentSizeBytes;
5701             }
5702             else if (nComponents == 1)
5703             {
5704                 DE_ASSERT((baseType == BaseType::F32) || (baseType == BaseType::F64) || (baseType == BaseType::I16) ||
5705                           (baseType == BaseType::I32) || (baseType == BaseType::I64) || (baseType == BaseType::I8) ||
5706                           (baseType == BaseType::U16) || (baseType == BaseType::U32) || (baseType == BaseType::U64) ||
5707                           (baseType == BaseType::U8));
5708 
5709                 requiredAlignment = componentSizeBytes;
5710             }
5711             else if (nComponents == 2)
5712             {
5713                 requiredAlignment = 2 * componentSizeBytes;
5714             }
5715             else
5716             {
5717                 requiredAlignment = 4 * componentSizeBytes;
5718             }
5719 
5720             bufferProps.bufferOffset = de::roundUp(nBytesConsumed, requiredAlignment * 2);
5721 
5722             if (isMatrixVariable)
5723             {
5724                 nBytesConsumed = bufferProps.bufferOffset + currentItem.arraySize * bufferProps.arrayStride;
5725             }
5726             else
5727             {
5728                 nBytesConsumed = bufferProps.bufferOffset +
5729                                  currentItem.arraySize * componentSizeBytes * ((nComponents == 3) ? 4 : nComponents);
5730             }
5731         }
5732     }
5733 
setExplicitSTD430OffsetElementOffsets(StructItem & inputStruct,const bool & updateInputBufferProps)5734     void setExplicitSTD430OffsetElementOffsets(StructItem &inputStruct, const bool &updateInputBufferProps)
5735     {
5736         return setSTD430MatrixElementOffsets(inputStruct, updateInputBufferProps);
5737     }
5738 
setExplicitSTD430OffsetMatrixElementOffsets(StructItem & inputStruct,const bool & updateInputBufferProps)5739     void setExplicitSTD430OffsetMatrixElementOffsets(StructItem &inputStruct, const bool &updateInputBufferProps)
5740     {
5741         return setSTD430MatrixElementOffsets(inputStruct, updateInputBufferProps);
5742     }
5743 
setSTD430ArrayStrides(StructItem & inputStruct,const bool & updateInputBufferProps)5744     void setSTD430ArrayStrides(StructItem &inputStruct, const bool &updateInputBufferProps)
5745     {
5746         for (auto &currentItem : inputStruct.items)
5747         {
5748             const auto baseType = getBaseType(currentItem.type);
5749             auto &bufferProps = (updateInputBufferProps) ? currentItem.inputBufferProps : currentItem.resultBufferProps;
5750             const auto componentSizeBytes = getComponentSizeBytes(baseType);
5751             const auto isMatrixVariable   = isMatrix(currentItem.type);
5752             const auto nComponents        = getNComponents(currentItem.type);
5753             uint32_t requiredStride       = 0;
5754 
5755             if (isMatrixVariable)
5756             {
5757                 auto nMatrixColumns = getNMatrixColumns(currentItem.type);
5758                 auto nMatrixRows    = getNMatrixRows(currentItem.type);
5759 
5760                 if (nMatrixRows == 3)
5761                 {
5762                     nMatrixRows = 4;
5763                 }
5764 
5765                 requiredStride = nMatrixRows * nMatrixColumns * componentSizeBytes;
5766             }
5767             else
5768             {
5769                 requiredStride = componentSizeBytes * ((nComponents == 3) ? 4 : nComponents);
5770             }
5771 
5772             bufferProps.arrayStride = requiredStride;
5773         }
5774     }
5775 
setSTD430BufferOffsets(StructItem & inputStruct,const bool & updateInputBufferProps)5776     void setSTD430BufferOffsets(StructItem &inputStruct, const bool &updateInputBufferProps)
5777     {
5778         uint32_t nBytesConsumed = 0;
5779 
5780         for (auto &currentItem : inputStruct.items)
5781         {
5782             const auto baseType = getBaseType(currentItem.type);
5783             auto &bufferProps = (updateInputBufferProps) ? currentItem.inputBufferProps : currentItem.resultBufferProps;
5784             const auto componentSizeBytes = getComponentSizeBytes(baseType);
5785             const auto isMatrixVariable   = isMatrix(currentItem.type);
5786             const auto nComponents        = getNComponents(currentItem.type);
5787             uint32_t requiredAlignment    = 0;
5788 
5789             uint32_t nMatrixRows = 0;
5790 
5791             if (isMatrixVariable)
5792             {
5793                 nMatrixRows = getNMatrixRows(currentItem.type);
5794 
5795                 if (nMatrixRows == 3)
5796                 {
5797                     nMatrixRows = 4;
5798                 }
5799 
5800                 requiredAlignment = nMatrixRows * componentSizeBytes;
5801             }
5802             else if (nComponents == 1)
5803             {
5804                 DE_ASSERT((baseType == BaseType::F32) || (baseType == BaseType::F64) || (baseType == BaseType::I16) ||
5805                           (baseType == BaseType::I32) || (baseType == BaseType::I64) || (baseType == BaseType::I8) ||
5806                           (baseType == BaseType::U16) || (baseType == BaseType::U32) || (baseType == BaseType::U64) ||
5807                           (baseType == BaseType::U8));
5808 
5809                 requiredAlignment = componentSizeBytes;
5810             }
5811             else if (nComponents == 2)
5812             {
5813                 requiredAlignment = 2 * componentSizeBytes;
5814             }
5815             else
5816             {
5817                 requiredAlignment = 4 * componentSizeBytes;
5818             }
5819 
5820             bufferProps.bufferOffset = de::roundUp(nBytesConsumed, requiredAlignment);
5821 
5822             if (isMatrixVariable)
5823             {
5824                 nBytesConsumed = bufferProps.bufferOffset + currentItem.arraySize * bufferProps.arrayStride;
5825             }
5826             else
5827             {
5828                 nBytesConsumed = bufferProps.bufferOffset +
5829                                  currentItem.arraySize * componentSizeBytes * ((nComponents == 3) ? 4 : nComponents);
5830             }
5831         }
5832     }
5833 
setScalarArrayStrides(StructItem & inputStruct,const bool & updateInputBufferProps)5834     void setScalarArrayStrides(StructItem &inputStruct, const bool &updateInputBufferProps)
5835     {
5836         for (auto &currentItem : inputStruct.items)
5837         {
5838             const auto baseType = getBaseType(currentItem.type);
5839             auto &bufferProps = (updateInputBufferProps) ? currentItem.inputBufferProps : currentItem.resultBufferProps;
5840             const auto componentSizeBytes = getComponentSizeBytes(baseType);
5841             const auto isMatrixVariable   = isMatrix(currentItem.type);
5842             const auto nComponents        = getNComponents(currentItem.type);
5843 
5844             if (isMatrixVariable)
5845             {
5846                 auto nMatrixColumns = getNMatrixColumns(currentItem.type);
5847                 auto nMatrixRows    = getNMatrixRows(currentItem.type);
5848 
5849                 bufferProps.arrayStride = nMatrixRows * nMatrixColumns * componentSizeBytes;
5850             }
5851             else
5852             {
5853                 bufferProps.arrayStride = componentSizeBytes * nComponents;
5854             }
5855         }
5856     }
5857 
setScalarBufferOffsets(StructItem & inputStruct,const bool & updateInputBufferProps)5858     void setScalarBufferOffsets(StructItem &inputStruct, const bool &updateInputBufferProps)
5859     {
5860         uint32_t nBytesConsumed = 0;
5861 
5862         for (auto &currentItem : inputStruct.items)
5863         {
5864             const auto baseType = getBaseType(currentItem.type);
5865             auto &bufferProps = (updateInputBufferProps) ? currentItem.inputBufferProps : currentItem.resultBufferProps;
5866             const auto componentSizeBytes = getComponentSizeBytes(baseType);
5867             const auto isMatrixVariable   = isMatrix(currentItem.type);
5868             const auto nComponents        = getNComponents(currentItem.type);
5869 
5870             bufferProps.bufferOffset = de::roundUp(nBytesConsumed, componentSizeBytes);
5871 
5872             if (isMatrixVariable)
5873             {
5874                 nBytesConsumed = bufferProps.bufferOffset + currentItem.arraySize * bufferProps.arrayStride;
5875             }
5876             else
5877             {
5878                 nBytesConsumed = bufferProps.bufferOffset + currentItem.arraySize * componentSizeBytes * nComponents;
5879             }
5880         }
5881     }
5882 
setScalarMatrixElementOffsets(StructItem & inputStruct,const bool & updateInputBufferProps)5883     void setScalarMatrixElementOffsets(StructItem &inputStruct, const bool &updateInputBufferProps)
5884     {
5885         for (auto &currentVariable : inputStruct.items)
5886         {
5887             if (isMatrix(currentVariable.type))
5888             {
5889                 auto &bufferProps =
5890                     (updateInputBufferProps) ? currentVariable.inputBufferProps : currentVariable.resultBufferProps;
5891                 const auto componentSizeBytes       = getComponentSizeBytes(getBaseType(currentVariable.type));
5892                 uint32_t currentMatrixElementOffset = 0;
5893                 const auto nMatrixColumns           = getNMatrixColumns(currentVariable.type);
5894                 const auto nMatrixRows              = getNMatrixRows(currentVariable.type);
5895 
5896                 for (uint32_t nMatrixColumn = 0; nMatrixColumn < nMatrixColumns; ++nMatrixColumn)
5897                 {
5898                     currentMatrixElementOffset =
5899                         de::roundUp(nMatrixRows * componentSizeBytes * nMatrixColumn, componentSizeBytes);
5900 
5901                     for (uint32_t nMatrixRow = 0; nMatrixRow < nMatrixRows; ++nMatrixRow)
5902                     {
5903                         bufferProps.matrixElementStartOffsets.push_back(currentMatrixElementOffset);
5904 
5905                         currentMatrixElementOffset += componentSizeBytes;
5906                     }
5907                 }
5908             }
5909         }
5910     }
5911 
setSTD430MatrixElementOffsets(StructItem & inputStruct,const bool & updateInputBufferProps)5912     void setSTD430MatrixElementOffsets(StructItem &inputStruct, const bool &updateInputBufferProps)
5913     {
5914         for (auto &currentVariable : inputStruct.items)
5915         {
5916             if (isMatrix(currentVariable.type))
5917             {
5918                 auto &bufferProps =
5919                     (updateInputBufferProps) ? currentVariable.inputBufferProps : currentVariable.resultBufferProps;
5920                 const auto componentSizeBytes       = getComponentSizeBytes(getBaseType(currentVariable.type));
5921                 uint32_t currentMatrixElementOffset = 0;
5922                 auto nMatrixColumns                 = getNMatrixColumns(currentVariable.type);
5923                 auto nMatrixRows                    = getNMatrixRows(currentVariable.type);
5924 
5925                 if (currentVariable.matrixOrder == MatrixMajorOrder::COLUMN_MAJOR)
5926                 {
5927                     for (uint32_t nMatrixColumn = 0; nMatrixColumn < nMatrixColumns; ++nMatrixColumn)
5928                     {
5929                         currentMatrixElementOffset = de::roundUp(
5930                             static_cast<uint32_t>(nMatrixRows * componentSizeBytes * nMatrixColumn),
5931                             static_cast<uint32_t>(((nMatrixRows == 3) ? 4 : nMatrixRows) * componentSizeBytes));
5932 
5933                         for (uint32_t nMatrixRow = 0; nMatrixRow < nMatrixRows; ++nMatrixRow)
5934                         {
5935                             bufferProps.matrixElementStartOffsets.push_back(currentMatrixElementOffset);
5936 
5937                             currentMatrixElementOffset += componentSizeBytes;
5938                         }
5939                     }
5940                 }
5941                 else
5942                 {
5943                     // TODO
5944                     DE_ASSERT(false);
5945                 }
5946             }
5947         }
5948     }
5949 
5950     // Private variables
5951     const tcu::UVec3 m_gridSizeXYZ;
5952     const TestType m_testType;
5953     const std::vector<VariableType> m_varTypesToTest;
5954 
5955     uint32_t m_resultBufferSize;
5956     uint32_t m_shaderRecordSize;
5957     StructItem m_testItems;
5958 
5959     std::map<ShaderGroups, std::vector<uint8_t>> m_shaderGroupToRecordDataMap;
5960     std::map<VkShaderStageFlagBits, uint32_t> m_shaderStageToResultBufferOffset;
5961     std::unique_ptr<GridASProvider> m_asProviderPtr;
5962     std::unique_ptr<TopLevelAccelerationStructure> m_tlPtr;
5963 };
5964 
5965 class RecursiveTracesTest : public TestBase
5966 {
5967 public:
RecursiveTracesTest(const GeometryType & geometryType,const AccelerationStructureLayout & asStructureLayout,const uint32_t & depthToUse)5968     RecursiveTracesTest(const GeometryType &geometryType, const AccelerationStructureLayout &asStructureLayout,
5969                         const uint32_t &depthToUse)
5970         : m_asStructureLayout(asStructureLayout)
5971         , m_geometryType(geometryType)
5972         , m_depthToUse(depthToUse)
5973         , m_nRaysToTest(512)
5974         , m_maxResultBufferSizePermitted(512 * 1024768)
5975     {
5976         const auto nItemsExpectedPerRay         = static_cast<uint32_t>((1 << (m_depthToUse + 0)) - 1);
5977         const auto nItemsExpectedPerRayInclRgen = static_cast<uint32_t>((1 << (m_depthToUse + 1)) - 1);
5978 
5979         m_nResultItemsExpected     = nItemsExpectedPerRayInclRgen * m_nRaysToTest;
5980         m_nCHitInvocationsExpected = nItemsExpectedPerRay * m_nRaysToTest;
5981         m_nMissInvocationsExpected = nItemsExpectedPerRay * m_nRaysToTest;
5982 
5983         {
5984             const uint32_t nPreambleBytes = sizeof(uint32_t) * 3;
5985             const uint32_t resultItemSize = sizeof(uint32_t) * 4;
5986 
5987             m_nMaxResultItemsPermitted = (m_maxResultBufferSizePermitted - nPreambleBytes) / resultItemSize;
5988         }
5989     }
5990 
~RecursiveTracesTest()5991     ~RecursiveTracesTest()
5992     {
5993         /* Stub */
5994     }
5995 
getAHitShaderCollectionShaderNames() const5996     std::vector<std::string> getAHitShaderCollectionShaderNames() const final
5997     {
5998         return m_ahitShaderNameVec;
5999     }
6000 
getCHitShaderCollectionShaderNames() const6001     std::vector<std::string> getCHitShaderCollectionShaderNames() const final
6002     {
6003         return m_chitShaderNameVec;
6004     }
6005 
getDispatchSize() const6006     tcu::UVec3 getDispatchSize() const final
6007     {
6008         DE_ASSERT(m_nRaysToTest != 0);
6009 
6010         return tcu::UVec3(m_nRaysToTest, 1u, 1u);
6011     }
6012 
getIntersectionShaderCollectionShaderNames() const6013     std::vector<std::string> getIntersectionShaderCollectionShaderNames() const final
6014     {
6015         const auto nIntersectionShaders =
6016             ((m_geometryType == GeometryType::AABB) || (m_geometryType == GeometryType::AABB_AND_TRIANGLES)) ?
6017                 m_depthToUse :
6018                 0;
6019 
6020         return std::vector<std::string>(nIntersectionShaders, {"intersection0"});
6021     }
6022 
getMaxRecursionDepthUsed() const6023     uint32_t getMaxRecursionDepthUsed() const final
6024     {
6025         return m_depthToUse;
6026     }
6027 
getMissShaderCollectionShaderNames() const6028     std::vector<std::string> getMissShaderCollectionShaderNames() const final
6029     {
6030         return m_missShaderNameVec;
6031     }
6032 
getResultBufferSize() const6033     uint32_t getResultBufferSize() const final
6034     {
6035         DE_ASSERT(m_depthToUse < 30); //< due to how nItemsExpectedPerRay is stored.
6036         DE_ASSERT(m_nRaysToTest != 0);
6037 
6038         /* NOTE: A single item is generated by rgen shader stage which is invoked once per each initial ray.
6039          *
6040          *       Each ray at level N generates two result items.
6041          *
6042          *       Thus, for a single initial traced ray, we need sum(2^depth)=2^(depth+1)-1 items.
6043          */
6044         const auto nItemsExpectedPerRay = static_cast<uint32_t>((1 << (m_depthToUse + 1)) - 1);
6045         const auto nResultItemsExpected = de::min(nItemsExpectedPerRay * m_nRaysToTest, m_nMaxResultItemsPermitted);
6046         const auto resultItemSize =
6047             static_cast<uint32_t>(sizeof(uint32_t) * 4 /* nOriginRay, stage, depth, parentResultItem */);
6048 
6049         return static_cast<uint32_t>(sizeof(uint32_t) * 3 /* nItemsRegistered, nCHitInvocations, nMissInvocations */) +
6050                nResultItemsExpected * resultItemSize;
6051     }
6052 
getSpecializationInfoPtr(const VkShaderStageFlagBits & shaderStage)6053     VkSpecializationInfo *getSpecializationInfoPtr(const VkShaderStageFlagBits &shaderStage) final
6054     {
6055         VkSpecializationInfo *resultPtr = nullptr;
6056 
6057         if (shaderStage == VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR || shaderStage == VK_SHADER_STAGE_MISS_BIT_KHR)
6058         {
6059             resultPtr = &m_specializationInfo;
6060         }
6061 
6062         return resultPtr;
6063     }
6064 
getTLASPtrVecToBind() const6065     std::vector<TopLevelAccelerationStructure *> getTLASPtrVecToBind() const final
6066     {
6067         DE_ASSERT(m_tlPtr != nullptr);
6068 
6069         return {m_tlPtr.get()};
6070     }
6071 
init(vkt::Context &,RayTracingProperties *)6072     bool init(vkt::Context & /* context    */, RayTracingProperties * /* rtPropsPtr */) final
6073     {
6074         m_specializationEntry.constantID = 1;
6075         m_specializationEntry.offset     = 0;
6076         m_specializationEntry.size       = sizeof(uint32_t);
6077 
6078         m_specializationInfo.dataSize      = sizeof(uint32_t);
6079         m_specializationInfo.mapEntryCount = 1;
6080         m_specializationInfo.pData         = &m_depthToUse;
6081         m_specializationInfo.pMapEntries   = &m_specializationEntry;
6082 
6083         return true;
6084     }
6085 
initAS(vkt::Context & context,RayTracingProperties *,VkCommandBuffer commandBuffer)6086     void initAS(vkt::Context &context, RayTracingProperties * /* rtPropertiesPtr */,
6087                 VkCommandBuffer commandBuffer) final
6088     {
6089         std::unique_ptr<GridASProvider> asProviderPtr(new GridASProvider(tcu::Vec3(0, 0, 0), /* gridStartXYZ          */
6090                                                                          tcu::Vec3(1, 1, 1), /* gridCellSizeXYZ       */
6091                                                                          tcu::UVec3(1, 1, 1),
6092                                                                          tcu::Vec3(2, 0, 2), /* gridInterCellDeltaXYZ */
6093                                                                          m_geometryType));
6094 
6095         m_tlPtr =
6096             asProviderPtr->createTLAS(context, m_asStructureLayout, commandBuffer, 0, /* bottomLevelGeometryFlags */
6097                                       nullptr,                                        /* optASPropertyProviderPtr */
6098                                       nullptr);                                       /* optASFeedbackPtr         */
6099     }
6100 
initPrograms(SourceCollections & programCollection) const6101     void initPrograms(SourceCollections &programCollection) const final
6102     {
6103         const auto nLocationsPerPayload = 3; /* 3 scalar uints */
6104 
6105         const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4,
6106                                                   0u,    /* flags        */
6107                                                   true); /* allowSpirv14 */
6108 
6109         std::vector<std::string> rayPayloadDefinitionVec(m_depthToUse);
6110         std::vector<std::string> rayPayloadInDefinitionVec(m_depthToUse);
6111 
6112         for (uint32_t nLevel = 0; nLevel < m_depthToUse; ++nLevel)
6113         {
6114             rayPayloadDefinitionVec.at(nLevel) = "layout(location = " + de::toString(nLocationsPerPayload * nLevel) +
6115                                                  ") rayPayloadEXT block\n"
6116                                                  "{\n"
6117                                                  "    uint currentDepth;\n"
6118                                                  "    uint currentNOriginRay;\n"
6119                                                  "    uint currentResultItem;\n"
6120                                                  "};\n";
6121 
6122             rayPayloadInDefinitionVec.at(nLevel) = "layout(location = " + de::toString(nLocationsPerPayload * nLevel) +
6123                                                    ") rayPayloadInEXT block\n"
6124                                                    "{\n"
6125                                                    "    uint parentDepth;\n"
6126                                                    "    uint parentNOriginRay;\n"
6127                                                    "    uint parentResultItem;\n"
6128                                                    "};\n";
6129         }
6130 
6131         const std::string constantVariableDefinition =
6132             "layout(constant_id = 1) const uint MAX_RECURSIVE_DEPTH = " + de::toString(m_depthToUse) + ";\n";
6133 
6134         const char *resultBufferDefinition = "struct ResultData\n"
6135                                              "{\n"
6136                                              "    uint nOriginRay;\n"
6137                                              "    uint shaderStage;\n"
6138                                              "    uint depth;\n"
6139                                              "    uint callerResultItem;\n"
6140                                              "};\n"
6141                                              "\n"
6142                                              "layout(set = 0, binding = 0, std430) buffer result\n"
6143                                              "{\n"
6144                                              "    uint       nItemsStored;\n"
6145                                              "    uint       nCHitInvocations;\n"
6146                                              "    uint       nMissInvocations;\n"
6147                                              "    ResultData resultItems[];\n"
6148                                              "};\n";
6149 
6150         {
6151             m_ahitShaderNameVec.resize(m_depthToUse);
6152 
6153             for (uint32_t nLevel = 0; nLevel < m_depthToUse; ++nLevel)
6154             {
6155                 std::stringstream css;
6156 
6157                 css << "#version 460 core\n"
6158                        "\n"
6159                        "#extension GL_EXT_ray_tracing : require\n"
6160                        "\n" +
6161                            de::toString(resultBufferDefinition) + rayPayloadInDefinitionVec.at(nLevel) +
6162                            "\n"
6163                            "void main()\n"
6164                            "{\n"
6165                            /* Stub - don't care */
6166                            "}\n";
6167 
6168                 m_ahitShaderNameVec.at(nLevel) = std::string("ahit") + de::toString(nLevel);
6169 
6170                 programCollection.glslSources.add(m_ahitShaderNameVec.at(nLevel))
6171                     << glu::AnyHitSource(css.str()) << buildOptions;
6172             }
6173         }
6174 
6175         {
6176             m_chitShaderNameVec.resize(m_depthToUse);
6177 
6178             for (uint32_t nLevel = 0; nLevel < m_depthToUse; ++nLevel)
6179             {
6180                 std::stringstream css;
6181                 const bool shouldTraceRays = (nLevel != (m_depthToUse - 1));
6182 
6183                 css << "#version 460 core\n"
6184                        "\n"
6185                        "#extension GL_EXT_ray_tracing : require\n"
6186                        "\n"
6187                        "layout(set = 0, binding = 1) uniform accelerationStructureEXT accelerationStructure;\n"
6188                        "\n" +
6189                            constantVariableDefinition + de::toString(resultBufferDefinition) +
6190                            de::toString(rayPayloadInDefinitionVec.at(nLevel));
6191 
6192                 if (shouldTraceRays)
6193                 {
6194                     css << rayPayloadDefinitionVec.at(nLevel + 1);
6195                 }
6196 
6197                 css << "\n"
6198                        "void main()\n"
6199                        "{\n"
6200                        "    uint nItem = atomicAdd(nItemsStored, 1);\n"
6201                        "\n"
6202                        "    atomicAdd(nCHitInvocations, 1);\n"
6203                        "\n"
6204                        "    if (nItem < " +
6205                            de::toString(m_nMaxResultItemsPermitted) +
6206                            ")\n"
6207                            "    {\n"
6208                            "        resultItems[nItem].callerResultItem = parentResultItem;\n"
6209                            "        resultItems[nItem].depth            = parentDepth;\n"
6210                            "        resultItems[nItem].nOriginRay       = parentNOriginRay;\n"
6211                            "        resultItems[nItem].shaderStage      = 1;\n"
6212                            "    }\n"
6213                            "\n";
6214 
6215                 if (shouldTraceRays)
6216                 {
6217                     css << "    if (parentDepth < MAX_RECURSIVE_DEPTH - 1)\n"
6218                            "    {\n"
6219                            "        currentDepth      = parentDepth + 1;\n"
6220                            "        currentNOriginRay = parentNOriginRay;\n"
6221                            "        currentResultItem = nItem;\n"
6222                            "\n"
6223                            "        vec3  cellStartXYZ  = vec3(0.0, 0.0, 0.0);\n"
6224                            "        vec3  cellEndXYZ    = cellStartXYZ + vec3(1.0);\n"
6225                            "        vec3  targetHit     = mix(cellStartXYZ, cellEndXYZ, vec3(0.5) );\n"
6226                            "        vec3  targetMiss    = targetHit + vec3(0, 10, 0);\n"
6227                            "        vec3  origin        = targetHit - vec3(1, 0,  0);\n"
6228                            "        vec3  directionHit  = normalize(targetHit  - origin);\n"
6229                            "        vec3  directionMiss = normalize(targetMiss - origin);\n"
6230                            "        uint  rayFlags      = 0;\n"
6231                            "        uint  cullMask      = 0xFF;\n"
6232                            "        float tmin          = 0.001;\n"
6233                            "        float tmax          = 5.0;\n"
6234                            "\n"
6235                            "        traceRayEXT(accelerationStructure, rayFlags, cullMask, " +
6236                                de::toString(nLevel + 1) + ", 0, 0, origin, tmin, directionHit,  tmax, " +
6237                                de::toString(nLocationsPerPayload * (nLevel + 1)) +
6238                                ");\n"
6239                                "        traceRayEXT(accelerationStructure, rayFlags, cullMask, " +
6240                                de::toString(nLevel + 1) + ", 0, 0, origin, tmin, directionMiss, tmax, " +
6241                                de::toString(nLocationsPerPayload * (nLevel + 1)) +
6242                                ");\n"
6243                                "    }\n"
6244                                "\n";
6245                 }
6246 
6247                 css << "}\n";
6248 
6249                 m_chitShaderNameVec.at(nLevel) = std::string("chit") + de::toString(nLevel);
6250 
6251                 programCollection.glslSources.add(m_chitShaderNameVec.at(nLevel))
6252                     << glu::ClosestHitSource(css.str()) << buildOptions;
6253             }
6254         }
6255 
6256         {
6257             std::stringstream css;
6258 
6259             css << "#version 460 core\n"
6260                    "\n"
6261                    "#extension GL_EXT_ray_tracing : require\n"
6262                    "\n"
6263                    "void main()\n"
6264                    "{\n"
6265                    "    reportIntersectionEXT(0.95f, 0);\n"
6266                    "}\n";
6267 
6268             // There is stack caching code that assumes it knows which shader groups are what, but that doesn't apply to
6269             // this test. The other hit group shaders don't hit this issue because they don't use the canonical name, so
6270             // de-canonicalize the name to work around that
6271             programCollection.glslSources.add("intersection0") << glu::IntersectionSource(css.str()) << buildOptions;
6272         }
6273 
6274         {
6275             m_missShaderNameVec.resize(m_depthToUse);
6276 
6277             for (uint32_t nLevel = 0; nLevel < m_depthToUse; ++nLevel)
6278             {
6279                 std::stringstream css;
6280                 const bool shouldTraceRays = (nLevel != (m_depthToUse - 1));
6281 
6282                 css << "#version 460 core\n"
6283                        "\n"
6284                        "#extension GL_EXT_ray_tracing : require\n"
6285                        "\n"
6286                        "layout(set = 0, binding = 1) uniform accelerationStructureEXT accelerationStructure;\n"
6287                        "\n" +
6288                            constantVariableDefinition + de::toString(resultBufferDefinition) +
6289                            de::toString(rayPayloadInDefinitionVec.at(nLevel));
6290 
6291                 if (shouldTraceRays)
6292                 {
6293                     css << rayPayloadDefinitionVec.at(nLevel + 1);
6294                 }
6295 
6296                 css << "\n"
6297                        "void main()\n"
6298                        "{\n"
6299                        "    uint nItem = atomicAdd(nItemsStored, 1);\n"
6300                        "\n"
6301                        "    atomicAdd(nMissInvocations, 1);\n"
6302                        "\n"
6303                        "    if (nItem < " +
6304                            de::toString(m_nMaxResultItemsPermitted) +
6305                            ")\n"
6306                            "    {\n"
6307                            "        resultItems[nItem].depth            = parentDepth;\n"
6308                            "        resultItems[nItem].nOriginRay       = parentNOriginRay;\n"
6309                            "        resultItems[nItem].callerResultItem = parentResultItem;\n"
6310                            "        resultItems[nItem].shaderStage      = 2;\n"
6311                            "    }\n"
6312                            "\n";
6313 
6314                 if (shouldTraceRays)
6315                 {
6316                     css << "    if (parentDepth < MAX_RECURSIVE_DEPTH - 1)\n"
6317                            "    {\n"
6318                            "        currentDepth      = parentDepth + 1;\n"
6319                            "        currentNOriginRay = parentNOriginRay;\n"
6320                            "        currentResultItem = nItem;\n"
6321                            "\n"
6322                            "        vec3  cellStartXYZ  = vec3(0.0, 0.0, 0.0);\n"
6323                            "        vec3  cellEndXYZ    = cellStartXYZ + vec3(1.0);\n"
6324                            "        vec3  targetHit     = mix(cellStartXYZ, cellEndXYZ, vec3(0.5) );\n"
6325                            "        vec3  targetMiss    = targetHit + vec3(0, 10, 0);\n"
6326                            "        vec3  origin        = targetHit - vec3(1, 0,  0);\n"
6327                            "        vec3  directionHit  = normalize(targetHit  - origin);\n"
6328                            "        vec3  directionMiss = normalize(targetMiss - origin);\n"
6329                            "\n"
6330                            "        uint  rayFlags      = 0;\n"
6331                            "        uint  cullMask      = 0xFF;\n"
6332                            "        float tmin          = 0.001;\n"
6333                            "        float tmax          = 5.0;\n"
6334                            "\n"
6335                            "        traceRayEXT(accelerationStructure, rayFlags, cullMask, " +
6336                                de::toString(nLevel + 1) + ", 0, 0, origin, tmin, directionHit,  tmax, " +
6337                                de::toString(nLocationsPerPayload * (nLevel + 1)) +
6338                                ");\n"
6339                                "        traceRayEXT(accelerationStructure, rayFlags, cullMask, " +
6340                                de::toString(nLevel + 1) + ", 0, 0, origin, tmin, directionMiss, tmax, " +
6341                                de::toString(nLocationsPerPayload * (nLevel + 1)) +
6342                                ");\n"
6343                                "    }\n";
6344                 }
6345 
6346                 css << "}\n";
6347 
6348                 m_missShaderNameVec.at(nLevel) = "miss" + de::toString(nLevel);
6349 
6350                 programCollection.glslSources.add(m_missShaderNameVec.at(nLevel))
6351                     << glu::MissSource(css.str()) << buildOptions;
6352             }
6353         }
6354 
6355         {
6356             const std::string rayPayloadDefinition = ((m_depthToUse == 0u) ? "" : rayPayloadDefinitionVec.at(0));
6357 
6358             std::stringstream css;
6359 
6360             css << "#version 460 core\n"
6361                    "\n"
6362                    "#extension GL_EXT_ray_tracing : require\n"
6363                    "\n"
6364                    "layout(set = 0, binding = 1) uniform accelerationStructureEXT accelerationStructure;\n"
6365                    "\n" +
6366                        de::toString(resultBufferDefinition) + rayPayloadDefinition +
6367                        "void main()\n"
6368                        "{\n"
6369                        "    uint  nInvocation  = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + "
6370                        "gl_LaunchIDEXT.y * gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
6371                        "    uint  rayFlags     = 0;\n"
6372                        "    float tmin         = 0.001;\n"
6373                        "    float tmax         = 9.0;\n"
6374                        "\n"
6375                        "    uint  cullMask      = 0xFF;\n"
6376                        "    vec3  cellStartXYZ  = vec3(0.0, 0.0, 0.0);\n"
6377                        "    vec3  cellEndXYZ    = cellStartXYZ + vec3(1.0);\n"
6378                        "    vec3  targetHit     = mix(cellStartXYZ, cellEndXYZ, vec3(0.5) );\n"
6379                        "    vec3  targetMiss    = targetHit + vec3(0, 10, 0);\n"
6380                        "    vec3  origin        = targetHit - vec3(1, 0,  0);\n"
6381                        "    vec3  directionHit  = normalize(targetHit  - origin);\n"
6382                        "    vec3  directionMiss = normalize(targetMiss - origin);\n"
6383                        "\n"
6384                        "    uint nItem = atomicAdd(nItemsStored, 1);\n"
6385                        "\n"
6386                        "    if (nItem < " +
6387                        de::toString(m_nMaxResultItemsPermitted) +
6388                        ")\n"
6389                        "    {\n"
6390                        "        resultItems[nItem].callerResultItem = 0xFFFFFFFF;\n"
6391                        "        resultItems[nItem].depth            = 0;\n"
6392                        "        resultItems[nItem].nOriginRay       = nInvocation;\n"
6393                        "        resultItems[nItem].shaderStage      = 3;\n"
6394                        "    }\n"
6395                        "\n" +
6396                        ((m_depthToUse == 0u) ? "" :
6397                                                "    currentDepth      = 0;\n"
6398                                                "    currentNOriginRay = nInvocation;\n"
6399                                                "    currentResultItem = nItem;\n"
6400                                                "\n"
6401                                                "    traceRayEXT(accelerationStructure, rayFlags, cullMask, 0, 0, 0, "
6402                                                "origin, tmin, directionHit,  tmax, 0);\n"
6403                                                "    traceRayEXT(accelerationStructure, rayFlags, cullMask, 0, 0, 0, "
6404                                                "origin, tmin, directionMiss, tmax, 0);\n") +
6405                        "}\n";
6406 
6407             programCollection.glslSources.add("rgen") << glu::RaygenSource(css.str()) << buildOptions;
6408         }
6409     }
6410 
resetTLAS()6411     void resetTLAS() final
6412     {
6413         m_tlPtr.reset();
6414     }
6415 
verifyResultBuffer(const void * resultDataPtr) const6416     bool verifyResultBuffer(const void *resultDataPtr) const final
6417     {
6418         const uint32_t *resultU32Ptr = reinterpret_cast<const uint32_t *>(resultDataPtr);
6419         bool result                  = false;
6420         auto nItemsStored            = *resultU32Ptr;
6421         const auto nCHitInvocations  = *(resultU32Ptr + 1);
6422         const auto nMissInvocations  = *(resultU32Ptr + 2);
6423         const bool doFullCheck       = (m_nResultItemsExpected < m_nMaxResultItemsPermitted);
6424 
6425         struct ResultItem
6426         {
6427             uint32_t depth;
6428             uint32_t nOriginRay;
6429             uint32_t nParentNode;
6430 
6431             VkShaderStageFlagBits stage;
6432 
6433             ResultItem *childCHitNodePtr;
6434             ResultItem *childMissNodePtr;
6435 
6436             ResultItem()
6437                 : depth(0xFFFFFFFFu)
6438                 , nOriginRay(0xFFFFFFFFu)
6439                 , nParentNode(0xFFFFFFFFu)
6440                 , stage(VK_SHADER_STAGE_ALL)
6441                 , childCHitNodePtr(nullptr)
6442                 , childMissNodePtr(nullptr)
6443             {
6444                 /* Stub */
6445             }
6446         };
6447 
6448         std::map<uint32_t, ResultItem *> nItemToResultItemPtrMap;
6449         std::map<uint32_t, std::vector<ResultItem *>> nLevelToResultItemPtrVecMap;
6450         std::vector<std::unique_ptr<ResultItem>> resultItemPtrVec;
6451 
6452         uint32_t rayCount;
6453         std::map<uint32_t, std::vector<std::pair<const uint32_t *, uint32_t>>> nRayToResultItemPtrIndexVecMap;
6454 
6455         if (doFullCheck)
6456         {
6457             if (nItemsStored != m_nResultItemsExpected)
6458             {
6459                 goto end;
6460             }
6461         }
6462         else
6463         {
6464             // Test shaders always use an atomic add to obtain a unique index, at which they should write the result item.
6465             // Hence, the value we read back from the result buffer's preamble does not actually indicate how many items
6466             // are available for reading, since a partial (!= full) check implies our result buffer only contains a fraction
6467             // of all expected items (since more items would simply not fit in).
6468             //
6469             // Make sure to use a correct value in subsequent checks.
6470             if (nItemsStored < m_nResultItemsExpected)
6471             {
6472                 goto end;
6473             }
6474 
6475             nItemsStored = m_nMaxResultItemsPermitted;
6476         }
6477 
6478         if (nCHitInvocations != m_nCHitInvocationsExpected)
6479         {
6480             goto end;
6481         }
6482 
6483         if (nMissInvocations != m_nMissInvocationsExpected)
6484         {
6485             goto end;
6486         }
6487 
6488         /*
6489          * We are creating a map of rays, each of which has a list of result items for that ray,
6490          * so we can verify each ray sequentially and save memory on the temporary maps.
6491          */
6492         for (uint32_t nItem = 0; nItem < nItemsStored; ++nItem)
6493         {
6494             const uint32_t *currentItemU32Ptr = resultU32Ptr +
6495                                                 3 /* nItemsRegistered, nCHitInvocations, nMissInvocations*/ +
6496                                                 4 /* items per result item */ * nItem;
6497             uint32_t nOriginRay = *(currentItemU32Ptr + 0);
6498             nRayToResultItemPtrIndexVecMap[nOriginRay].push_back(std::make_pair(currentItemU32Ptr, nItem));
6499         }
6500 
6501         /*
6502          * Convert an array of result items, stored in undefined order, to a representation we can easily verify.
6503          * Loop to verify result items with the same ray id in each iteration.
6504          */
6505         rayCount = getDispatchSize()[0] * getDispatchSize()[1] * getDispatchSize()[2];
6506         for (uint32_t nRay = 0; nRay < rayCount; ++nRay)
6507         {
6508             // If the nRay is not in the map, an empty vector will be created,
6509             // and the subsequent verification will be simplified in this case.
6510             const std::vector<std::pair<const uint32_t *, uint32_t>> &currentItemU32PtrIndexVec =
6511                 nRayToResultItemPtrIndexVecMap[nRay];
6512             for (const auto &iterator1 : currentItemU32PtrIndexVec)
6513             {
6514                 const uint32_t *currentItemU32Ptr = iterator1.first;
6515                 uint32_t nItem                    = iterator1.second;
6516 
6517                 std::unique_ptr<ResultItem> resultItemPtr;
6518                 resultItemPtr.reset(new ResultItem());
6519 
6520                 resultItemPtr->depth       = *(currentItemU32Ptr + 2);
6521                 resultItemPtr->nOriginRay  = *(currentItemU32Ptr + 0);
6522                 resultItemPtr->nParentNode = *(currentItemU32Ptr + 3);
6523 
6524                 switch (*(currentItemU32Ptr + 1))
6525                 {
6526                 case 1:
6527                     resultItemPtr->stage = VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR;
6528                     break;
6529                 case 2:
6530                     resultItemPtr->stage = VK_SHADER_STAGE_MISS_BIT_KHR;
6531                     break;
6532                 case 3:
6533                     resultItemPtr->stage = VK_SHADER_STAGE_RAYGEN_BIT_KHR;
6534                     break;
6535 
6536                 default:
6537                 {
6538                     /* This should never happen */
6539                     DE_ASSERT(false);
6540 
6541                     goto end;
6542                 }
6543                 }
6544 
6545                 if (resultItemPtr->depth >= m_depthToUse && m_depthToUse > 0u)
6546                 {
6547                     DE_ASSERT(resultItemPtr->depth < m_depthToUse);
6548 
6549                     goto end;
6550                 }
6551 
6552                 if (resultItemPtr->nOriginRay >= m_nRaysToTest)
6553                 {
6554                     DE_ASSERT(resultItemPtr->nOriginRay < m_nRaysToTest);
6555 
6556                     goto end;
6557                 }
6558 
6559                 nItemToResultItemPtrMap[nItem] = resultItemPtr.get();
6560 
6561                 nLevelToResultItemPtrVecMap[resultItemPtr->depth].push_back(resultItemPtr.get());
6562                 resultItemPtrVec.push_back(std::move(resultItemPtr));
6563             }
6564 
6565             if (nLevelToResultItemPtrVecMap.empty())
6566             {
6567                 continue;
6568             }
6569 
6570             if (doFullCheck)
6571             {
6572                 uint32_t nRayGenShaderResultItemsFound = 0;
6573 
6574                 for (const auto &iterator1 : nLevelToResultItemPtrVecMap)
6575                 {
6576                     const auto &currentResultItemPtrVec = iterator1.second;
6577 
6578                     for (const auto &currentResultItemPtr : currentResultItemPtrVec)
6579                     {
6580                         if (currentResultItemPtr->stage == VK_SHADER_STAGE_RAYGEN_BIT_KHR)
6581                         {
6582                             if (currentResultItemPtr->nParentNode != 0xFFFFFFFF)
6583                             {
6584                                 DE_ASSERT(currentResultItemPtr->nParentNode == 0xFFFFFFFF);
6585 
6586                                 goto end;
6587                             }
6588 
6589                             nRayGenShaderResultItemsFound++;
6590                         }
6591                         else if (currentResultItemPtr->stage == VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR)
6592                         {
6593                             DE_ASSERT(currentResultItemPtr->nParentNode < nItemsStored);
6594 
6595                             auto parentNodePtr = nItemToResultItemPtrMap.at(currentResultItemPtr->nParentNode);
6596 
6597                             if (parentNodePtr->childCHitNodePtr != nullptr)
6598                             {
6599                                 DE_ASSERT(parentNodePtr->childCHitNodePtr == nullptr);
6600 
6601                                 goto end;
6602                             }
6603 
6604                             parentNodePtr->childCHitNodePtr = currentResultItemPtr;
6605                         }
6606                         else
6607                         {
6608                             DE_ASSERT(currentResultItemPtr->stage == VK_SHADER_STAGE_MISS_BIT_KHR);
6609                             DE_ASSERT(currentResultItemPtr->nParentNode < nItemsStored);
6610 
6611                             auto parentNodePtr = nItemToResultItemPtrMap.at(currentResultItemPtr->nParentNode);
6612 
6613                             if (parentNodePtr->childMissNodePtr != nullptr)
6614                             {
6615                                 DE_ASSERT(parentNodePtr->childMissNodePtr == nullptr);
6616 
6617                                 goto end;
6618                             }
6619 
6620                             parentNodePtr->childMissNodePtr = currentResultItemPtr;
6621                         }
6622                     }
6623                 }
6624 
6625                 if (nRayGenShaderResultItemsFound != 1)
6626                 {
6627                     DE_ASSERT(nRayGenShaderResultItemsFound == 1);
6628 
6629                     goto end;
6630                 }
6631             }
6632 
6633             // 1. Verify all nodes that are not leaves have both child nodes attached, and that leaf nodes do not have any children assigned.
6634             if (doFullCheck)
6635             {
6636                 for (const auto &iterator1 : nLevelToResultItemPtrVecMap)
6637                 {
6638                     const auto &currentNLevel           = iterator1.first;
6639                     const auto &currentResultItemPtrVec = iterator1.second;
6640 
6641                     for (const auto &currentResultItemPtr : currentResultItemPtrVec)
6642                     {
6643                         if (currentResultItemPtr->stage == VK_SHADER_STAGE_RAYGEN_BIT_KHR ||
6644                             currentNLevel != m_depthToUse - 1)
6645                         {
6646                             if (currentResultItemPtr->childCHitNodePtr == nullptr && m_depthToUse > 0u)
6647                             {
6648                                 DE_ASSERT(currentResultItemPtr->childCHitNodePtr != nullptr);
6649 
6650                                 goto end;
6651                             }
6652 
6653                             if (currentResultItemPtr->childMissNodePtr == nullptr && m_depthToUse > 0u)
6654                             {
6655                                 DE_ASSERT(currentResultItemPtr->childMissNodePtr != nullptr);
6656 
6657                                 goto end;
6658                             }
6659                         }
6660                         else
6661                         {
6662                             if (currentResultItemPtr->childCHitNodePtr != nullptr)
6663                             {
6664                                 DE_ASSERT(currentResultItemPtr->childCHitNodePtr == nullptr);
6665 
6666                                 goto end;
6667                             }
6668 
6669                             if (currentResultItemPtr->childMissNodePtr != nullptr)
6670                             {
6671                                 DE_ASSERT(currentResultItemPtr->childMissNodePtr == nullptr);
6672 
6673                                 goto end;
6674                             }
6675                         }
6676                     }
6677                 }
6678             }
6679 
6680             // 2. Verify depth level is correct for each node.
6681             for (const auto &iterator1 : nLevelToResultItemPtrVecMap)
6682             {
6683                 const auto &currentNLevel           = iterator1.first;
6684                 const auto &currentResultItemPtrVec = iterator1.second;
6685 
6686                 for (const auto &currentResultItemPtr : currentResultItemPtrVec)
6687                 {
6688                     if (currentResultItemPtr->stage == VK_SHADER_STAGE_RAYGEN_BIT_KHR)
6689                     {
6690                         if (currentResultItemPtr->depth != 0)
6691                         {
6692                             DE_ASSERT(currentResultItemPtr->depth == 0);
6693 
6694                             goto end;
6695                         }
6696                     }
6697                     else if (currentResultItemPtr->depth != currentNLevel)
6698                     {
6699                         DE_ASSERT(currentResultItemPtr->depth == currentNLevel);
6700 
6701                         goto end;
6702                     }
6703                 }
6704             }
6705 
6706             // 3. Verify child node ptrs point to nodes that are assigned correct shader stage.
6707             for (const auto &iterator : nItemToResultItemPtrMap)
6708             {
6709                 const auto &currentResultItemPtr = iterator.second;
6710 
6711                 if (currentResultItemPtr->childCHitNodePtr != nullptr &&
6712                     currentResultItemPtr->childCHitNodePtr->stage != VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR)
6713                 {
6714                     DE_ASSERT(currentResultItemPtr->childCHitNodePtr->stage == VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR);
6715 
6716                     goto end;
6717                 }
6718 
6719                 if (currentResultItemPtr->childMissNodePtr != nullptr &&
6720                     currentResultItemPtr->childMissNodePtr->stage != VK_SHADER_STAGE_MISS_BIT_KHR)
6721                 {
6722                     DE_ASSERT(currentResultItemPtr->childMissNodePtr->stage = VK_SHADER_STAGE_MISS_BIT_KHR);
6723 
6724                     goto end;
6725                 }
6726             }
6727 
6728             // 4. Verify child nodes are assigned correct depth levels.
6729             for (const auto &iterator1 : nLevelToResultItemPtrVecMap)
6730             {
6731                 const auto &currentNLevel           = iterator1.first;
6732                 const auto &currentResultItemPtrVec = iterator1.second;
6733 
6734                 for (const auto &currentResultItemPtr : currentResultItemPtrVec)
6735                 {
6736                     const auto expectedChildNodeDepth =
6737                         (currentResultItemPtr->stage == VK_SHADER_STAGE_RAYGEN_BIT_KHR) ?
6738                             0 :
6739                             currentResultItemPtr->depth + 1;
6740 
6741                     if (currentResultItemPtr->depth != currentNLevel)
6742                     {
6743                         DE_ASSERT(currentResultItemPtr->depth == currentNLevel);
6744 
6745                         goto end;
6746                     }
6747 
6748                     if (currentResultItemPtr->childCHitNodePtr != nullptr &&
6749                         currentResultItemPtr->childCHitNodePtr->depth != expectedChildNodeDepth)
6750                     {
6751                         DE_ASSERT(currentResultItemPtr->childCHitNodePtr->depth == expectedChildNodeDepth);
6752 
6753                         goto end;
6754                     }
6755 
6756                     if (currentResultItemPtr->childMissNodePtr != nullptr &&
6757                         currentResultItemPtr->childMissNodePtr->depth != expectedChildNodeDepth)
6758                     {
6759                         DE_ASSERT(currentResultItemPtr->childMissNodePtr->depth == expectedChildNodeDepth);
6760 
6761                         goto end;
6762                     }
6763                 }
6764             }
6765 
6766             // 5. Verify that RT shader stages were invoked for all anticipated recursion levels.
6767             if (doFullCheck)
6768             {
6769                 for (uint32_t nLevel = 0; nLevel < m_depthToUse; nLevel++)
6770                 {
6771                     if (nLevelToResultItemPtrVecMap.find(nLevel) == nLevelToResultItemPtrVecMap.end())
6772                     {
6773                         DE_ASSERT(false);
6774 
6775                         goto end;
6776                     }
6777                 }
6778             }
6779 
6780             /* clear containers before next iteration */
6781             {
6782                 nItemToResultItemPtrMap.clear();
6783 
6784                 /* clear nLevelToResultItemPtrVecMap */
6785                 for (auto &iterator1 : nLevelToResultItemPtrVecMap)
6786                 {
6787                     iterator1.second.clear();
6788                 }
6789                 nLevelToResultItemPtrVecMap.clear();
6790 
6791                 resultItemPtrVec.clear();
6792             }
6793 
6794         } // end for (uint32_t nRay = 0; nRay < rayCount; ++nRay)
6795 
6796         result = true;
6797     end:
6798         return result;
6799     }
6800 
6801 private:
6802     const AccelerationStructureLayout m_asStructureLayout;
6803     const GeometryType m_geometryType;
6804 
6805     uint32_t m_depthToUse;
6806     uint32_t m_nMaxResultItemsPermitted;
6807     const uint32_t m_nRaysToTest;
6808     std::unique_ptr<TopLevelAccelerationStructure> m_tlPtr;
6809 
6810     VkSpecializationInfo m_specializationInfo;
6811     VkSpecializationMapEntry m_specializationEntry;
6812 
6813     mutable std::vector<std::string> m_ahitShaderNameVec;
6814     mutable std::vector<std::string> m_chitShaderNameVec;
6815     mutable std::vector<std::string> m_missShaderNameVec;
6816 
6817     uint32_t m_nCHitInvocationsExpected;
6818     uint32_t m_nMissInvocationsExpected;
6819     uint32_t m_nResultItemsExpected;
6820 
6821     const uint32_t m_maxResultBufferSizePermitted;
6822 };
6823 
6824 // Test the return value of reportIntersectionEXT
6825 class ReportIntersectionResultTest : public TestBase
6826 {
6827 public:
ReportIntersectionResultTest(const AccelerationStructureLayout & asLayout,const GeometryType & geometryType)6828     ReportIntersectionResultTest(const AccelerationStructureLayout &asLayout, const GeometryType &geometryType)
6829         : m_asLayout(asLayout)
6830         , m_geometryType(geometryType)
6831         , m_gridSizeXYZ(tcu::UVec3(4, 4, 1))
6832         , m_nRaysToTrace(16)
6833     {
6834     }
6835 
getCHitShaderCollectionShaderNames() const6836     std::vector<std::string> getCHitShaderCollectionShaderNames() const final
6837     {
6838         return {};
6839     }
6840 
getDispatchSize() const6841     tcu::UVec3 getDispatchSize() const final
6842     {
6843         return m_gridSizeXYZ;
6844     }
6845 
getResultBufferSize() const6846     uint32_t getResultBufferSize() const final
6847     {
6848         return static_cast<uint32_t>(2u * sizeof(uint32_t) * m_nRaysToTrace);
6849     }
6850 
getTLASPtrVecToBind() const6851     std::vector<TopLevelAccelerationStructure *> getTLASPtrVecToBind() const final
6852     {
6853         return {m_tlPtr.get()};
6854     }
6855 
resetTLAS()6856     void resetTLAS() final
6857     {
6858         m_tlPtr.reset();
6859     }
6860 
initAS(vkt::Context & context,RayTracingProperties *,VkCommandBuffer commandBuffer)6861     void initAS(vkt::Context &context, RayTracingProperties * /* rtPropertiesPtr */,
6862                 VkCommandBuffer commandBuffer) final
6863     {
6864         m_asProviderPtr.reset(new GridASProvider(tcu::Vec3(0, 0, 0),                         // gridStartXYZ
6865                                                  tcu::Vec3(1, 1, 1),                         // gridCellSizeXYZ
6866                                                  m_gridSizeXYZ, tcu::Vec3(2.0f, 2.0f, 2.0f), // gridInterCellDeltaXYZ
6867                                                  m_geometryType));
6868 
6869         m_tlPtr = m_asProviderPtr->createTLAS(context, m_asLayout, commandBuffer, 0u,
6870                                               nullptr,  // optASPropertyProviderPtr
6871                                               nullptr); // optASFedbackPtr
6872     }
6873 
initPrograms(SourceCollections & programCollection) const6874     void initPrograms(SourceCollections &programCollection) const final
6875     {
6876         const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);
6877         const std::string hitPropertiesDefinition  = "struct HitProperties\n"
6878                                                      "{\n"
6879                                                      "    uint nHitsRejected;\n"
6880                                                      "    uint nHitsAccepteded;\n"
6881                                                      "};\n";
6882         const std::string hitPropertiesDeclaration = "layout(set = 0, binding = 0, std430) buffer result\n"
6883                                                      "{\n"
6884                                                      "    HitProperties rayToHitProps[" +
6885                                                      de::toString(m_nRaysToTrace) +
6886                                                      "];\n"
6887                                                      "};\n";
6888 
6889         programCollection.glslSources.add("ahit")
6890             << glu::AnyHitSource(std::string() +
6891                                  "#version 460 core\n"
6892                                  "\n"
6893                                  "#extension GL_EXT_ray_tracing : require\n"
6894                                  "\n"
6895                                  "hitAttributeEXT vec3 unusedAttribute;\n"
6896                                  "\n" +
6897                                  hitPropertiesDefinition +
6898                                  "\n"
6899                                  "layout(location = 0) rayPayloadInEXT vec3 unusedPayload;\n" +
6900                                  hitPropertiesDeclaration +
6901                                  "\n"
6902                                  "void main()\n"
6903                                  "{\n"
6904                                  "    uint nRay = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + "
6905                                  "gl_LaunchIDEXT.y * gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
6906                                  "    if ((gl_RayTmaxEXT > 0.6) && (gl_RayTmaxEXT < 0.8))\n"
6907                                  "    {\n"
6908                                  "        atomicAdd(rayToHitProps[nRay].nHitsRejected, 1);\n"
6909                                  "        ignoreIntersectionEXT;\n" // reportIntersectionEXT should return false
6910                                  "    }\n"
6911                                  "    else if ((gl_RayTmaxEXT > 0.1) && (gl_RayTmaxEXT < 0.3))\n"
6912                                  "    {\n"
6913                                  "        atomicAdd(rayToHitProps[nRay].nHitsAccepteded, 1);\n"
6914                                  "    }\n"
6915                                  "}\n")
6916             << buildOptions;
6917 
6918         programCollection.glslSources.add("intersection")
6919             << glu::IntersectionSource("#version 460 core\n"
6920                                        "#extension GL_EXT_ray_tracing : require\n"
6921                                        "\n"
6922                                        "hitAttributeEXT vec3 hitAttribute;\n"
6923                                        "\n"
6924                                        "void main()\n"
6925                                        "{\n"
6926                                        "    bool resultThatShouldBeRejected = reportIntersectionEXT(0.7f, 0);\n"
6927                                        "    if (resultThatShouldBeRejected)\n"
6928                                        "        reportIntersectionEXT(0.7f, 0);\n"
6929                                        "    else\n"
6930                                        "    {\n"
6931                                        "         bool resultThatShouldBeAccepted = reportIntersectionEXT(0.2f, 0);\n"
6932                                        "         if (!resultThatShouldBeAccepted)\n"
6933                                        "             reportIntersectionEXT(0.2f, 0);\n"
6934                                        "    }\n"
6935                                        "}\n")
6936             << buildOptions;
6937 
6938         programCollection.glslSources.add("miss")
6939             << glu::MissSource(std::string() +
6940                                "#version 460 core\n"
6941                                "\n"
6942                                "#extension GL_EXT_ray_tracing : require\n"
6943                                "\n" +
6944                                hitPropertiesDefinition + "layout(location = 0) rayPayloadInEXT vec3 unusedPayload;\n" +
6945                                hitPropertiesDeclaration +
6946                                "\n"
6947                                "void main()\n"
6948                                "{\n"
6949                                "}\n")
6950             << buildOptions;
6951 
6952         programCollection.glslSources.add("rgen")
6953             << glu::RaygenSource(
6954                    "#version 460 core\n"
6955                    "\n"
6956                    "#extension GL_EXT_ray_tracing : require\n"
6957                    "\n" +
6958                    hitPropertiesDefinition +
6959                    "layout(location = 0)              rayPayloadEXT vec3                     unusedPayload;\n"
6960                    "layout(set      = 0, binding = 1) uniform       accelerationStructureEXT topLevelAS;\n"
6961                    "\n"
6962                    "void main()\n"
6963                    "{\n"
6964                    "    uint  rayFlags    = 0;\n"
6965                    "    uint  cullMask    = 0xFF;\n"
6966                    "    float tmin        = 0.001;\n"
6967                    "    float tmax        = 9.0;\n"
6968                    "    vec3  origin      = vec3(4, 4, 4);\n"
6969                    "    vec3  target      = vec3(float(gl_LaunchIDEXT.x * 2) + 0.5f, float(gl_LaunchIDEXT.y * 2) + "
6970                    "0.5f, float(gl_LaunchIDEXT.z * 2) + 0.5f);\n"
6971                    "    vec3  direct      = normalize(target - origin);\n"
6972                    "\n"
6973                    "    traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, direct, tmax, 0);\n"
6974                    "}\n")
6975             << buildOptions;
6976     }
6977 
verifyResultBuffer(const void * resultDataPtr) const6978     bool verifyResultBuffer(const void *resultDataPtr) const final
6979     {
6980         for (uint32_t nRay = 0; nRay < m_nRaysToTrace; ++nRay)
6981         {
6982             const uint32_t *rayProps = reinterpret_cast<const uint32_t *>(resultDataPtr) + 2 * nRay;
6983             if ((rayProps[0] != 1) || (rayProps[1] != 1))
6984                 return false;
6985         }
6986         return true;
6987     }
6988 
6989 private:
6990     const AccelerationStructureLayout m_asLayout;
6991     const GeometryType m_geometryType;
6992     const tcu::UVec3 m_gridSizeXYZ;
6993     const uint32_t m_nRaysToTrace;
6994 
6995     std::unique_ptr<GridASProvider> m_asProviderPtr;
6996     std::unique_ptr<TopLevelAccelerationStructure> m_tlPtr;
6997 };
6998 
6999 class RayPayloadInTest : public TestBase
7000 {
7001 public:
RayPayloadInTest(const GeometryType & geometryType,const AccelerationStructureLayout & asStructureLayout)7002     RayPayloadInTest(const GeometryType &geometryType, const AccelerationStructureLayout &asStructureLayout)
7003         : m_asStructureLayout(asStructureLayout)
7004         , m_geometryType(geometryType)
7005         , m_gridSizeXYZ(tcu::UVec3(512, 1, 1))
7006         , m_nRayPayloadU32s(512)
7007     {
7008     }
7009 
/* Nothing to release explicitly. */
~RayPayloadInTest() = default;
7014 
getDispatchSize() const7015     tcu::UVec3 getDispatchSize() const final
7016     {
7017         DE_ASSERT(m_gridSizeXYZ[0] != 0);
7018         DE_ASSERT(m_gridSizeXYZ[1] != 0);
7019         DE_ASSERT(m_gridSizeXYZ[2] != 0);
7020 
7021         return tcu::UVec3(m_gridSizeXYZ[0], m_gridSizeXYZ[1], m_gridSizeXYZ[2]);
7022     }
7023 
getResultBufferSize() const7024     uint32_t getResultBufferSize() const final
7025     {
7026         DE_ASSERT(m_gridSizeXYZ[0] != 0);
7027         DE_ASSERT(m_gridSizeXYZ[1] != 0);
7028         DE_ASSERT(m_gridSizeXYZ[2] != 0);
7029         DE_ASSERT(m_nRayPayloadU32s != 0);
7030 
7031         const auto nRays = m_gridSizeXYZ[0] * m_gridSizeXYZ[1] * m_gridSizeXYZ[2];
7032 
7033         DE_ASSERT(nRays != 0);
7034         DE_ASSERT((nRays % 2) == 0);
7035 
7036         const auto nMissShaderInvocationsExpected = nRays / 2;
7037         const auto nAHitShaderInvocationsExpected = nRays / 2;
7038         const auto nCHitShaderInvocationsExpected = nAHitShaderInvocationsExpected;
7039         const auto nResultStoresExpected =
7040             nMissShaderInvocationsExpected + nAHitShaderInvocationsExpected + nCHitShaderInvocationsExpected;
7041 
7042         return static_cast<uint32_t>((1 /* nItems */ + m_nRayPayloadU32s * nResultStoresExpected) * sizeof(uint32_t));
7043     }
7044 
getSpecializationInfoPtr(const VkShaderStageFlagBits & shaderStage)7045     VkSpecializationInfo *getSpecializationInfoPtr(const VkShaderStageFlagBits &shaderStage) final
7046     {
7047         VkSpecializationInfo *resultPtr = nullptr;
7048 
7049         if (shaderStage == VK_SHADER_STAGE_MISS_BIT_KHR || shaderStage == VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR ||
7050             shaderStage == VK_SHADER_STAGE_ANY_HIT_BIT_KHR || shaderStage == VK_SHADER_STAGE_RAYGEN_BIT_KHR)
7051         {
7052             resultPtr = &m_specializationInfo;
7053         }
7054 
7055         return resultPtr;
7056     }
7057 
getTLASPtrVecToBind() const7058     std::vector<TopLevelAccelerationStructure *> getTLASPtrVecToBind() const final
7059     {
7060         DE_ASSERT(m_tlPtr != nullptr);
7061 
7062         return {m_tlPtr.get()};
7063     }
7064 
init(vkt::Context &,RayTracingProperties *)7065     bool init(vkt::Context & /* context           */, RayTracingProperties * /* rtPropertiesPtr */) final
7066     {
7067         m_specializationInfoMapEntry.constantID = 1;
7068         m_specializationInfoMapEntry.offset     = 0;
7069         m_specializationInfoMapEntry.size       = sizeof(uint32_t);
7070 
7071         m_specializationInfo.dataSize      = sizeof(uint32_t);
7072         m_specializationInfo.mapEntryCount = 1;
7073         m_specializationInfo.pData         = reinterpret_cast<const void *>(&m_nRayPayloadU32s);
7074         m_specializationInfo.pMapEntries   = &m_specializationInfoMapEntry;
7075 
7076         return true;
7077     }
7078 
resetTLAS()7079     void resetTLAS() final
7080     {
7081         m_tlPtr.reset();
7082     }
7083 
initAS(vkt::Context & context,RayTracingProperties *,VkCommandBuffer commandBuffer)7084     void initAS(vkt::Context &context, RayTracingProperties * /* rtPropertiesPtr */,
7085                 VkCommandBuffer commandBuffer) final
7086     {
7087         std::unique_ptr<GridASProvider> asProviderPtr(new GridASProvider(tcu::Vec3(0, 0, 0), /* gridStartXYZ          */
7088                                                                          tcu::Vec3(1, 1, 1), /* gridCellSizeXYZ       */
7089                                                                          m_gridSizeXYZ,
7090                                                                          tcu::Vec3(6, 0, 0), /* gridInterCellDeltaXYZ */
7091                                                                          m_geometryType));
7092 
7093         m_tlPtr = asProviderPtr->createTLAS(context, m_asStructureLayout, commandBuffer,
7094                                             VK_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION_BIT_KHR, nullptr, nullptr);
7095     }
7096 
initPrograms(SourceCollections & programCollection) const7097     void initPrograms(SourceCollections &programCollection) const final
7098     {
7099         const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4,
7100                                                   0u,    /* flags        */
7101                                                   true); /* allowSpirv14 */
7102 
7103         const char *constantDefinitions = "layout(constant_id = 1) const uint N_UINTS_IN_RAY_PAYLOAD = 1;\n";
7104 
7105         const char *rayPayloadDefinition = "\n"
7106                                            "layout(location = 0) rayPayloadEXT block\n"
7107                                            "{\n"
7108                                            "    uint values[N_UINTS_IN_RAY_PAYLOAD];\n"
7109                                            "};\n"
7110                                            "\n";
7111 
7112         const char *rayPayloadInDefinition = "\n"
7113                                              "layout(location = 0) rayPayloadInEXT block\n"
7114                                              "{\n"
7115                                              "    uint values[N_UINTS_IN_RAY_PAYLOAD];\n"
7116                                              "};\n"
7117                                              "\n";
7118 
7119         const char *resultBufferDefinition = "layout(set      = 0, binding = 0, std430) buffer result\n"
7120                                              "{\n"
7121                                              "    uint nItemsStored;\n"
7122                                              "    uint resultValues[];\n"
7123                                              "};\n";
7124 
7125         {
7126             std::stringstream css;
7127 
7128             css << "#version 460 core\n"
7129                    "\n"
7130                    "#extension GL_EXT_ray_tracing : require\n"
7131                    "\n" +
7132                        de::toString(constantDefinitions) + de::toString(resultBufferDefinition) +
7133                        de::toString(rayPayloadInDefinition) +
7134                        "\n"
7135                        "void main()\n"
7136                        "{\n"
7137                        "    uint nItem = atomicAdd(nItemsStored, 1);\n"
7138                        "\n"
7139                        "    for (uint nUint = 0; nUint < N_UINTS_IN_RAY_PAYLOAD; ++nUint)\n"
7140                        "    {\n"
7141                        "        resultValues[nItem * N_UINTS_IN_RAY_PAYLOAD + nUint] = values[nUint];\n"
7142                        "    }\n"
7143                        "}\n";
7144 
7145             programCollection.glslSources.add("ahit") << glu::AnyHitSource(css.str()) << buildOptions;
7146         }
7147 
7148         {
7149             std::stringstream css;
7150 
7151             css << "#version 460 core\n"
7152                    "\n"
7153                    "#extension GL_EXT_ray_tracing : require\n"
7154                    "\n" +
7155                        de::toString(constantDefinitions) + de::toString(resultBufferDefinition) +
7156                        de::toString(rayPayloadInDefinition) +
7157                        "\n"
7158                        "void main()\n"
7159                        "{\n"
7160                        "    uint nItem = atomicAdd(nItemsStored, 1);\n"
7161                        "\n"
7162                        "    for (uint nUint = 0; nUint < N_UINTS_IN_RAY_PAYLOAD; ++nUint)\n"
7163                        "    {\n"
7164                        "        resultValues[nItem * N_UINTS_IN_RAY_PAYLOAD + nUint] = values[nUint];\n"
7165                        "    }\n"
7166                        "}\n";
7167 
7168             programCollection.glslSources.add("chit") << glu::ClosestHitSource(css.str()) << buildOptions;
7169         }
7170 
7171         {
7172             std::stringstream css;
7173 
7174             css << "#version 460 core\n"
7175                    "\n"
7176                    "#extension GL_EXT_ray_tracing : require\n"
7177                    "\n"
7178                    "void main()\n"
7179                    "{\n"
7180                    "    reportIntersectionEXT(0.95f, 0);\n"
7181                    "}\n";
7182 
7183             programCollection.glslSources.add("intersection") << glu::IntersectionSource(css.str()) << buildOptions;
7184         }
7185 
7186         {
7187             std::stringstream css;
7188 
7189             css << "#version 460 core\n"
7190                    "\n"
7191                    "#extension GL_EXT_ray_tracing : require\n"
7192                    "\n" +
7193                        de::toString(constantDefinitions) + de::toString(resultBufferDefinition) +
7194                        de::toString(rayPayloadInDefinition) +
7195                        "\n"
7196                        "void main()\n"
7197                        "{\n"
7198                        "    uint nItem = atomicAdd(nItemsStored, 1);\n"
7199                        "\n"
7200                        "    for (uint nUint = 0; nUint < N_UINTS_IN_RAY_PAYLOAD; ++nUint)\n"
7201                        "    {\n"
7202                        "        resultValues[nItem * N_UINTS_IN_RAY_PAYLOAD + nUint] = values[nUint];\n"
7203                        "    }\n"
7204                        "}\n";
7205 
7206             programCollection.glslSources.add("miss") << glu::MissSource(css.str()) << buildOptions;
7207         }
7208 
7209         {
7210             std::stringstream css;
7211 
7212             css << "#version 460 core\n"
7213                    "\n"
7214                    "#extension GL_EXT_ray_tracing : require\n"
7215                    "\n"
7216                    "layout(set = 0, binding = 1) uniform accelerationStructureEXT accelerationStructure;\n"
7217                    "\n" +
7218                        de::toString(constantDefinitions) + de::toString(rayPayloadDefinition) +
7219                        "void main()\n"
7220                        "{\n"
7221                        "    uint  nInvocation  = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + "
7222                        "gl_LaunchIDEXT.y * gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
7223                        "    uint  rayFlags     = 0;\n"
7224                        "    float tmin         = 0.001;\n"
7225                        "    float tmax         = 2.1;\n"
7226                        "\n"
7227                        "    uint  cullMask     = 0xFF;\n"
7228                        "    vec3  cellStartXYZ = vec3(nInvocation * 3.0, 0.0, 0.0);\n"
7229                        "    vec3  cellEndXYZ   = cellStartXYZ + vec3(1.0);\n"
7230                        "    vec3  target       = mix(cellStartXYZ, cellEndXYZ, vec3(0.5) );\n"
7231                        "    vec3  origin       = target - vec3(0, 2, 0);\n"
7232                        "    vec3  direct       = normalize(target - origin);\n"
7233                        "\n"
7234                        "    for (uint nUint = 0; nUint < N_UINTS_IN_RAY_PAYLOAD; ++nUint)\n"
7235                        "    {\n"
7236                        "        values[nUint] = (1 + nUint);\n"
7237                        "    }\n"
7238                        "\n"
7239                        "    traceRayEXT(accelerationStructure, rayFlags, cullMask, 0, 0, 0, origin, tmin, direct, "
7240                        "tmax, 0);\n"
7241                        "}\n";
7242 
7243             programCollection.glslSources.add("rgen") << glu::RaygenSource(css.str()) << buildOptions;
7244         }
7245     }
7246 
verifyResultBuffer(const void * resultDataPtr) const7247     bool verifyResultBuffer(const void *resultDataPtr) const final
7248     {
7249         const uint32_t *resultU32Ptr = reinterpret_cast<const uint32_t *>(resultDataPtr);
7250         bool result                  = false;
7251 
7252         const auto nItemsStored                   = *resultU32Ptr;
7253         const auto nRays                          = m_gridSizeXYZ[0] * m_gridSizeXYZ[1] * m_gridSizeXYZ[2];
7254         const auto nMissShaderInvocationsExpected = nRays / 2;
7255         const auto nAHitShaderInvocationsExpected = nRays / 2;
7256         const auto nCHitShaderInvocationsExpected = nAHitShaderInvocationsExpected;
7257         const auto nResultStoresExpected =
7258             nMissShaderInvocationsExpected + nAHitShaderInvocationsExpected + nCHitShaderInvocationsExpected;
7259 
7260         if (nItemsStored != nResultStoresExpected)
7261         {
7262             goto end;
7263         }
7264 
7265         for (uint32_t nItem = 0; nItem < nItemsStored; ++nItem)
7266         {
7267             const auto resultItemDataPtr = resultU32Ptr + 1 /* nItemsStored */ + nItem * m_nRayPayloadU32s;
7268 
7269             for (uint32_t nValue = 0; nValue < m_nRayPayloadU32s; ++nValue)
7270             {
7271                 if (resultItemDataPtr[nValue] != (1 + nValue))
7272                 {
7273                     goto end;
7274                 }
7275             }
7276         }
7277 
7278         result = true;
7279     end:
7280         return result;
7281     }
7282 
7283 private:
7284     const AccelerationStructureLayout m_asStructureLayout;
7285     const GeometryType m_geometryType;
7286 
7287     const tcu::UVec3 m_gridSizeXYZ;
7288     uint32_t m_nRayPayloadU32s;
7289     std::unique_ptr<TopLevelAccelerationStructure> m_tlPtr;
7290 
7291     VkSpecializationInfo m_specializationInfo;
7292     VkSpecializationMapEntry m_specializationInfoMapEntry;
7293 };
7294 
7295 class TerminationTest : public TestBase
7296 {
7297 public:
7298     enum class Mode
7299     {
7300         IGNORE_ANY_HIT_STATICALLY,
7301         IGNORE_ANY_HIT_DYNAMICALLY,
7302         TERMINATE_ANY_HIT_STATICALLY,
7303         TERMINATE_ANY_HIT_DYNAMICALLY,
7304         TERMINATE_INTERSECTION_STATICALLY,
7305         TERMINATE_INTERSECTION_DYNAMICALLY,
7306 
7307         UNKNOWN
7308     };
7309 
getModeFromTestType(const TestType & testType)7310     static Mode getModeFromTestType(const TestType &testType)
7311     {
7312         Mode result = Mode::UNKNOWN;
7313 
7314         switch (testType)
7315         {
7316         case TestType::IGNORE_ANY_HIT_DYNAMICALLY:
7317             result = Mode::IGNORE_ANY_HIT_DYNAMICALLY;
7318             break;
7319         case TestType::IGNORE_ANY_HIT_STATICALLY:
7320             result = Mode::IGNORE_ANY_HIT_STATICALLY;
7321             break;
7322         case TestType::TERMINATE_ANY_HIT_DYNAMICALLY:
7323             result = Mode::TERMINATE_ANY_HIT_DYNAMICALLY;
7324             break;
7325         case TestType::TERMINATE_ANY_HIT_STATICALLY:
7326             result = Mode::TERMINATE_ANY_HIT_STATICALLY;
7327             break;
7328         case TestType::TERMINATE_INTERSECTION_DYNAMICALLY:
7329             result = Mode::TERMINATE_INTERSECTION_DYNAMICALLY;
7330             break;
7331         case TestType::TERMINATE_INTERSECTION_STATICALLY:
7332             result = Mode::TERMINATE_INTERSECTION_STATICALLY;
7333             break;
7334 
7335         default:
7336         {
7337             DE_ASSERT(false && "This should never happen");
7338         }
7339         }
7340 
7341         return result;
7342     }
7343 
TerminationTest(const Mode & mode)7344     TerminationTest(const Mode &mode) : m_mode(mode)
7345     {
7346         /* Stub */
7347     }
7348 
~TerminationTest()7349     ~TerminationTest()
7350     {
7351         /* Stub */
7352     }
7353 
getCHitShaderCollectionShaderNames() const7354     std::vector<std::string> getCHitShaderCollectionShaderNames() const final
7355     {
7356         return {};
7357     }
7358 
getDispatchSize() const7359     tcu::UVec3 getDispatchSize() const final
7360     {
7361         return tcu::UVec3(1, 1, 1);
7362     }
7363 
getResultBufferStartData() const7364     std::vector<uint8_t> getResultBufferStartData() const final
7365     {
7366         auto resultU8Vec      = std::vector<uint8_t>(getResultBufferSize());
7367         auto resultU32DataPtr = reinterpret_cast<uint32_t *>(resultU8Vec.data());
7368 
7369         memset(resultU8Vec.data(), 0, resultU8Vec.size());
7370 
7371         if (m_mode == Mode::IGNORE_ANY_HIT_DYNAMICALLY || m_mode == Mode::TERMINATE_ANY_HIT_DYNAMICALLY)
7372         {
7373             resultU32DataPtr[2] = 1;
7374         }
7375         else if (m_mode == Mode::TERMINATE_INTERSECTION_DYNAMICALLY)
7376         {
7377             resultU32DataPtr[3] = 1;
7378         }
7379 
7380         return resultU8Vec;
7381     }
7382 
getResultBufferSize() const7383     uint32_t getResultBufferSize() const final
7384     {
7385         const uint32_t nExtraUints =
7386             (m_mode == Mode::IGNORE_ANY_HIT_DYNAMICALLY || m_mode == Mode::TERMINATE_ANY_HIT_DYNAMICALLY ||
7387              m_mode == Mode::TERMINATE_INTERSECTION_DYNAMICALLY) ?
7388                 1 :
7389                 0;
7390         const uint32_t nResultUints =
7391             (m_mode == Mode::TERMINATE_INTERSECTION_DYNAMICALLY || m_mode == Mode::TERMINATE_INTERSECTION_STATICALLY) ?
7392                 3 :
7393                 2;
7394 
7395         return static_cast<uint32_t>(sizeof(uint32_t)) * (nExtraUints + nResultUints);
7396     }
7397 
getTLASPtrVecToBind() const7398     std::vector<TopLevelAccelerationStructure *> getTLASPtrVecToBind() const final
7399     {
7400         return {m_tlPtr.get()};
7401     }
7402 
resetTLAS()7403     void resetTLAS() final
7404     {
7405         m_tlPtr.reset();
7406     }
7407 
initAS(vkt::Context & context,RayTracingProperties *,VkCommandBuffer commandBuffer)7408     void initAS(vkt::Context &context, RayTracingProperties * /* rtPropertiesPtr */,
7409                 VkCommandBuffer commandBuffer) final
7410     {
7411         if (m_mode == Mode::TERMINATE_INTERSECTION_DYNAMICALLY || m_mode == Mode::TERMINATE_INTERSECTION_STATICALLY)
7412         {
7413             const tcu::Vec3 gridCellSizeXYZ       = tcu::Vec3(2, 1, 1);
7414             const tcu::Vec3 gridInterCellDeltaXYZ = tcu::Vec3(3, 3, 3);
7415             const tcu::UVec3 gridSizeXYZ          = tcu::UVec3(1, 1, 1);
7416             const tcu::Vec3 gridStartXYZ          = tcu::Vec3(-1, -1, -1);
7417 
7418             m_asProviderPtr.reset(new GridASProvider(gridStartXYZ, gridCellSizeXYZ, gridSizeXYZ, gridInterCellDeltaXYZ,
7419                                                      GeometryType::AABB));
7420         }
7421         else
7422         {
7423             m_asProviderPtr.reset(new TriASProvider());
7424         }
7425 
7426         m_tlPtr = m_asProviderPtr->createTLAS(context, AccelerationStructureLayout::ONE_TL_ONE_BL_ONE_GEOMETRY,
7427                                               commandBuffer, VK_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION_BIT_KHR,
7428                                               nullptr,  /* optASPropertyProviderPtr */
7429                                               nullptr); /* optASFedbackPtr          */
7430     }
7431 
initPrograms(SourceCollections & programCollection) const7432     void initPrograms(SourceCollections &programCollection) const final
7433     {
7434         const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4,
7435                                                   0u,    /* flags        */
7436                                                   true); /* allowSpirv14 */
7437 
7438         const std::string resultBufferSizeString = de::toString(getResultBufferSize() / sizeof(uint32_t));
7439 
7440         {
7441             std::string aHitShader;
7442 
7443             switch (m_mode)
7444             {
7445             case Mode::IGNORE_ANY_HIT_DYNAMICALLY:
7446             {
7447                 aHitShader = "#version 460 core\n"
7448                              "\n"
7449                              "#extension GL_EXT_ray_tracing : require\n"
7450                              "\n"
7451                              "hitAttributeEXT vec3 unusedAttribute;\n"
7452                              "\n"
7453                              "layout(location = 0) rayPayloadInEXT      vec3 unusedPayload;\n"
7454                              "layout(set      = 0, binding = 0, std430) buffer result\n"
7455                              "{\n"
7456                              "    uint resultData[" +
7457                              resultBufferSizeString +
7458                              "];\n"
7459                              "};\n"
7460                              "\n"
7461                              "void ignoreIntersectionWrapper()\n"
7462                              "{\n"
7463                              "    ignoreIntersectionEXT;\n"
7464                              "}\n"
7465                              "\n"
7466                              "void main()\n"
7467                              "{\n"
7468                              "\n"
7469                              "    if (resultData[2] == 1)\n"
7470                              "    {\n"
7471                              "        ignoreIntersectionWrapper();\n"
7472                              "    }\n"
7473                              "\n"
7474                              "    resultData[0] = 1;\n"
7475                              "}\n";
7476 
7477                 break;
7478             }
7479 
7480             case Mode::IGNORE_ANY_HIT_STATICALLY:
7481             {
7482                 aHitShader = "#version 460 core\n"
7483                              "\n"
7484                              "#extension GL_EXT_ray_tracing : require\n"
7485                              "\n"
7486                              "hitAttributeEXT vec3 unusedAttribute;\n"
7487                              "\n"
7488                              "layout(location = 0) rayPayloadInEXT      vec3 unusedPayload;\n"
7489                              "layout(set      = 0, binding = 0, std430) buffer result\n"
7490                              "{\n"
7491                              "    uint resultData[" +
7492                              resultBufferSizeString +
7493                              "];\n"
7494                              "};\n"
7495                              "\n"
7496                              "void ignoreIntersectionWrapper()\n"
7497                              "{\n"
7498                              "    ignoreIntersectionEXT;\n"
7499                              "}\n"
7500                              "\n"
7501                              "void main()\n"
7502                              "{\n"
7503                              "    ignoreIntersectionWrapper();\n"
7504                              "\n"
7505                              "    resultData[0] = 1;\n"
7506                              "}\n";
7507 
7508                 break;
7509             }
7510 
7511             case Mode::TERMINATE_ANY_HIT_DYNAMICALLY:
7512             {
7513                 aHitShader = "#version 460 core\n"
7514                              "\n"
7515                              "#extension GL_EXT_ray_tracing : require\n"
7516                              "\n"
7517                              "hitAttributeEXT vec3 unusedAttribute;\n"
7518                              "\n"
7519                              "layout(location = 0) rayPayloadInEXT      vec3 unusedPayload;\n"
7520                              "layout(set      = 0, binding = 0, std430) buffer result\n"
7521                              "{\n"
7522                              "    uint resultData[" +
7523                              resultBufferSizeString +
7524                              "];\n"
7525                              "};\n"
7526                              "\n"
7527                              "void terminateRayWrapper()\n"
7528                              "{\n"
7529                              "    terminateRayEXT;\n"
7530                              "}\n"
7531                              "\n"
7532                              "void main()\n"
7533                              "{\n"
7534                              "    if (resultData[2] == 1)\n"
7535                              "    {\n"
7536                              "        terminateRayWrapper();\n"
7537                              "    }\n"
7538                              "\n"
7539                              "    resultData[0] = 1;\n"
7540                              "}\n";
7541 
7542                 break;
7543             }
7544 
7545             case Mode::TERMINATE_ANY_HIT_STATICALLY:
7546             case Mode::TERMINATE_INTERSECTION_STATICALLY:
7547             {
7548                 aHitShader = "#version 460 core\n"
7549                              "\n"
7550                              "#extension GL_EXT_ray_tracing : require\n"
7551                              "\n"
7552                              "hitAttributeEXT vec3 unusedAttribute;\n"
7553                              "\n"
7554                              "layout(location = 0) rayPayloadInEXT      vec3 unusedPayload;\n"
7555                              "layout(set      = 0, binding = 0, std430) buffer result\n"
7556                              "{\n"
7557                              "    uint resultData[" +
7558                              resultBufferSizeString +
7559                              "];\n"
7560                              "};\n"
7561                              "\n"
7562                              "void terminateRayWrapper()\n"
7563                              "{\n"
7564                              "    terminateRayEXT;\n"
7565                              "}\n"
7566                              "\n"
7567                              "void main()\n"
7568                              "{\n"
7569                              "    terminateRayWrapper();\n"
7570                              "\n"
7571                              "    resultData[0] = 1;\n"
7572                              "}\n";
7573 
7574                 break;
7575             }
7576 
7577             case Mode::TERMINATE_INTERSECTION_DYNAMICALLY:
7578             {
7579                 aHitShader = "#version 460 core\n"
7580                              "\n"
7581                              "#extension GL_EXT_ray_tracing : require\n"
7582                              "\n"
7583                              "hitAttributeEXT vec3 unusedAttribute;\n"
7584                              "\n"
7585                              "layout(location = 0) rayPayloadInEXT      vec3 unusedPayload;\n"
7586                              "layout(set      = 0, binding = 0, std430) buffer result\n"
7587                              "{\n"
7588                              "    uint resultData[" +
7589                              resultBufferSizeString +
7590                              "];\n"
7591                              "};\n"
7592                              "\n"
7593                              "void terminateRayWrapper()\n"
7594                              "{\n"
7595                              "    terminateRayEXT;\n"
7596                              "}\n"
7597                              "\n"
7598                              "void main()\n"
7599                              "{\n"
7600                              "    if (resultData[3] == 1)\n"
7601                              "    {\n"
7602                              "        terminateRayWrapper();\n"
7603                              "    }\n"
7604                              "\n"
7605                              "    resultData[0] = 1;\n"
7606                              "}\n";
7607 
7608                 break;
7609             }
7610 
7611             default:
7612             {
7613                 DE_ASSERT(false);
7614             }
7615             }
7616 
7617             programCollection.glslSources.add("ahit") << glu::AnyHitSource(aHitShader) << buildOptions;
7618         }
7619 
7620         if (m_mode == Mode::TERMINATE_INTERSECTION_DYNAMICALLY || m_mode == Mode::TERMINATE_INTERSECTION_STATICALLY)
7621         {
7622             std::stringstream css;
7623 
7624             css << "#version 460 core\n"
7625                    "\n"
7626                    "#extension GL_EXT_ray_tracing : require\n"
7627                    "\n"
7628                    "hitAttributeEXT vec3 hitAttribute;\n"
7629                    "\n"
7630                    "layout(set = 0, binding = 0, std430) buffer result\n"
7631                    "{\n"
7632                    "    uint resultData[4];\n"
7633                    "};\n"
7634                    "\n"
7635                    "void generateIntersection()\n"
7636                    "{\n"
7637                    "    reportIntersectionEXT(0.95f, 0);\n"
7638                    "}\n"
7639                    "\n"
7640                    "void main()\n"
7641                    "{\n";
7642 
7643             if (m_mode == Mode::TERMINATE_INTERSECTION_DYNAMICALLY)
7644             {
7645                 css << "    if (resultData[3] == 1)\n"
7646                        "    {\n";
7647             }
7648 
7649             css << "    generateIntersection();\n";
7650 
7651             if (m_mode == Mode::TERMINATE_INTERSECTION_DYNAMICALLY)
7652             {
7653                 css << "    }\n";
7654             }
7655 
7656             css << "\n"
7657                    "    resultData[2] = 1;\n"
7658                    "}\n";
7659 
7660             programCollection.glslSources.add("intersection") << glu::IntersectionSource(css.str()) << buildOptions;
7661         }
7662 
7663         {
7664             std::stringstream css;
7665 
7666             css << "#version 460 core\n"
7667                    "\n"
7668                    "#extension GL_EXT_ray_tracing : require\n"
7669                    "\n"
7670                    "layout(location = 0) rayPayloadInEXT      vec3   unusedPayload;\n"
7671                    "layout(set      = 0, binding = 0, std430) buffer result\n"
7672                    "{\n"
7673                    "    uint resultData[2];\n"
7674                    "};\n"
7675                    "\n"
7676                    "void main()\n"
7677                    "{\n"
7678                    "    resultData[1] = 1;\n"
7679                    "}\n";
7680 
7681             programCollection.glslSources.add("miss") << glu::MissSource(css.str()) << buildOptions;
7682         }
7683 
7684         {
7685             std::stringstream css;
7686 
7687             css << "#version 460 core\n"
7688                    "\n"
7689                    "#extension GL_EXT_ray_tracing : require\n"
7690                    "\n"
7691                    "layout(location = 0)              rayPayloadEXT vec3                     unusedPayload;\n"
7692                    "layout(set      = 0, binding = 1) uniform       accelerationStructureEXT topLevelAS;\n"
7693                    "\n"
7694                    "void main()\n"
7695                    "{\n"
7696                    "    uint  nInvocation = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + "
7697                    "gl_LaunchIDEXT.y * gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
7698                    "    uint  rayFlags    = 0;\n"
7699                    "    uint  cullMask    = 0xFF;\n"
7700                    "    float tmin        = 0.001;\n"
7701                    "    float tmax        = 9.0;\n"
7702                    "    vec3  origin      = vec3(-1,  -1,  -1);\n"
7703                    "    vec3  target      = vec3(0.0, 0.5,  0);\n"
7704                    "    vec3  direct      = normalize(target - origin);\n"
7705                    "\n"
7706                    "    traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, direct, tmax, 0);\n"
7707                    "}\n";
7708 
7709             programCollection.glslSources.add("rgen") << glu::RaygenSource(css.str()) << buildOptions;
7710         }
7711     }
7712 
verifyResultBuffer(const void * resultDataPtr) const7713     bool verifyResultBuffer(const void *resultDataPtr) const final
7714     {
7715         const uint32_t *resultU32DataPtr = reinterpret_cast<const uint32_t *>(resultDataPtr);
7716         bool result                      = false;
7717 
7718         switch (m_mode)
7719         {
7720         case Mode::IGNORE_ANY_HIT_DYNAMICALLY:
7721         case Mode::IGNORE_ANY_HIT_STATICALLY:
7722         {
7723             if (resultU32DataPtr[0] != 0 || resultU32DataPtr[1] != 1)
7724             {
7725                 goto end;
7726             }
7727 
7728             result = true;
7729 
7730             break;
7731         }
7732 
7733         case Mode::TERMINATE_ANY_HIT_DYNAMICALLY:
7734         case Mode::TERMINATE_ANY_HIT_STATICALLY:
7735         {
7736             if (resultU32DataPtr[0] != 0 || resultU32DataPtr[1] != 0)
7737             {
7738                 goto end;
7739             }
7740 
7741             result = true;
7742 
7743             break;
7744         }
7745 
7746         case Mode::TERMINATE_INTERSECTION_DYNAMICALLY:
7747         case Mode::TERMINATE_INTERSECTION_STATICALLY:
7748         {
7749             if (resultU32DataPtr[0] != 0 || resultU32DataPtr[1] != 0 || resultU32DataPtr[2] != 0)
7750             {
7751                 goto end;
7752             }
7753 
7754             result = true;
7755 
7756             break;
7757         }
7758 
7759         default:
7760         {
7761             TCU_FAIL("This should never be reached");
7762         }
7763         }
7764 
7765     end:
7766         return result;
7767     }
7768 
7769 private:
7770     std::unique_ptr<ASProviderBase> m_asProviderPtr;
7771     const Mode m_mode;
7772     std::unique_ptr<TopLevelAccelerationStructure> m_tlPtr;
7773 };
7774 
7775 /* Generic misc test instance */
7776 class RayTracingMiscTestInstance : public TestInstance
7777 {
7778 public:
7779     RayTracingMiscTestInstance(Context &context, const CaseDef &data, TestBase *testPtr);
7780     ~RayTracingMiscTestInstance(void);
7781 
7782     tcu::TestStatus iterate(void);
7783 
7784 protected:
7785     void checkSupport(void) const;
7786     de::MovePtr<BufferWithMemory> runTest(void);
7787 
7788 private:
7789     CaseDef m_data;
7790 
7791     de::MovePtr<RayTracingProperties> m_rayTracingPropsPtr;
7792     TestBase *m_testPtr;
7793 };
7794 
RayTracingMiscTestInstance(Context & context,const CaseDef & data,TestBase * testPtr)7795 RayTracingMiscTestInstance::RayTracingMiscTestInstance(Context &context, const CaseDef &data, TestBase *testPtr)
7796     : vkt::TestInstance(context)
7797     , m_data(data)
7798     , m_rayTracingPropsPtr(makeRayTracingProperties(context.getInstanceInterface(), context.getPhysicalDevice()))
7799     , m_testPtr(testPtr)
7800 {
7801     m_testPtr->init(m_context, m_rayTracingPropsPtr.get());
7802 }
7803 
~RayTracingMiscTestInstance(void)7804 RayTracingMiscTestInstance::~RayTracingMiscTestInstance(void)
7805 {
7806     /* Stub */
7807 }
7808 
checkSupport(void) const7809 void RayTracingMiscTestInstance::checkSupport(void) const
7810 {
7811     if (m_testPtr->getResultBufferSize() > m_context.getDeviceVulkan11Properties().maxMemoryAllocationSize)
7812         TCU_THROW(NotSupportedError,
7813                   "VkPhysicalDeviceVulkan11Properties::maxMemoryAllocationSize too small, allocation might fail");
7814 }
7815 
/* Records, submits and waits for the ray tracing workload described by m_testPtr.
 *
 * Steps, in order:
 *   1. Create descriptor set layout / pool / set, pipeline layout, command pool and command buffer.
 *   2. Build the ray tracing pipeline from the shaders named by the test object (raygen is always "rgen").
 *   3. Query shader group stack sizes and report them via TestBase::onShaderStackSizeDiscovered().
 *   4. Create the callable / raygen / miss / hit shader binding tables.
 *   5. Create the host-visible result buffer, optionally seeded with the test's start data.
 *   6. Record: AS initialization, optional result buffer zero-fill, descriptor update, binds,
 *      pre-trace barrier, one cmdTraceRays per required invocation and the post-trace barrier.
 *   7. Submit, wait, invalidate the result buffer's mapped memory and call TestBase::resetTLAS().
 *
 * Returns the result buffer; the caller reads the results through its host pointer.
 */
runTest(void)7816 de::MovePtr<BufferWithMemory> RayTracingMiscTestInstance::runTest(void)
7817 {
7818     const DeviceInterface &deviceInterface = m_context.getDeviceInterface();
7819     const VkDevice deviceVk                = m_context.getDevice();
7820 
7821     const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
7822     const VkQueue queueVk           = m_context.getUniversalQueue();
7823     Allocator &allocator            = m_context.getDefaultAllocator();
7824 
7825     de::MovePtr<BufferWithMemory> resultBufferPtr;
7826 
7827     // Determine group indices
7828     const auto ahitCollectionShaderNameVec         = m_testPtr->getAHitShaderCollectionShaderNames();
7829     const auto chitCollectionShaderNameVec         = m_testPtr->getCHitShaderCollectionShaderNames();
7830     const auto intersectionCollectionShaderNameVec = m_testPtr->getIntersectionShaderCollectionShaderNames();
7831     const auto missCollectionShaderNameVec         = m_testPtr->getMissShaderCollectionShaderNames();
7832 
         // Any-hit, closest-hit and intersection shaders with the same index are added to the same
         // group (hitGroupIndex + index) below, so the hit group count is the largest of the three lists.
7833     const uint32_t nRaygenGroups = 1;
7834     const uint32_t nMissGroups   = static_cast<uint32_t>(missCollectionShaderNameVec.size());
7835     const uint32_t nHitGroups    = de::max(de::max(static_cast<uint32_t>(ahitCollectionShaderNameVec.size()),
7836                                                    static_cast<uint32_t>(chitCollectionShaderNameVec.size())),
7837                                            static_cast<uint32_t>(intersectionCollectionShaderNameVec.size()));
7838 
         // Group layout used for pipeline creation: the raygen group first, then all miss groups, then all hit groups.
7839     const uint32_t raygenGroupIndex = 0;
7840     const uint32_t missGroupIndex   = nRaygenGroups;
7841     const uint32_t hitGroupIndex    = missGroupIndex + nMissGroups;
7842 
7843     const auto callableShaderCollectionNames = m_testPtr->getCallableShaderCollectionNames();
7844     auto &collection                         = m_context.getBinaryCollection();
7845     const auto resultBufferSize              = m_testPtr->getResultBufferSize();
7846 
         // Binding 0: result storage buffer. Binding 1: array of top-level acceleration structures.
7847     const Move<VkDescriptorSetLayout> descriptorSetLayoutPtr =
7848         DescriptorSetLayoutBuilder()
7849             .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, ALL_RAY_TRACING_STAGES)
7850             .addArrayBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, m_testPtr->getASBindingArraySize(),
7851                              ALL_RAY_TRACING_STAGES)
7852             .build(deviceInterface, deviceVk);
7853 
7854     const Move<VkDescriptorPool> descriptorPoolPtr =
7855         DescriptorPoolBuilder()
7856             .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
7857             .addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, m_testPtr->getASBindingArraySize())
7858             .build(deviceInterface, deviceVk, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u); /* maxSets */
7859 
7860     const Move<VkDescriptorSet> descriptorSetPtr =
7861         makeDescriptorSet(deviceInterface, deviceVk, *descriptorPoolPtr, *descriptorSetLayoutPtr);
7862 
7863     const Move<VkPipelineLayout> pipelineLayoutPtr =
7864         m_testPtr->getPipelineLayout(deviceInterface, deviceVk, descriptorSetLayoutPtr.get());
7865 
7866     const Move<VkCommandPool> cmdPoolPtr = createCommandPool(deviceInterface, deviceVk, 0, /* pCreateInfo */
7867                                                              queueFamilyIndex);
7868 
7869     const Move<VkCommandBuffer> cmdBufferPtr =
7870         allocateCommandBuffer(deviceInterface, deviceVk, *cmdPoolPtr, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
7871 
7872     Move<VkPipeline> pipelineVkPtr;
7873     de::MovePtr<RayTracingPipeline> rayTracingPipelinePtr = de::newMovePtr<RayTracingPipeline>();
7874 
         // Raygen shader: always taken from the "rgen" entry of the binary collection.
7875     {
7876         Move<VkShaderModule> raygenShader =
7877             createShaderModule(deviceInterface, deviceVk, collection.get("rgen"), 0); /* flags */
7878 
7879         rayTracingPipelinePtr->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR, makeVkSharedPtr(raygenShader),
7880                                          raygenGroupIndex,
7881                                          m_testPtr->getSpecializationInfoPtr(VK_SHADER_STAGE_RAYGEN_BIT_KHR));
7882     }
7883 
         // Miss shaders: one group per shader name, starting at missGroupIndex.
7884     {
7885         for (uint32_t nMissShaderName = 0; nMissShaderName < static_cast<uint32_t>(missCollectionShaderNameVec.size());
7886              nMissShaderName++)
7887         {
7888             const auto &currentMissShaderName = missCollectionShaderNameVec.at(nMissShaderName);
7889             Move<VkShaderModule> missShader =
7890                 createShaderModule(deviceInterface, deviceVk, collection.get(currentMissShaderName), 0); /* flags */
7891 
7892             rayTracingPipelinePtr->addShader(VK_SHADER_STAGE_MISS_BIT_KHR, makeVkSharedPtr(missShader),
7893                                              missGroupIndex + nMissShaderName,
7894                                              m_testPtr->getSpecializationInfoPtr(VK_SHADER_STAGE_MISS_BIT_KHR));
7895         }
7896     }
7897 
         // Hit group shaders (any-hit, closest-hit, intersection), callable shaders and pipeline creation.
7898     {
7899         for (uint32_t nAHitShaderName = 0; nAHitShaderName < static_cast<uint32_t>(ahitCollectionShaderNameVec.size());
7900              nAHitShaderName++)
7901         {
7902             const auto &currentAHitShaderName = ahitCollectionShaderNameVec.at(nAHitShaderName);
7903             Move<VkShaderModule> anyHitShader =
7904                 createShaderModule(deviceInterface, deviceVk, collection.get(currentAHitShaderName), 0); /* flags */
7905 
7906             rayTracingPipelinePtr->addShader(VK_SHADER_STAGE_ANY_HIT_BIT_KHR, makeVkSharedPtr(anyHitShader),
7907                                              hitGroupIndex + nAHitShaderName,
7908                                              m_testPtr->getSpecializationInfoPtr(VK_SHADER_STAGE_ANY_HIT_BIT_KHR));
7909         }
7910 
7911         for (uint32_t nCHitShaderName = 0; nCHitShaderName < static_cast<uint32_t>(chitCollectionShaderNameVec.size());
7912              nCHitShaderName++)
7913         {
7914             const auto &currentCHitShaderName = chitCollectionShaderNameVec.at(nCHitShaderName);
7915             Move<VkShaderModule> closestHitShader =
7916                 createShaderModule(deviceInterface, deviceVk, collection.get(currentCHitShaderName), 0); /* flags */
7917 
7918             rayTracingPipelinePtr->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR, makeVkSharedPtr(closestHitShader),
7919                                              hitGroupIndex + nCHitShaderName,
7920                                              m_testPtr->getSpecializationInfoPtr(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR));
7921         }
7922 
             // Intersection shaders are only added when the case uses AABB geometry.
7923         if (m_data.geometryType == GeometryType::AABB || m_data.geometryType == GeometryType::AABB_AND_TRIANGLES)
7924         {
7925             for (uint32_t nIntersectionShaderName = 0;
7926                  nIntersectionShaderName < static_cast<uint32_t>(intersectionCollectionShaderNameVec.size());
7927                  nIntersectionShaderName++)
7928             {
7929                 const auto &currentIntersectionShaderName =
7930                     intersectionCollectionShaderNameVec.at(nIntersectionShaderName);
7931                 Move<VkShaderModule> intersectionShader = createShaderModule(
7932                     deviceInterface, deviceVk, collection.get(currentIntersectionShaderName), 0); /* flags */
7933 
7934                 rayTracingPipelinePtr->addShader(
7935                     VK_SHADER_STAGE_INTERSECTION_BIT_KHR, makeVkSharedPtr(intersectionShader),
7936                     hitGroupIndex + nIntersectionShaderName,
7937                     m_testPtr->getSpecializationInfoPtr(VK_SHADER_STAGE_INTERSECTION_BIT_KHR));
7938             }
7939         }
7940 
             // Callable shaders are placed at ShaderGroups::FIRST_CALLABLE_GROUP + index, not after the hit groups.
7941         for (uint32_t nCallableShader = 0;
7942              nCallableShader < static_cast<uint32_t>(callableShaderCollectionNames.size()); ++nCallableShader)
7943         {
7944             const auto &currentCallableShaderName = callableShaderCollectionNames.at(nCallableShader);
7945             Move<VkShaderModule> callableShader =
7946                 createShaderModule(deviceInterface, deviceVk, collection.get(currentCallableShaderName), 0); /* flags */
7947 
7948             rayTracingPipelinePtr->addShader(VK_SHADER_STAGE_CALLABLE_BIT_KHR, makeVkSharedPtr(callableShader),
7949                                              static_cast<uint32_t>(ShaderGroups::FIRST_CALLABLE_GROUP) +
7950                                                  nCallableShader,
7951                                              m_testPtr->getSpecializationInfoPtr(VK_SHADER_STAGE_CALLABLE_BIT_KHR));
7952         }
7953 
7954         if (m_testPtr->usesDynamicStackSize())
7955         {
7956             rayTracingPipelinePtr->addDynamicState(VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR);
7957         }
7958 
7959         rayTracingPipelinePtr->setMaxRecursionDepth(m_testPtr->getMaxRecursionDepthUsed());
7960 
7961         pipelineVkPtr = rayTracingPipelinePtr->createPipeline(deviceInterface, deviceVk, *pipelineLayoutPtr);
7962     }
7963 
7964     /* Cache shader stack size info */
         // NOTE(review): the queries below address groups through ShaderGroups::* constants rather than the
         // raygenGroupIndex / missGroupIndex / hitGroupIndex values used at pipeline creation; presumably the
         // two coincide for every test that reaches this path - confirm against the ShaderGroups definition.
         // Stack sizes that are not queried stay at 0; callable stack sizes are summed over all callable groups.
7965     {
7966         VkDeviceSize ahitShaderStackSize     = 0;
7967         VkDeviceSize callableShaderStackSize = 0;
7968         VkDeviceSize chitShaderStackSize     = 0;
7969         VkDeviceSize isectShaderStackSize    = 0;
7970         VkDeviceSize missShaderStackSize     = 0;
7971         VkDeviceSize raygenShaderStackSize   = 0;
7972 
7973         raygenShaderStackSize = deviceInterface.getRayTracingShaderGroupStackSizeKHR(
7974             deviceVk, *pipelineVkPtr, static_cast<uint32_t>(ShaderGroups::RAYGEN_GROUP),
7975             VK_SHADER_GROUP_SHADER_GENERAL_KHR);
7976 
7977         if (collection.contains("ahit"))
7978         {
7979             ahitShaderStackSize = deviceInterface.getRayTracingShaderGroupStackSizeKHR(
7980                 deviceVk, *pipelineVkPtr, static_cast<uint32_t>(ShaderGroups::HIT_GROUP),
7981                 VK_SHADER_GROUP_SHADER_ANY_HIT_KHR);
7982         }
7983 
7984         if (collection.contains("chit"))
7985         {
7986             chitShaderStackSize = deviceInterface.getRayTracingShaderGroupStackSizeKHR(
7987                 deviceVk, *pipelineVkPtr, static_cast<uint32_t>(ShaderGroups::HIT_GROUP),
7988                 VK_SHADER_GROUP_SHADER_CLOSEST_HIT_KHR);
7989         }
7990 
7991         if (m_data.geometryType == GeometryType::AABB || m_data.geometryType == GeometryType::AABB_AND_TRIANGLES)
7992         {
7993             if (collection.contains("intersection"))
7994             {
7995                 isectShaderStackSize = deviceInterface.getRayTracingShaderGroupStackSizeKHR(
7996                     deviceVk, *pipelineVkPtr, static_cast<uint32_t>(ShaderGroups::HIT_GROUP),
7997                     VK_SHADER_GROUP_SHADER_INTERSECTION_KHR);
7998             }
7999         }
8000 
8001         if (nMissGroups > 0u)
8002         {
8003             missShaderStackSize = deviceInterface.getRayTracingShaderGroupStackSizeKHR(
8004                 deviceVk, *pipelineVkPtr, static_cast<uint32_t>(ShaderGroups::MISS_GROUP),
8005                 VK_SHADER_GROUP_SHADER_GENERAL_KHR);
8006         }
8007 
8008         for (uint32_t nCallableShader = 0;
8009              nCallableShader < static_cast<uint32_t>(callableShaderCollectionNames.size()); ++nCallableShader)
8010         {
8011             callableShaderStackSize += deviceInterface.getRayTracingShaderGroupStackSizeKHR(
8012                 deviceVk, *pipelineVkPtr, static_cast<uint32_t>(ShaderGroups::FIRST_CALLABLE_GROUP) + nCallableShader,
8013                 VK_SHADER_GROUP_SHADER_GENERAL_KHR);
8014         }
8015 
8016         m_testPtr->onShaderStackSizeDiscovered(raygenShaderStackSize, ahitShaderStackSize, chitShaderStackSize,
8017                                                missShaderStackSize, callableShaderStackSize, isectShaderStackSize);
8018     }
8019 
         // Shader binding tables. The callable, miss and hit tables are only created when the corresponding
         // groups exist; their pointers stay null otherwise.
8020     auto callableShaderBindingTablePtr = de::MovePtr<BufferWithMemory>();
8021 
8022     if (callableShaderCollectionNames.size() != 0)
8023     {
8024         callableShaderBindingTablePtr = rayTracingPipelinePtr->createShaderBindingTable(
8025             deviceInterface, deviceVk, *pipelineVkPtr, allocator, m_rayTracingPropsPtr->getShaderGroupHandleSize(),
8026             m_rayTracingPropsPtr->getShaderGroupBaseAlignment(),
8027             static_cast<uint32_t>(ShaderGroups::FIRST_CALLABLE_GROUP),
8028             static_cast<uint32_t>(callableShaderCollectionNames.size()), /* groupCount                  */
8029             0u,                                                          /* additionalBufferCreateFlags */
8030             0u,                                                          /* additionalBufferUsageFlags  */
8031             MemoryRequirement::Any, 0u,                                  /* opaqueCaptureAddress       */
8032             0u,                                                          /* shaderBindingTableOffset   */
8033             m_testPtr->getShaderRecordSize(ShaderGroups::FIRST_CALLABLE_GROUP));
8034     }
8035 
8036     const auto raygenShaderBindingTablePtr = rayTracingPipelinePtr->createShaderBindingTable(
8037         deviceInterface, deviceVk, *pipelineVkPtr, allocator, m_rayTracingPropsPtr->getShaderGroupHandleSize(),
8038         m_rayTracingPropsPtr->getShaderGroupBaseAlignment(), raygenGroupIndex,
8039         nRaygenGroups,              /* groupCount                  */
8040         0u,                         /* additionalBufferCreateFlags */
8041         0u,                         /* additionalBufferUsageFlags  */
8042         MemoryRequirement::Any, 0u, /* opaqueCaptureAddress        */
8043         0u);                        /* shaderBindingTableOffset    */
8044 
8045     auto missShaderBindingTablePtr = de::MovePtr<BufferWithMemory>();
8046     if (nMissGroups > 0u)
8047     {
8048         const void *missShaderBindingGroupShaderRecordDataPtr =
8049             m_testPtr->getShaderRecordData(ShaderGroups::MISS_GROUP);
8050         missShaderBindingTablePtr = rayTracingPipelinePtr->createShaderBindingTable(
8051             deviceInterface, deviceVk, *pipelineVkPtr, allocator, m_rayTracingPropsPtr->getShaderGroupHandleSize(),
8052             m_rayTracingPropsPtr->getShaderGroupBaseAlignment(), missGroupIndex,
8053             nMissGroups,                /* groupCount                  */
8054             0u,                         /* additionalBufferCreateFlags */
8055             0u,                         /* additionalBufferUsageFlags  */
8056             MemoryRequirement::Any, 0u, /* opaqueCaptureAddress       */
8057             0u,                         /* shaderBindingTableOffset   */
8058             m_testPtr->getShaderRecordSize(ShaderGroups::MISS_GROUP), &missShaderBindingGroupShaderRecordDataPtr);
8059     }
8060 
8061     auto hitShaderBindingTablePtr = de::MovePtr<BufferWithMemory>();
8062     if (nHitGroups > 0u)
8063     {
8064         const void *hitShaderBindingGroupShaderRecordDataPtr = m_testPtr->getShaderRecordData(ShaderGroups::HIT_GROUP);
8065         hitShaderBindingTablePtr                             = rayTracingPipelinePtr->createShaderBindingTable(
8066             deviceInterface, deviceVk, *pipelineVkPtr, allocator, m_rayTracingPropsPtr->getShaderGroupHandleSize(),
8067             m_rayTracingPropsPtr->getShaderGroupBaseAlignment(), hitGroupIndex,
8068             nHitGroups,                 /* groupCount                  */
8069             0u,                         /* additionalBufferCreateFlags */
8070             0u,                         /* additionalBufferUsageFlags  */
8071             MemoryRequirement::Any, 0u, /* opaqueCaptureAddress       */
8072             0u,                         /* shaderBindingTableOffset   */
8073             m_testPtr->getShaderRecordSize(ShaderGroups::HIT_GROUP), &hitShaderBindingGroupShaderRecordDataPtr);
8074     }
8075 
         // Result buffer: host-visible storage buffer. If the test supplies start data, it is copied in
         // from the host here; otherwise the buffer is zero-filled on the device further below.
8076     {
8077         const auto resultBufferCreateInfo = makeBufferCreateInfo(
8078             resultBufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
8079         const auto resultBufferDataVec = m_testPtr->getResultBufferStartData();
8080 
8081         resultBufferPtr = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
8082             deviceInterface, deviceVk, allocator, resultBufferCreateInfo, MemoryRequirement::HostVisible));
8083 
8084         if (resultBufferDataVec.size() > 0)
8085         {
8086             DE_ASSERT(static_cast<uint32_t>(resultBufferDataVec.size()) == resultBufferSize);
8087 
8088             memcpy(resultBufferPtr->getAllocation().getHostPtr(), resultBufferDataVec.data(),
8089                    resultBufferDataVec.size());
8090 
8091             flushAlloc(deviceInterface, deviceVk, resultBufferPtr->getAllocation());
8092         }
8093     }
8094 
8095     beginCommandBuffer(deviceInterface, *cmdBufferPtr, 0u /* flags */);
8096     {
8097         m_testPtr->initAS(m_context, m_rayTracingPropsPtr.get(), *cmdBufferPtr);
8098 
             // Collect the raw handles of all top-level acceleration structures the test wants bound.
8099         std::vector<TopLevelAccelerationStructure *> tlasPtrVec = m_testPtr->getTLASPtrVecToBind();
8100         std::vector<VkAccelerationStructureKHR> tlasVkVec;
8101 
8102         for (auto &currentTLASPtr : tlasPtrVec)
8103         {
8104             tlasVkVec.push_back(*currentTLASPtr->getPtr());
8105         }
8106 
             // No start data was provided: clear the whole result buffer on the device and make the
             // transfer write available before the ray tracing shaders write to the buffer.
8107         if (m_testPtr->getResultBufferStartData().size() == 0)
8108         {
8109             deviceInterface.cmdFillBuffer(*cmdBufferPtr, **resultBufferPtr, 0, /* dstOffset */
8110                                           VK_WHOLE_SIZE, 0);                   /* data */
8111 
8112             {
8113                 const auto postFillBarrier = makeBufferMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, /* srcAccessMask */
8114                                                                      VK_ACCESS_SHADER_WRITE_BIT,   /* dstAccessMask */
8115                                                                      **resultBufferPtr, 0,         /* offset */
8116                                                                      VK_WHOLE_SIZE);
8117 
8118                 cmdPipelineBufferMemoryBarrier(deviceInterface, *cmdBufferPtr,
8119                                                VK_PIPELINE_STAGE_TRANSFER_BIT,               /* srcStageMask */
8120                                                VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, /* dstStageMask */
8121                                                &postFillBarrier);
8122             }
8123         }
8124 
             // Write the descriptor set: result buffer at binding 0, acceleration structure array at binding 1.
8125         {
8126             VkWriteDescriptorSetAccelerationStructureKHR accelerationStructureWriteDescriptorSet = {
8127                 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, //  VkStructureType sType;
8128                 DE_NULL,                                                           //  const void* pNext;
8129                 static_cast<uint32_t>(tlasVkVec.size()), //  uint32_t accelerationStructureCount;
8130                 tlasVkVec.data(),                        //  const VkAccelerationStructureKHR* pAccelerationStructures;
8131             };
8132 
8133             const auto descriptorResultBufferInfo = makeDescriptorBufferInfo(**resultBufferPtr, 0, /* offset */
8134                                                                              resultBufferSize);
8135 
8136             DescriptorSetUpdateBuilder()
8137                 .writeSingle(*descriptorSetPtr, DescriptorSetUpdateBuilder::Location::binding(0u),
8138                              VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorResultBufferInfo)
8139                 .writeArray(*descriptorSetPtr, DescriptorSetUpdateBuilder::Location::binding(1u),
8140                             VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, static_cast<uint32_t>(tlasVkVec.size()),
8141                             &accelerationStructureWriteDescriptorSet)
8142                 .update(deviceInterface, deviceVk);
8143         }
8144 
8145         deviceInterface.cmdBindDescriptorSets(*cmdBufferPtr, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayoutPtr,
8146                                               0,                          /* firstSet           */
8147                                               1,                          /* descriptorSetCount */
8148                                               &descriptorSetPtr.get(), 0, /* dynamicOffsetCount */
8149                                               DE_NULL);                   /* pDynamicOffsets    */
8150 
8151         deviceInterface.cmdBindPipeline(*cmdBufferPtr, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineVkPtr);
8152 
             // Barrier between acceleration structure builds and the ray tracing shaders. Cases of type
             // USE_MEMORY_ACCESS use the generic MEMORY_WRITE / MEMORY_READ access masks instead of the
             // acceleration-structure-specific ones.
8153         {
8154             const auto preTraceMemoryBarrier =
8155                 (m_data.type == TestType::USE_MEMORY_ACCESS) ?
8156                     makeMemoryBarrier(VK_ACCESS_MEMORY_WRITE_BIT, /* srcAccessMask */
8157                                       VK_ACCESS_MEMORY_READ_BIT)  /* dstAccessMask */
8158                     :
8159                     makeMemoryBarrier(VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR, /* srcAccessMask */
8160                                       VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR); /* dstAccessMask */
8161 
8162             cmdPipelineMemoryBarrier(deviceInterface, *cmdBufferPtr,
8163                                      VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, /* srcStageMask */
8164                                      VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR,           /* dstStageMask */
8165                                      &preTraceMemoryBarrier);
8166         }
8167 
8168         {
                 // Per-table stride: shader group handle plus the test's shader record size for that group,
                 // rounded up to a multiple of the handle size.
8169             const auto nTraceRaysInvocationsNeeded = m_testPtr->getNTraceRayInvocationsNeeded();
8170             const auto handleSize                  = m_rayTracingPropsPtr->getShaderGroupHandleSize();
8171             const auto missStride =
8172                 de::roundUp(handleSize + m_testPtr->getShaderRecordSize(ShaderGroups::MISS_GROUP), handleSize);
8173             const auto hitStride =
8174                 de::roundUp(handleSize + m_testPtr->getShaderRecordSize(ShaderGroups::HIT_GROUP), handleSize);
8175             const auto callStride = de::roundUp(
8176                 handleSize + m_testPtr->getShaderRecordSize(ShaderGroups::FIRST_CALLABLE_GROUP), handleSize);
8177             const auto raygenShaderBindingTableRegion = makeStridedDeviceAddressRegionKHR(
8178                 getBufferDeviceAddress(deviceInterface, deviceVk, raygenShaderBindingTablePtr->get(), 0 /* offset */),
8179                 handleSize, handleSize);
                 // Tables that were not created are passed as empty regions (null address, zero stride and size).
8180             const auto missShaderBindingTableRegion =
8181                 ((nMissGroups > 0u) ? makeStridedDeviceAddressRegionKHR(
8182                                           getBufferDeviceAddress(deviceInterface, deviceVk,
8183                                                                  missShaderBindingTablePtr->get(), 0 /* offset */),
8184                                           missStride, missStride * nMissGroups) :
8185                                       makeStridedDeviceAddressRegionKHR(DE_NULL, 0, /* stride */
8186                                                                         0 /* size   */));
8187             const auto hitShaderBindingTableRegion =
8188                 ((nHitGroups > 0u) ? makeStridedDeviceAddressRegionKHR(
8189                                          getBufferDeviceAddress(deviceInterface, deviceVk,
8190                                                                 hitShaderBindingTablePtr->get(), 0 /* offset */),
8191                                          hitStride, hitStride * nHitGroups) :
8192                                      makeStridedDeviceAddressRegionKHR(DE_NULL, 0, /* stride */
8193                                                                        0 /* size   */));
8194 
8195             const auto callableShaderBindingTableRegion =
8196                 (callableShaderCollectionNames.size() > 0) ?
8197                     makeStridedDeviceAddressRegionKHR(
8198                         getBufferDeviceAddress(deviceInterface, deviceVk, callableShaderBindingTablePtr->get(),
8199                                                0 /* offset */),
8200                         callStride, /* stride */
8201                         callStride * static_cast<uint32_t>(callableShaderCollectionNames.size())) :
8202                     makeStridedDeviceAddressRegionKHR(DE_NULL, 0, /* stride */
8203                                                       0 /* size   */);
8204 
8205             if (m_testPtr->usesDynamicStackSize())
8206             {
8207                 deviceInterface.cmdSetRayTracingPipelineStackSizeKHR(
8208                     *cmdBufferPtr, m_testPtr->getDynamicStackSize(m_testPtr->getMaxRecursionDepthUsed()));
8209             }
8210 
                 // The test object gets a callback before every trace so it can record per-invocation commands.
8211             for (uint32_t nInvocation = 0; nInvocation < nTraceRaysInvocationsNeeded; ++nInvocation)
8212             {
8213                 m_testPtr->onBeforeCmdTraceRays(nInvocation, m_context, *cmdBufferPtr, *pipelineLayoutPtr);
8214 
8215                 cmdTraceRays(deviceInterface, *cmdBufferPtr, &raygenShaderBindingTableRegion,
8216                              &missShaderBindingTableRegion, &hitShaderBindingTableRegion,
8217                              &callableShaderBindingTableRegion, m_testPtr->getDispatchSize()[0],
8218                              m_testPtr->getDispatchSize()[1], m_testPtr->getDispatchSize()[2]);
8219             }
8220         }
8221 
             // Make the ray tracing shader writes available to host reads of the result buffer.
8222         {
8223             const auto postTraceMemoryBarrier = (m_data.type == TestType::USE_MEMORY_ACCESS) ?
8224                                                     makeMemoryBarrier(VK_ACCESS_MEMORY_WRITE_BIT, /* srcAccessMask */
8225                                                                       VK_ACCESS_MEMORY_READ_BIT)  /* dstAccessMask */
8226                                                     :
8227                                                     makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, /* srcAccessMask */
8228                                                                       VK_ACCESS_HOST_READ_BIT);   /* dstAccessMask */
8229 
8230             cmdPipelineMemoryBarrier(deviceInterface, *cmdBufferPtr,
8231                                      VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, /* srcStageMask */
8232                                      VK_PIPELINE_STAGE_HOST_BIT,                   /* dstStageMask */
8233                                      &postTraceMemoryBarrier);
8234         }
8235     }
8236     endCommandBuffer(deviceInterface, *cmdBufferPtr);
8237 
8238     submitCommandsAndWait(deviceInterface, deviceVk, queueVk, cmdBufferPtr.get());
8239 
         // Invalidate the mapped range so the host pointer of the result buffer can be read by the caller.
8240     invalidateMappedMemoryRange(deviceInterface, deviceVk, resultBufferPtr->getAllocation().getMemory(),
8241                                 resultBufferPtr->getAllocation().getOffset(), VK_WHOLE_SIZE);
8242 
         // The submitted work has completed at this point, so the test object is asked to reset its TLAS.
8243     m_testPtr->resetTLAS();
8244 
8245     return resultBufferPtr;
8246 }
8247 
iterate(void)8248 tcu::TestStatus RayTracingMiscTestInstance::iterate(void)
8249 {
8250     checkSupport();
8251 
8252     const de::MovePtr<BufferWithMemory> bufferGPUPtr = runTest();
8253     const uint32_t *bufferGPUDataPtr                 = (uint32_t *)bufferGPUPtr->getAllocation().getHostPtr();
8254     const bool result                                = m_testPtr->verifyResultBuffer(bufferGPUDataPtr);
8255 
8256     if (result)
8257         return tcu::TestStatus::pass("Pass");
8258     else
8259         return tcu::TestStatus::fail("Fail");
8260 }
8261 
checkRTPipelineSupport(Context & context)8262 void checkRTPipelineSupport(Context &context)
8263 {
8264     context.requireDeviceFunctionality("VK_KHR_acceleration_structure");
8265     context.requireDeviceFunctionality("VK_KHR_buffer_device_address");
8266     context.requireDeviceFunctionality("VK_KHR_ray_tracing_pipeline");
8267 }
8268 
checkReuseCreationBufferSupport(Context & context,bool)8269 void checkReuseCreationBufferSupport(Context &context, bool)
8270 {
8271     checkRTPipelineSupport(context);
8272 }
8273 
initBasicHitBufferPrograms(vk::SourceCollections & programCollection)8274 void initBasicHitBufferPrograms(vk::SourceCollections &programCollection)
8275 {
8276     const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);
8277 
8278     std::ostringstream rgen;
8279     std::ostringstream chit;
8280 
8281     rgen << "#version 460\n"
8282          << "#extension GL_EXT_ray_tracing : require\n"
8283          << "layout(location=0) rayPayloadEXT vec3 unused;\n"
8284          << "layout(set=0, binding=0) uniform accelerationStructureEXT topLevelAS;\n"
8285          << "layout(set=0, binding=1) buffer OutputBuffer { float val; } outBuffer;\n"
8286          << "\n"
8287          << "void main()\n"
8288          << "{\n"
8289          << "  uint  rayFlags = 0u;\n"
8290          << "  uint  cullMask = 0xFFu;\n"
8291          << "  float tmin     = 0.0;\n"
8292          << "  float tmax     = 9.0;\n"
8293          << "  vec3  origin   = vec3(0.0, 0.0, 0.0);\n"
8294          << "  vec3  direct   = vec3(0.0, 0.0, 1.0);\n"
8295          << "  traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, direct, tmax, 0);\n"
8296          << "}\n";
8297 
8298     chit << "#version 460\n"
8299          << "#extension GL_EXT_ray_tracing : require\n"
8300          << "layout(location=0) rayPayloadInEXT vec3 unused;\n"
8301          << "layout(set=0, binding=0) uniform accelerationStructureEXT topLevelAS;\n"
8302          << "layout(set=0, binding=1) buffer OutputBuffer { float val; } outBuffer;\n"
8303          << "\n"
8304          << "void main()\n"
8305          << "{\n"
8306          << "  outBuffer.val = 1.0;\n"
8307          << "}\n";
8308 
8309     programCollection.glslSources.add("rgen") << glu::RaygenSource(updateRayTracingGLSL(rgen.str())) << buildOptions;
8310     programCollection.glslSources.add("chit")
8311         << glu::ClosestHitSource(updateRayTracingGLSL(chit.str())) << buildOptions;
8312 }
8313 
initReuseCreationBufferPrograms(vk::SourceCollections & programCollection,bool)8314 void initReuseCreationBufferPrograms(vk::SourceCollections &programCollection, bool)
8315 {
8316     initBasicHitBufferPrograms(programCollection);
8317 }
8318 
8319 // Creates an empty shader binding table with a zeroed-out shader group handle.
createEmptySBT(const DeviceInterface & vkd,VkDevice device,Allocator & alloc,uint32_t shaderGroupHandleSize)8320 de::MovePtr<BufferWithMemory> createEmptySBT(const DeviceInterface &vkd, VkDevice device, Allocator &alloc,
8321                                              uint32_t shaderGroupHandleSize)
8322 {
8323     const auto sbtSize  = static_cast<VkDeviceSize>(shaderGroupHandleSize);
8324     const auto sbtFlags = (VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_SHADER_BINDING_TABLE_BIT_KHR |
8325                            VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
8326     const auto sbtInfo  = makeBufferCreateInfo(sbtSize, sbtFlags);
8327     const auto sbtReqs  = (MemoryRequirement::HostVisible | MemoryRequirement::DeviceAddress);
8328 
8329     auto sbtBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vkd, device, alloc, sbtInfo, sbtReqs));
8330     auto &sbtAlloc = sbtBuffer->getAllocation();
8331     void *sbtData  = sbtAlloc.getHostPtr();
8332 
8333     deMemset(sbtData, 0, static_cast<size_t>(sbtSize));
8334     flushAlloc(vkd, device, sbtAlloc);
8335 
8336     return sbtBuffer;
8337 }
8338 
nullMissInstance(Context & context)8339 tcu::TestStatus nullMissInstance(Context &context)
8340 {
8341     const auto &vki    = context.getInstanceInterface();
8342     const auto physDev = context.getPhysicalDevice();
8343     const auto &vkd    = context.getDeviceInterface();
8344     const auto device  = context.getDevice();
8345     auto &alloc        = context.getDefaultAllocator();
8346     const auto qIndex  = context.getUniversalQueueFamilyIndex();
8347     const auto queue   = context.getUniversalQueue();
8348     const auto stages  = (VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR);
8349 
8350     // Command pool and buffer.
8351     const auto cmdPool      = makeCommandPool(vkd, device, qIndex);
8352     const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
8353     const auto cmdBuffer    = cmdBufferPtr.get();
8354 
8355     beginCommandBuffer(vkd, cmdBuffer);
8356 
8357     // Build acceleration structures.
8358     auto topLevelAS    = makeTopLevelAccelerationStructure();
8359     auto bottomLevelAS = makeBottomLevelAccelerationStructure();
8360 
8361     std::vector<tcu::Vec3> triangle;
8362     triangle.reserve(3u);
8363     triangle.emplace_back(0.0f, 1.0f, 10.0f);
8364     triangle.emplace_back(-1.0f, -1.0f, 10.0f);
8365     triangle.emplace_back(1.0f, -1.0f, 10.0f);
8366     bottomLevelAS->addGeometry(triangle, true /*triangles*/);
8367     bottomLevelAS->createAndBuild(vkd, device, cmdBuffer, alloc);
8368 
8369     de::SharedPtr<BottomLevelAccelerationStructure> blasSharedPtr(bottomLevelAS.release());
8370     topLevelAS->setInstanceCount(1);
8371     topLevelAS->addInstance(blasSharedPtr);
8372     topLevelAS->createAndBuild(vkd, device, cmdBuffer, alloc);
8373 
8374     // Create output buffer.
8375     const auto bufferSize       = static_cast<VkDeviceSize>(sizeof(float));
8376     const auto bufferCreateInfo = makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
8377     BufferWithMemory buffer(vkd, device, alloc, bufferCreateInfo, MemoryRequirement::HostVisible);
8378     auto &bufferAlloc = buffer.getAllocation();
8379 
8380     // Fill output buffer with an initial value.
8381     deMemset(bufferAlloc.getHostPtr(), 0, sizeof(float));
8382     flushAlloc(vkd, device, bufferAlloc);
8383 
8384     // Descriptor set layout and pipeline layout.
8385     DescriptorSetLayoutBuilder setLayoutBuilder;
8386     setLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, stages);
8387     setLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stages);
8388 
8389     const auto setLayout      = setLayoutBuilder.build(vkd, device);
8390     const auto pipelineLayout = makePipelineLayout(vkd, device, setLayout.get());
8391 
8392     // Descriptor pool and set.
8393     DescriptorPoolBuilder poolBuilder;
8394     poolBuilder.addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR);
8395     poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
8396     const auto descriptorPool = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
8397     const auto descriptorSet  = makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get());
8398 
8399     // Update descriptor set.
8400     {
8401         const VkWriteDescriptorSetAccelerationStructureKHR accelDescInfo = {
8402             VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,
8403             nullptr,
8404             1u,
8405             topLevelAS.get()->getPtr(),
8406         };
8407 
8408         const auto bufferDescInfo = makeDescriptorBufferInfo(buffer.get(), 0ull, VK_WHOLE_SIZE);
8409 
8410         DescriptorSetUpdateBuilder updateBuilder;
8411         updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(0u),
8412                                   VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &accelDescInfo);
8413         updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(1u),
8414                                   VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescInfo);
8415         updateBuilder.update(vkd, device);
8416     }
8417 
8418     // Shader modules.
8419     auto rgenModule = createShaderModule(vkd, device, context.getBinaryCollection().get("rgen"), 0);
8420     auto chitModule = createShaderModule(vkd, device, context.getBinaryCollection().get("chit"), 0);
8421 
8422     // Get some ray tracing properties.
8423     uint32_t shaderGroupHandleSize    = 0u;
8424     uint32_t shaderGroupBaseAlignment = 1u;
8425     {
8426         const auto rayTracingPropertiesKHR = makeRayTracingProperties(vki, physDev);
8427         shaderGroupHandleSize              = rayTracingPropertiesKHR->getShaderGroupHandleSize();
8428         shaderGroupBaseAlignment           = rayTracingPropertiesKHR->getShaderGroupBaseAlignment();
8429     }
8430 
8431     // Create raytracing pipeline and shader binding tables.
8432     Move<VkPipeline> pipeline;
8433 
8434     de::MovePtr<BufferWithMemory> raygenSBT;
8435     de::MovePtr<BufferWithMemory> missSBT;
8436     de::MovePtr<BufferWithMemory> hitSBT;
8437     de::MovePtr<BufferWithMemory> callableSBT;
8438 
8439     VkStridedDeviceAddressRegionKHR raygenSBTRegion   = makeStridedDeviceAddressRegionKHR(DE_NULL, 0, 0);
8440     VkStridedDeviceAddressRegionKHR missSBTRegion     = makeStridedDeviceAddressRegionKHR(DE_NULL, 0, 0);
8441     VkStridedDeviceAddressRegionKHR hitSBTRegion      = makeStridedDeviceAddressRegionKHR(DE_NULL, 0, 0);
8442     VkStridedDeviceAddressRegionKHR callableSBTRegion = makeStridedDeviceAddressRegionKHR(DE_NULL, 0, 0);
8443 
8444     {
8445         const auto rayTracingPipeline = de::newMovePtr<RayTracingPipeline>();
8446 
8447         rayTracingPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR, rgenModule, 0u);
8448         rayTracingPipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR, chitModule, 1u);
8449 
8450         pipeline = rayTracingPipeline->createPipeline(vkd, device, pipelineLayout.get());
8451 
8452         raygenSBT = rayTracingPipeline->createShaderBindingTable(
8453             vkd, device, pipeline.get(), alloc, shaderGroupHandleSize, shaderGroupBaseAlignment, 0u, 1u);
8454         raygenSBTRegion = makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, raygenSBT->get(), 0ull),
8455                                                             shaderGroupHandleSize, shaderGroupHandleSize);
8456 
8457         hitSBT = rayTracingPipeline->createShaderBindingTable(vkd, device, pipeline.get(), alloc, shaderGroupHandleSize,
8458                                                               shaderGroupBaseAlignment, 1u, 1u);
8459         hitSBTRegion = makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, hitSBT->get(), 0ull),
8460                                                          shaderGroupHandleSize, shaderGroupHandleSize);
8461 
8462         // Critical for the test: the miss shader binding table buffer is empty and contains a zero'ed out shader group handle.
8463         missSBT       = createEmptySBT(vkd, device, alloc, shaderGroupHandleSize);
8464         missSBTRegion = makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, missSBT->get(), 0ull),
8465                                                           shaderGroupHandleSize, shaderGroupHandleSize);
8466     }
8467 
8468     // Trace rays.
8469     vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, pipeline.get());
8470     vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, pipelineLayout.get(), 0u, 1u,
8471                               &descriptorSet.get(), 0u, nullptr);
8472     vkd.cmdTraceRaysKHR(cmdBuffer, &raygenSBTRegion, &missSBTRegion, &hitSBTRegion, &callableSBTRegion, 1u, 1u, 1u);
8473 
8474     // Barrier for the output buffer just in case (no writes should take place).
8475     const auto bufferBarrier = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
8476     vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u,
8477                            &bufferBarrier, 0u, nullptr, 0u, nullptr);
8478 
8479     endCommandBuffer(vkd, cmdBuffer);
8480     submitCommandsAndWait(vkd, device, queue, cmdBuffer);
8481 
8482     // Read value back from the buffer. No write should have taken place.
8483     float bufferValue = 0.0f;
8484     invalidateAlloc(vkd, device, bufferAlloc);
8485     deMemcpy(&bufferValue, bufferAlloc.getHostPtr(), sizeof(bufferValue));
8486 
8487     if (bufferValue != 0.0f)
8488         TCU_FAIL("Unexpected value found in buffer: " + de::toString(bufferValue));
8489 
8490     return tcu::TestStatus::pass("Pass");
8491 }
8492 
getInRangeTrianglePoints(float offset)8493 std::vector<tcu::Vec3> getInRangeTrianglePoints(float offset)
8494 {
8495     std::vector<tcu::Vec3> triangle;
8496     triangle.reserve(3u);
8497     triangle.emplace_back(0.0f + offset, 1.0f + offset, 5.0f + offset);
8498     triangle.emplace_back(-1.0f + offset, -1.0f + offset, 5.0f + offset);
8499     triangle.emplace_back(1.0f + offset, -1.0f + offset, 5.0f + offset);
8500 
8501     return triangle;
8502 }
8503 
reuseCreationBufferInstance(Context & context,const bool disturbTop)8504 tcu::TestStatus reuseCreationBufferInstance(Context &context, const bool disturbTop /* if false, bottom AS */)
8505 {
8506     const auto &vki          = context.getInstanceInterface();
8507     const auto physDev       = context.getPhysicalDevice();
8508     const auto &vkd          = context.getDeviceInterface();
8509     const auto device        = context.getDevice();
8510     auto &alloc              = context.getDefaultAllocator();
8511     const auto qIndex        = context.getUniversalQueueFamilyIndex();
8512     const auto queue         = context.getUniversalQueue();
8513     const auto stages        = (VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR);
8514     const bool disturbBottom = (!disturbTop);
8515 
8516     // We don't know exactly how much space each implementation is going to require to build the top and bottom accel structures,
8517     // but in practice the number appears to be in the low-KBs range, so creating a 4MB buffer will give us enough room to almost
8518     // guarantee the buffer is going to be used.
8519     const VkDeviceSize creationBufferSize = 4u * 1024u * 1024u;
8520     const auto creationBufferUsage =
8521         (VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
8522     const auto creationBufferInfo = makeBufferCreateInfo(creationBufferSize, creationBufferUsage);
8523     const auto creationBufferMemReqs =
8524         (MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
8525     BufferWithMemory creationBuffer(vkd, device, alloc, creationBufferInfo, creationBufferMemReqs);
8526 
8527     // Command pool and buffer.
8528     const auto cmdPool          = makeCommandPool(vkd, device, qIndex);
8529     const auto mainCmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
8530     const auto bottomBuildCmd   = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
8531     const auto topBuildCmd      = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
8532 
8533     // Build acceleration structures.
8534     auto topLevelAS         = makeTopLevelAccelerationStructure();
8535     auto topLevelOtherAS    = makeTopLevelAccelerationStructure();
8536     auto bottomLevelAS      = makeBottomLevelAccelerationStructure();
8537     auto bottomLevelOtherAS = makeBottomLevelAccelerationStructure();
8538 
8539     const auto goodTriangle = getInRangeTrianglePoints(0.0f);
8540     const auto badTriangle  = getInRangeTrianglePoints(100.0f);
8541 
8542     bottomLevelAS->addGeometry(goodTriangle, true /*triangles*/);
8543     bottomLevelOtherAS->addGeometry(badTriangle, true /*triangles*/);
8544 
8545     // Critical for the test: we create an additional acceleration structure without building it, and reusing the same creation
8546     // buffer. The creation operation is supposed to avoid touching the buffer, so this should not alter its contents and using the
8547     // original acceleration structure after this step should still work.
8548 
8549     beginCommandBuffer(vkd, bottomBuildCmd.get());
8550 
8551     if (disturbBottom)
8552     {
8553         bottomLevelAS->create(vkd, device, alloc, 0u, 0u, nullptr, MemoryRequirement::Any, creationBuffer.get(),
8554                               creationBufferSize);
8555         bottomLevelAS->build(vkd, device, bottomBuildCmd.get());
8556     }
8557     else
8558         bottomLevelAS->createAndBuild(vkd, device, bottomBuildCmd.get(), alloc);
8559 
8560     // Submit command buffer so the bottom acceleration structure is actually built and stored in the creation buffer.
8561     endCommandBuffer(vkd, bottomBuildCmd.get());
8562     submitCommandsAndWait(vkd, device, queue, bottomBuildCmd.get());
8563 
8564     if (disturbBottom)
8565     {
8566         bottomLevelOtherAS->create(vkd, device, alloc, 0u, 0u, nullptr, MemoryRequirement::Any, creationBuffer.get(),
8567                                    creationBufferSize);
8568         // Note how we have created the second bottom level accel structure reusing the buffer but we haven't built it.
8569     }
8570 
8571     using SharedBottomPtr = de::SharedPtr<BottomLevelAccelerationStructure>;
8572 
8573     SharedBottomPtr blasSharedPtr(bottomLevelAS.release());
8574     SharedBottomPtr blasOtherSharedPtr(nullptr);
8575 
8576     topLevelAS->setInstanceCount(1);
8577     topLevelAS->addInstance(blasSharedPtr);
8578 
8579     beginCommandBuffer(vkd, topBuildCmd.get());
8580 
8581     if (disturbTop)
8582     {
8583         topLevelAS->create(vkd, device, alloc, 0u, 0u, nullptr, MemoryRequirement::Any, creationBuffer.get(),
8584                            creationBufferSize);
8585         topLevelAS->build(vkd, device, topBuildCmd.get());
8586 
8587         bottomLevelOtherAS->createAndBuild(vkd, device, topBuildCmd.get(), alloc);
8588     }
8589     else
8590         topLevelAS->createAndBuild(vkd, device, topBuildCmd.get(), alloc);
8591 
8592     // Submit command buffer so the top acceleration structure is actually built and stored in the creation buffer.
8593     endCommandBuffer(vkd, topBuildCmd.get());
8594     submitCommandsAndWait(vkd, device, queue, topBuildCmd.get());
8595 
8596     if (disturbTop)
8597     {
8598         SharedBottomPtr auxiliar(bottomLevelOtherAS.release());
8599         blasOtherSharedPtr.swap(auxiliar);
8600 
8601         topLevelOtherAS->setInstanceCount(1);
8602         topLevelOtherAS->addInstance(blasOtherSharedPtr);
8603         topLevelOtherAS->create(vkd, device, alloc, 0u, 0u, nullptr, MemoryRequirement::Any, creationBuffer.get(),
8604                                 creationBufferSize);
8605         // Note how we have created the second top level accel structure reusing the buffer but we haven't built it.
8606     }
8607 
8608     // Create output buffer.
8609     const auto bufferSize       = static_cast<VkDeviceSize>(sizeof(float));
8610     const auto bufferCreateInfo = makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
8611     BufferWithMemory buffer(vkd, device, alloc, bufferCreateInfo, MemoryRequirement::HostVisible);
8612     auto &bufferAlloc = buffer.getAllocation();
8613 
8614     // Fill output buffer with an initial value.
8615     deMemset(bufferAlloc.getHostPtr(), 0, sizeof(float));
8616     flushAlloc(vkd, device, bufferAlloc);
8617 
8618     // Descriptor set layout and pipeline layout.
8619     DescriptorSetLayoutBuilder setLayoutBuilder;
8620     setLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, stages);
8621     setLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stages);
8622 
8623     const auto setLayout      = setLayoutBuilder.build(vkd, device);
8624     const auto pipelineLayout = makePipelineLayout(vkd, device, setLayout.get());
8625 
8626     // Descriptor pool and set.
8627     DescriptorPoolBuilder poolBuilder;
8628     poolBuilder.addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR);
8629     poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
8630     const auto descriptorPool = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
8631     const auto descriptorSet  = makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get());
8632 
8633     // Update descriptor set.
8634     {
8635         const VkWriteDescriptorSetAccelerationStructureKHR accelDescInfo = {
8636             VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,
8637             nullptr,
8638             1u,
8639             topLevelAS.get()->getPtr(),
8640         };
8641 
8642         const auto bufferDescInfo = makeDescriptorBufferInfo(buffer.get(), 0ull, VK_WHOLE_SIZE);
8643 
8644         DescriptorSetUpdateBuilder updateBuilder;
8645         updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(0u),
8646                                   VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &accelDescInfo);
8647         updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(1u),
8648                                   VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescInfo);
8649         updateBuilder.update(vkd, device);
8650     }
8651 
8652     // Shader modules.
8653     auto rgenModule = createShaderModule(vkd, device, context.getBinaryCollection().get("rgen"), 0);
8654     auto chitModule = createShaderModule(vkd, device, context.getBinaryCollection().get("chit"), 0);
8655 
8656     // Get some ray tracing properties.
8657     uint32_t shaderGroupHandleSize    = 0u;
8658     uint32_t shaderGroupBaseAlignment = 1u;
8659     {
8660         const auto rayTracingPropertiesKHR = makeRayTracingProperties(vki, physDev);
8661         shaderGroupHandleSize              = rayTracingPropertiesKHR->getShaderGroupHandleSize();
8662         shaderGroupBaseAlignment           = rayTracingPropertiesKHR->getShaderGroupBaseAlignment();
8663     }
8664 
8665     // Create raytracing pipeline and shader binding tables.
8666     Move<VkPipeline> pipeline;
8667 
8668     de::MovePtr<BufferWithMemory> raygenSBT;
8669     de::MovePtr<BufferWithMemory> missSBT;
8670     de::MovePtr<BufferWithMemory> hitSBT;
8671     de::MovePtr<BufferWithMemory> callableSBT;
8672 
8673     VkStridedDeviceAddressRegionKHR raygenSBTRegion   = makeStridedDeviceAddressRegionKHR(DE_NULL, 0, 0);
8674     VkStridedDeviceAddressRegionKHR missSBTRegion     = makeStridedDeviceAddressRegionKHR(DE_NULL, 0, 0);
8675     VkStridedDeviceAddressRegionKHR hitSBTRegion      = makeStridedDeviceAddressRegionKHR(DE_NULL, 0, 0);
8676     VkStridedDeviceAddressRegionKHR callableSBTRegion = makeStridedDeviceAddressRegionKHR(DE_NULL, 0, 0);
8677 
8678     {
8679         const auto rayTracingPipeline = de::newMovePtr<RayTracingPipeline>();
8680 
8681         rayTracingPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR, rgenModule, 0u);
8682         rayTracingPipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR, chitModule, 1u);
8683 
8684         pipeline = rayTracingPipeline->createPipeline(vkd, device, pipelineLayout.get());
8685 
8686         raygenSBT = rayTracingPipeline->createShaderBindingTable(
8687             vkd, device, pipeline.get(), alloc, shaderGroupHandleSize, shaderGroupBaseAlignment, 0u, 1u);
8688         raygenSBTRegion = makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, raygenSBT->get(), 0ull),
8689                                                             shaderGroupHandleSize, shaderGroupHandleSize);
8690 
8691         hitSBT = rayTracingPipeline->createShaderBindingTable(vkd, device, pipeline.get(), alloc, shaderGroupHandleSize,
8692                                                               shaderGroupBaseAlignment, 1u, 1u);
8693         hitSBTRegion = makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, hitSBT->get(), 0ull),
8694                                                          shaderGroupHandleSize, shaderGroupHandleSize);
8695     }
8696 
8697     const auto mainCmdBuffer = mainCmdBufferPtr.get();
8698     beginCommandBuffer(vkd, mainCmdBuffer);
8699 
8700     // Trace rays.
8701     vkd.cmdBindPipeline(mainCmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, pipeline.get());
8702     vkd.cmdBindDescriptorSets(mainCmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, pipelineLayout.get(), 0u, 1u,
8703                               &descriptorSet.get(), 0u, nullptr);
8704     vkd.cmdTraceRaysKHR(mainCmdBuffer, &raygenSBTRegion, &missSBTRegion, &hitSBTRegion, &callableSBTRegion, 1u, 1u, 1u);
8705 
8706     // Barrier for the output buffer.
8707     const auto bufferBarrier = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
8708     vkd.cmdPipelineBarrier(mainCmdBuffer, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, VK_PIPELINE_STAGE_HOST_BIT, 0u,
8709                            1u, &bufferBarrier, 0u, nullptr, 0u, nullptr);
8710 
8711     endCommandBuffer(vkd, mainCmdBuffer);
8712     submitCommandsAndWait(vkd, device, queue, mainCmdBuffer);
8713 
8714     // Read value back from the buffer.
8715     float bufferValue = 0.0f;
8716     invalidateAlloc(vkd, device, bufferAlloc);
8717     deMemcpy(&bufferValue, bufferAlloc.getHostPtr(), sizeof(bufferValue));
8718 
8719     if (bufferValue != 1.0f)
8720         TCU_FAIL("Unexpected value found in buffer: " + de::toString(bufferValue));
8721 
8722     return tcu::TestStatus::pass("Pass");
8723 }
8724 
8725 } // namespace
8726 
8727 class RayTracingTestCase : public TestCase
8728 {
8729 public:
8730     RayTracingTestCase(tcu::TestContext &context, const char *name, const CaseDef data);
8731     ~RayTracingTestCase(void);
8732 
8733     virtual void checkSupport(Context &context) const final;
8734     virtual TestInstance *createInstance(Context &context) const final;
8735     void initPrograms(SourceCollections &programCollection) const final;
8736 
8737 private:
8738     CaseDef m_data;
8739     mutable std::unique_ptr<TestBase> m_testPtr;
8740 };
8741 
RayTracingTestCase(tcu::TestContext & context,const char * name,const CaseDef data)8742 RayTracingTestCase::RayTracingTestCase(tcu::TestContext &context, const char *name, const CaseDef data)
8743     : vkt::TestCase(context, name)
8744     , m_data(data)
8745 {
8746     /* Stub */
8747 }
8748 
~RayTracingTestCase(void)8749 RayTracingTestCase::~RayTracingTestCase(void)
8750 {
8751 }
8752 
checkSupport(Context & context) const8753 void RayTracingTestCase::checkSupport(Context &context) const
8754 {
8755     context.requireDeviceFunctionality("VK_KHR_acceleration_structure");
8756     context.requireDeviceFunctionality("VK_KHR_buffer_device_address");
8757     context.requireDeviceFunctionality("VK_KHR_deferred_host_operations");
8758     context.requireDeviceFunctionality("VK_KHR_ray_tracing_pipeline");
8759 
8760     const VkPhysicalDeviceAccelerationStructureFeaturesKHR &accelerationStructureFeaturesKHR =
8761         context.getAccelerationStructureFeatures();
8762     const VkPhysicalDeviceRayTracingPipelineFeaturesKHR &rayTracingPipelineFeaturesKHR =
8763         context.getRayTracingPipelineFeatures();
8764     const auto &rayTracingPipelinePropertiesKHR = context.getRayTracingPipelineProperties();
8765 
8766     if (rayTracingPipelineFeaturesKHR.rayTracingPipeline == false)
8767     {
8768         TCU_THROW(NotSupportedError, "VkPhysicalDeviceRayTracingPipelineFeaturesKHR::rayTracingPipeline is false");
8769     }
8770 
8771     if (accelerationStructureFeaturesKHR.accelerationStructure == false)
8772     {
8773         TCU_THROW(NotSupportedError,
8774                   "VkPhysicalDeviceAccelerationStructureFeaturesKHR::accelerationStructure is false");
8775     }
8776 
8777     if (ShaderRecordBlockTest::isTest(m_data.type))
8778     {
8779         if (ShaderRecordBlockTest::isExplicitScalarOffsetTest(m_data.type) ||
8780             ShaderRecordBlockTest::isScalarLayoutTest(m_data.type))
8781         {
8782             context.requireDeviceFunctionality("VK_EXT_scalar_block_layout");
8783         }
8784 
8785         if (ShaderRecordBlockTest::usesF64(m_data.type))
8786         {
8787             context.requireDeviceCoreFeature(vkt::DeviceCoreFeature::DEVICE_CORE_FEATURE_SHADER_FLOAT64);
8788         }
8789 
8790         if (ShaderRecordBlockTest::usesI8(m_data.type) || ShaderRecordBlockTest::usesU8(m_data.type))
8791         {
8792             if (context.get8BitStorageFeatures().storageBuffer8BitAccess == VK_FALSE)
8793             {
8794                 TCU_THROW(NotSupportedError, "storageBuffer8BitAccess feature is unavailable");
8795             }
8796         }
8797 
8798         if (ShaderRecordBlockTest::usesI16(m_data.type) || ShaderRecordBlockTest::usesU16(m_data.type))
8799         {
8800             context.requireDeviceCoreFeature(vkt::DeviceCoreFeature::DEVICE_CORE_FEATURE_SHADER_INT16);
8801         }
8802 
8803         if (ShaderRecordBlockTest::usesI64(m_data.type) || ShaderRecordBlockTest::usesU64(m_data.type))
8804         {
8805             context.requireDeviceCoreFeature(vkt::DeviceCoreFeature::DEVICE_CORE_FEATURE_SHADER_INT64);
8806         }
8807     }
8808 
8809     if (static_cast<uint32_t>(m_data.type) >= static_cast<uint32_t>(TestType::RECURSIVE_TRACES_1) &&
8810         static_cast<uint32_t>(m_data.type) <= static_cast<uint32_t>(TestType::RECURSIVE_TRACES_29))
8811     {
8812         const auto nLevels =
8813             static_cast<uint32_t>(m_data.type) - static_cast<uint32_t>(TestType::RECURSIVE_TRACES_1) + 1;
8814 
8815         if (rayTracingPipelinePropertiesKHR.maxRayRecursionDepth < nLevels)
8816         {
8817             TCU_THROW(NotSupportedError, "Cannot use an unsupported ray recursion depth.");
8818         }
8819     }
8820 }
8821 
initPrograms(SourceCollections & programCollection) const8822 void RayTracingTestCase::initPrograms(SourceCollections &programCollection) const
8823 {
8824     switch (m_data.type)
8825     {
8826     case TestType::AABBS_AND_TRIS_IN_ONE_TL:
8827     {
8828         m_testPtr.reset(new AABBTriTLTest(m_data.geometryType, m_data.asLayout));
8829 
8830         m_testPtr->initPrograms(programCollection);
8831 
8832         break;
8833     }
8834 
8835     case TestType::AS_STRESS_TEST:
8836     {
8837         m_testPtr.reset(new ASStressTest(m_data.geometryType, m_data.asLayout));
8838 
8839         m_testPtr->initPrograms(programCollection);
8840 
8841         break;
8842     }
8843 
8844     case TestType::CALLABLE_SHADER_STRESS_DYNAMIC_TEST:
8845     case TestType::CALLABLE_SHADER_STRESS_TEST:
8846     {
8847         const bool useDynamicStackSize = (m_data.type == TestType::CALLABLE_SHADER_STRESS_DYNAMIC_TEST);
8848 
8849         m_testPtr.reset(new CallableShaderStressTest(m_data.geometryType, m_data.asLayout, useDynamicStackSize));
8850 
8851         m_testPtr->initPrograms(programCollection);
8852 
8853         break;
8854     }
8855 
8856     case TestType::CULL_MASK:
8857     case TestType::CULL_MASK_EXTRA_BITS:
8858     {
8859         m_testPtr.reset(
8860             new CullMaskTest(m_data.asLayout, m_data.geometryType, (m_data.type == TestType::CULL_MASK_EXTRA_BITS)));
8861 
8862         m_testPtr->initPrograms(programCollection);
8863 
8864         break;
8865     }
8866 
8867     case TestType::MAX_RAY_HIT_ATTRIBUTE_SIZE:
8868     {
8869         m_testPtr.reset(new MAXRayHitAttributeSizeTest(m_data.geometryType, m_data.asLayout));
8870 
8871         m_testPtr->initPrograms(programCollection);
8872 
8873         break;
8874     }
8875 
8876     case TestType::MAX_RT_INVOCATIONS_SUPPORTED:
8877     {
8878         m_testPtr.reset(new MAXRTInvocationsSupportedTest(m_data.geometryType, m_data.asLayout));
8879 
8880         m_testPtr->initPrograms(programCollection);
8881 
8882         break;
8883     }
8884 
8885     case TestType::NO_DUPLICATE_ANY_HIT:
8886     {
8887         m_testPtr.reset(new NoDuplicateAnyHitTest(m_data.asLayout, m_data.geometryType));
8888 
8889         m_testPtr->initPrograms(programCollection);
8890 
8891         break;
8892     }
8893 
8894     case TestType::RECURSIVE_TRACES_0:
8895     case TestType::RECURSIVE_TRACES_1:
8896     case TestType::RECURSIVE_TRACES_2:
8897     case TestType::RECURSIVE_TRACES_3:
8898     case TestType::RECURSIVE_TRACES_4:
8899     case TestType::RECURSIVE_TRACES_5:
8900     case TestType::RECURSIVE_TRACES_6:
8901     case TestType::RECURSIVE_TRACES_7:
8902     case TestType::RECURSIVE_TRACES_8:
8903     case TestType::RECURSIVE_TRACES_9:
8904     case TestType::RECURSIVE_TRACES_10:
8905     case TestType::RECURSIVE_TRACES_11:
8906     case TestType::RECURSIVE_TRACES_12:
8907     case TestType::RECURSIVE_TRACES_13:
8908     case TestType::RECURSIVE_TRACES_14:
8909     case TestType::RECURSIVE_TRACES_15:
8910     case TestType::RECURSIVE_TRACES_16:
8911     case TestType::RECURSIVE_TRACES_17:
8912     case TestType::RECURSIVE_TRACES_18:
8913     case TestType::RECURSIVE_TRACES_19:
8914     case TestType::RECURSIVE_TRACES_20:
8915     case TestType::RECURSIVE_TRACES_21:
8916     case TestType::RECURSIVE_TRACES_22:
8917     case TestType::RECURSIVE_TRACES_23:
8918     case TestType::RECURSIVE_TRACES_24:
8919     case TestType::RECURSIVE_TRACES_25:
8920     case TestType::RECURSIVE_TRACES_26:
8921     case TestType::RECURSIVE_TRACES_27:
8922     case TestType::RECURSIVE_TRACES_28:
8923     case TestType::RECURSIVE_TRACES_29:
8924     {
8925         const auto nLevels =
8926             ((m_data.type == TestType::RECURSIVE_TRACES_0) ?
8927                  0u :
8928                  (static_cast<uint32_t>(m_data.type) - static_cast<uint32_t>(TestType::RECURSIVE_TRACES_1) + 1));
8929 
8930         m_testPtr.reset(new RecursiveTracesTest(m_data.geometryType, m_data.asLayout, nLevels));
8931 
8932         m_testPtr->initPrograms(programCollection);
8933 
8934         break;
8935     }
8936 
8937     case TestType::REPORT_INTERSECTION_RESULT:
8938     case TestType::USE_MEMORY_ACCESS:
8939     {
8940         m_testPtr.reset(new ReportIntersectionResultTest(m_data.asLayout, m_data.geometryType));
8941 
8942         m_testPtr->initPrograms(programCollection);
8943 
8944         break;
8945     }
8946 
8947     case TestType::RAY_PAYLOAD_IN:
8948     {
8949         m_testPtr.reset(new RayPayloadInTest(m_data.geometryType, m_data.asLayout));
8950 
8951         m_testPtr->initPrograms(programCollection);
8952 
8953         break;
8954     }
8955 
8956     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_1:
8957     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_2:
8958     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_3:
8959     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_4:
8960     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_5:
8961     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_6:
8962     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_1:
8963     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_2:
8964     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_3:
8965     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_4:
8966     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_5:
8967     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_6:
8968     case TestType::SHADER_RECORD_BLOCK_SCALAR_1:
8969     case TestType::SHADER_RECORD_BLOCK_SCALAR_2:
8970     case TestType::SHADER_RECORD_BLOCK_SCALAR_3:
8971     case TestType::SHADER_RECORD_BLOCK_SCALAR_4:
8972     case TestType::SHADER_RECORD_BLOCK_SCALAR_5:
8973     case TestType::SHADER_RECORD_BLOCK_SCALAR_6:
8974     case TestType::SHADER_RECORD_BLOCK_STD430_1:
8975     case TestType::SHADER_RECORD_BLOCK_STD430_2:
8976     case TestType::SHADER_RECORD_BLOCK_STD430_3:
8977     case TestType::SHADER_RECORD_BLOCK_STD430_4:
8978     case TestType::SHADER_RECORD_BLOCK_STD430_5:
8979     case TestType::SHADER_RECORD_BLOCK_STD430_6:
8980     {
8981         m_testPtr.reset(new ShaderRecordBlockTest(m_data.type, ShaderRecordBlockTest::getVarsToTest(m_data.type)));
8982 
8983         m_testPtr->initPrograms(programCollection);
8984 
8985         break;
8986     }
8987 
8988     case TestType::IGNORE_ANY_HIT_DYNAMICALLY:
8989     case TestType::IGNORE_ANY_HIT_STATICALLY:
8990     case TestType::TERMINATE_ANY_HIT_DYNAMICALLY:
8991     case TestType::TERMINATE_ANY_HIT_STATICALLY:
8992     case TestType::TERMINATE_INTERSECTION_DYNAMICALLY:
8993     case TestType::TERMINATE_INTERSECTION_STATICALLY:
8994     {
8995         m_testPtr.reset(new TerminationTest(TerminationTest::getModeFromTestType(m_data.type)));
8996 
8997         m_testPtr->initPrograms(programCollection);
8998 
8999         break;
9000     }
9001 
9002     default:
9003     {
9004         deAssertFail("This location should never be reached", __FILE__, __LINE__);
9005     }
9006     }
9007 }
9008 
createInstance(Context & context) const9009 TestInstance *RayTracingTestCase::createInstance(Context &context) const
9010 {
9011     switch (m_data.type)
9012     {
9013     case TestType::AABBS_AND_TRIS_IN_ONE_TL:
9014     {
9015         if (m_testPtr == nullptr)
9016         {
9017             m_testPtr.reset(new AABBTriTLTest(m_data.geometryType, m_data.asLayout));
9018         }
9019 
9020         break;
9021     }
9022 
9023     case TestType::AS_STRESS_TEST:
9024     {
9025         if (m_testPtr == nullptr)
9026         {
9027             m_testPtr.reset(new ASStressTest(m_data.geometryType, m_data.asLayout));
9028         }
9029 
9030         break;
9031     }
9032 
9033     case TestType::CALLABLE_SHADER_STRESS_DYNAMIC_TEST:
9034     case TestType::CALLABLE_SHADER_STRESS_TEST:
9035     {
9036         if (m_testPtr == nullptr)
9037         {
9038             const bool useDynamicStackSize = (m_data.type == TestType::CALLABLE_SHADER_STRESS_DYNAMIC_TEST);
9039 
9040             m_testPtr.reset(new CallableShaderStressTest(m_data.geometryType, m_data.asLayout, useDynamicStackSize));
9041         }
9042 
9043         break;
9044     }
9045 
9046     case TestType::CULL_MASK:
9047     case TestType::CULL_MASK_EXTRA_BITS:
9048     {
9049         if (m_testPtr == nullptr)
9050         {
9051             m_testPtr.reset(new CullMaskTest(m_data.asLayout, m_data.geometryType,
9052                                              (m_data.type == TestType::CULL_MASK_EXTRA_BITS)));
9053         }
9054 
9055         break;
9056     }
9057 
9058     case TestType::MAX_RAY_HIT_ATTRIBUTE_SIZE:
9059     {
9060         if (m_testPtr == nullptr)
9061         {
9062             m_testPtr.reset(new MAXRayHitAttributeSizeTest(m_data.geometryType, m_data.asLayout));
9063         }
9064 
9065         break;
9066     }
9067 
9068     case TestType::MAX_RT_INVOCATIONS_SUPPORTED:
9069     {
9070         if (m_testPtr == nullptr)
9071         {
9072             m_testPtr.reset(new MAXRTInvocationsSupportedTest(m_data.geometryType, m_data.asLayout));
9073         }
9074 
9075         break;
9076     }
9077 
9078     case TestType::NO_DUPLICATE_ANY_HIT:
9079     {
9080         if (m_testPtr == nullptr)
9081         {
9082             m_testPtr.reset(new NoDuplicateAnyHitTest(m_data.asLayout, m_data.geometryType));
9083         }
9084 
9085         break;
9086     }
9087 
9088     case TestType::RECURSIVE_TRACES_0:
9089     case TestType::RECURSIVE_TRACES_1:
9090     case TestType::RECURSIVE_TRACES_2:
9091     case TestType::RECURSIVE_TRACES_3:
9092     case TestType::RECURSIVE_TRACES_4:
9093     case TestType::RECURSIVE_TRACES_5:
9094     case TestType::RECURSIVE_TRACES_6:
9095     case TestType::RECURSIVE_TRACES_7:
9096     case TestType::RECURSIVE_TRACES_8:
9097     case TestType::RECURSIVE_TRACES_9:
9098     case TestType::RECURSIVE_TRACES_10:
9099     case TestType::RECURSIVE_TRACES_11:
9100     case TestType::RECURSIVE_TRACES_12:
9101     case TestType::RECURSIVE_TRACES_13:
9102     case TestType::RECURSIVE_TRACES_14:
9103     case TestType::RECURSIVE_TRACES_15:
9104     case TestType::RECURSIVE_TRACES_16:
9105     case TestType::RECURSIVE_TRACES_17:
9106     case TestType::RECURSIVE_TRACES_18:
9107     case TestType::RECURSIVE_TRACES_19:
9108     case TestType::RECURSIVE_TRACES_20:
9109     case TestType::RECURSIVE_TRACES_21:
9110     case TestType::RECURSIVE_TRACES_22:
9111     case TestType::RECURSIVE_TRACES_23:
9112     case TestType::RECURSIVE_TRACES_24:
9113     case TestType::RECURSIVE_TRACES_25:
9114     case TestType::RECURSIVE_TRACES_26:
9115     case TestType::RECURSIVE_TRACES_27:
9116     case TestType::RECURSIVE_TRACES_28:
9117     case TestType::RECURSIVE_TRACES_29:
9118     {
9119         const auto nLevels =
9120             ((m_data.type == TestType::RECURSIVE_TRACES_0) ?
9121                  0u :
9122                  (static_cast<uint32_t>(m_data.type) - static_cast<uint32_t>(TestType::RECURSIVE_TRACES_1) + 1));
9123 
9124         if (m_testPtr == nullptr)
9125         {
9126             m_testPtr.reset(new RecursiveTracesTest(m_data.geometryType, m_data.asLayout, nLevels));
9127         }
9128 
9129         break;
9130     }
9131 
9132     case TestType::REPORT_INTERSECTION_RESULT:
9133     case TestType::USE_MEMORY_ACCESS:
9134     {
9135         if (m_testPtr == nullptr)
9136         {
9137             m_testPtr.reset(new ReportIntersectionResultTest(m_data.asLayout, m_data.geometryType));
9138         }
9139 
9140         break;
9141     }
9142 
9143     case TestType::RAY_PAYLOAD_IN:
9144     {
9145         if (m_testPtr == nullptr)
9146         {
9147             m_testPtr.reset(new RayPayloadInTest(m_data.geometryType, m_data.asLayout));
9148         }
9149 
9150         break;
9151     }
9152 
9153     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_1:
9154     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_2:
9155     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_3:
9156     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_4:
9157     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_5:
9158     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_6:
9159     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_1:
9160     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_2:
9161     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_3:
9162     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_4:
9163     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_5:
9164     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_6:
9165     case TestType::SHADER_RECORD_BLOCK_SCALAR_1:
9166     case TestType::SHADER_RECORD_BLOCK_SCALAR_2:
9167     case TestType::SHADER_RECORD_BLOCK_SCALAR_3:
9168     case TestType::SHADER_RECORD_BLOCK_SCALAR_4:
9169     case TestType::SHADER_RECORD_BLOCK_SCALAR_5:
9170     case TestType::SHADER_RECORD_BLOCK_SCALAR_6:
9171     case TestType::SHADER_RECORD_BLOCK_STD430_1:
9172     case TestType::SHADER_RECORD_BLOCK_STD430_2:
9173     case TestType::SHADER_RECORD_BLOCK_STD430_3:
9174     case TestType::SHADER_RECORD_BLOCK_STD430_4:
9175     case TestType::SHADER_RECORD_BLOCK_STD430_5:
9176     case TestType::SHADER_RECORD_BLOCK_STD430_6:
9177     {
9178         if (m_testPtr == nullptr)
9179         {
9180             m_testPtr.reset(new ShaderRecordBlockTest(m_data.type, ShaderRecordBlockTest::getVarsToTest(m_data.type)));
9181         }
9182 
9183         break;
9184     }
9185 
9186     case TestType::IGNORE_ANY_HIT_DYNAMICALLY:
9187     case TestType::IGNORE_ANY_HIT_STATICALLY:
9188     case TestType::TERMINATE_ANY_HIT_DYNAMICALLY:
9189     case TestType::TERMINATE_ANY_HIT_STATICALLY:
9190     case TestType::TERMINATE_INTERSECTION_DYNAMICALLY:
9191     case TestType::TERMINATE_INTERSECTION_STATICALLY:
9192     {
9193         if (m_testPtr == nullptr)
9194         {
9195             m_testPtr.reset(new TerminationTest(TerminationTest::getModeFromTestType(m_data.type)));
9196         }
9197 
9198         break;
9199     }
9200 
9201     default:
9202     {
9203         deAssertFail("This location should never be reached", __FILE__, __LINE__);
9204     }
9205     }
9206 
9207     auto newTestInstancePtr = new RayTracingMiscTestInstance(context, m_data, m_testPtr.get());
9208 
9209     return newTestInstancePtr;
9210 }
9211 
createMiscTests(tcu::TestContext & testCtx)9212 tcu::TestCaseGroup *createMiscTests(tcu::TestContext &testCtx)
9213 {
9214     de::MovePtr<tcu::TestCaseGroup> miscGroupPtr(
9215         // Miscellaneous ray-tracing tests
9216         new tcu::TestCaseGroup(testCtx, "misc"));
9217 
9218     for (auto currentGeometryType = GeometryType::FIRST; currentGeometryType != GeometryType::COUNT;
9219          currentGeometryType      = static_cast<GeometryType>(static_cast<uint32_t>(currentGeometryType) + 1))
9220     {
9221         for (auto currentASLayout = AccelerationStructureLayout::FIRST;
9222              currentASLayout != AccelerationStructureLayout::COUNT;
9223              currentASLayout = static_cast<AccelerationStructureLayout>(static_cast<uint32_t>(currentASLayout) + 1))
9224         {
9225             for (uint32_t nIteration = 0; nIteration < 2; ++nIteration)
9226             {
9227                 const auto testType = (nIteration == 0) ? TestType::CALLABLE_SHADER_STRESS_DYNAMIC_TEST :
9228                                                           TestType::CALLABLE_SHADER_STRESS_TEST;
9229                 const std::string newTestCaseName =
9230                     "callableshaderstress_" + de::toString(getSuffixForASLayout(currentASLayout)) + "_" +
9231                     de::toString(getSuffixForGeometryType(currentGeometryType)) + "_" +
9232                     ((testType == TestType::CALLABLE_SHADER_STRESS_DYNAMIC_TEST) ? "dynamic" : "static");
9233 
9234                 // Verifies that the maximum ray hit attribute size property reported by the implementation is actually supported.
9235                 auto newTestCasePtr = new RayTracingTestCase(testCtx, newTestCaseName.data(),
9236                                                              CaseDef{testType, currentGeometryType, currentASLayout});
9237 
9238                 miscGroupPtr->addChild(newTestCasePtr);
9239             }
9240         }
9241     }
9242 
9243     for (auto currentGeometryType = GeometryType::FIRST; currentGeometryType != GeometryType::COUNT;
9244          currentGeometryType      = static_cast<GeometryType>(static_cast<uint32_t>(currentGeometryType) + 1))
9245     {
9246         const std::string newTestCaseName =
9247             "AS_stresstest_" + de::toString(getSuffixForGeometryType(currentGeometryType));
9248 
9249         // Verifies raygen shader invocations can simultaneously access as many AS instances as reported
9250         auto newTestCasePtr =
9251             new RayTracingTestCase(testCtx, newTestCaseName.data(),
9252                                    CaseDef{TestType::AS_STRESS_TEST, currentGeometryType,
9253                                            AccelerationStructureLayout::ONE_TL_MANY_BLS_ONE_GEOMETRY});
9254 
9255         miscGroupPtr->addChild(newTestCasePtr);
9256     }
9257 
9258     for (auto currentGeometryType = GeometryType::FIRST; currentGeometryType != GeometryType::COUNT;
9259          currentGeometryType      = static_cast<GeometryType>(static_cast<uint32_t>(currentGeometryType) + 1))
9260     {
9261         for (int nUseExtraCullMaskBits = 0; nUseExtraCullMaskBits < 2 /* false, true */; ++nUseExtraCullMaskBits)
9262         {
9263             const std::string newTestCaseName = "cullmask_" +
9264                                                 de::toString(getSuffixForGeometryType(currentGeometryType)) +
9265                                                 de::toString((nUseExtraCullMaskBits) ? "_extrabits" : "");
9266             const auto testType = (nUseExtraCullMaskBits == 0) ? TestType::CULL_MASK : TestType::CULL_MASK_EXTRA_BITS;
9267 
9268             // Verifies cull mask works as specified
9269             auto newTestCasePtr = new RayTracingTestCase(
9270                 testCtx, newTestCaseName.data(),
9271                 CaseDef{testType, currentGeometryType, AccelerationStructureLayout::ONE_TL_MANY_BLS_ONE_GEOMETRY});
9272 
9273             miscGroupPtr->addChild(newTestCasePtr);
9274         }
9275     }
9276 
9277     for (auto currentGeometryType = GeometryType::FIRST; currentGeometryType != GeometryType::COUNT;
9278          currentGeometryType      = static_cast<GeometryType>(static_cast<uint32_t>(currentGeometryType) + 1))
9279     {
9280         const std::string newTestCaseName =
9281             "maxrtinvocations_" + de::toString(getSuffixForGeometryType(currentGeometryType));
9282 
9283         // Verifies top-level acceleration structures built of AABB and triangle bottom-level AS instances work as expected
9284         auto newTestCasePtr =
9285             new RayTracingTestCase(testCtx, newTestCaseName.data(),
9286                                    CaseDef{TestType::MAX_RT_INVOCATIONS_SUPPORTED, currentGeometryType,
9287                                            AccelerationStructureLayout::ONE_TL_ONE_BL_ONE_GEOMETRY});
9288 
9289         miscGroupPtr->addChild(newTestCasePtr);
9290     }
9291 
9292     for (auto currentGeometryType = GeometryType::FIRST; currentGeometryType != GeometryType::COUNT;
9293          currentGeometryType      = static_cast<GeometryType>(static_cast<uint32_t>(currentGeometryType) + 1))
9294     {
9295         for (auto currentASLayout = AccelerationStructureLayout::FIRST;
9296              currentASLayout != AccelerationStructureLayout::COUNT;
9297              currentASLayout = static_cast<AccelerationStructureLayout>(static_cast<uint32_t>(currentASLayout) + 1))
9298         {
9299             const std::string newTestCaseName = "NO_DUPLICATE_ANY_HIT_" +
9300                                                 de::toString(getSuffixForASLayout(currentASLayout)) + "_" +
9301                                                 de::toString(getSuffixForGeometryType(currentGeometryType));
9302 
9303             // Verifies the NO_DUPLICATE_ANY_HIT flag is adhered to when tracing rays
9304             auto newTestCasePtr =
9305                 new RayTracingTestCase(testCtx, newTestCaseName.data(),
9306                                        CaseDef{TestType::NO_DUPLICATE_ANY_HIT, currentGeometryType, currentASLayout});
9307 
9308             miscGroupPtr->addChild(newTestCasePtr);
9309         }
9310     }
9311 
9312     {
9313         // Verifies top-level acceleration structures built of AABB and triangle bottom-level AS instances work as expected
9314         auto newTestCasePtr = new RayTracingTestCase(
9315             testCtx, "mixedPrimTL",
9316             CaseDef{TestType::AABBS_AND_TRIS_IN_ONE_TL, GeometryType::AABB_AND_TRIANGLES,
9317                     AccelerationStructureLayout::ONE_TL_MANY_BLS_MANY_GEOMETRIES_WITH_VARYING_PRIM_TYPES});
9318 
9319         miscGroupPtr->addChild(newTestCasePtr);
9320     }
9321 
9322     for (auto currentASLayout = AccelerationStructureLayout::FIRST;
9323          currentASLayout != AccelerationStructureLayout::COUNT;
9324          currentASLayout = static_cast<AccelerationStructureLayout>(static_cast<uint32_t>(currentASLayout) + 1))
9325     {
9326         const std::string newTestCaseName =
9327             "maxrayhitattributesize_" + de::toString(getSuffixForASLayout(currentASLayout));
9328 
9329         // Verifies that the maximum ray hit attribute size property reported by the implementation is actually supported.
9330         auto newTestCasePtr = new RayTracingTestCase(testCtx, newTestCaseName.data(),
9331                                                      CaseDef{TestType::MAX_RAY_HIT_ATTRIBUTE_SIZE, GeometryType::AABB,
9332                                                              AccelerationStructureLayout::ONE_TL_ONE_BL_ONE_GEOMETRY});
9333 
9334         miscGroupPtr->addChild(newTestCasePtr);
9335     }
9336 
9337     {
9338         // Test the return value of reportIntersectionEXT
9339         auto newTestCase1Ptr = new RayTracingTestCase(testCtx, "report_intersection_result",
9340                                                       CaseDef{TestType::REPORT_INTERSECTION_RESULT, GeometryType::AABB,
9341                                                               AccelerationStructureLayout::ONE_TL_ONE_BL_ONE_GEOMETRY});
9342         // Test replacing VK_ACCESS_*_WRITE/READ_BIT with VK_ACCESS_MEMORY_WRITE/READ_BIT.
9343         auto newTestCase2Ptr = new RayTracingTestCase(testCtx, "memory_access",
9344                                                       CaseDef{TestType::USE_MEMORY_ACCESS, GeometryType::AABB,
9345                                                               AccelerationStructureLayout::ONE_TL_ONE_BL_ONE_GEOMETRY});
9346 
9347         miscGroupPtr->addChild(newTestCase1Ptr);
9348         miscGroupPtr->addChild(newTestCase2Ptr);
9349     }
9350 
9351     for (auto currentGeometryType = GeometryType::FIRST; currentGeometryType != GeometryType::COUNT;
9352          currentGeometryType      = static_cast<GeometryType>(static_cast<uint32_t>(currentGeometryType) + 1))
9353     {
9354         const std::string newTestCaseName =
9355             "raypayloadin_" + de::toString(getSuffixForGeometryType(currentGeometryType));
9356 
9357         // Verifies that relevant shader stages can correctly read large ray payloads provided by raygen shader stage.
9358         auto newTestCasePtr = new RayTracingTestCase(testCtx, newTestCaseName.data(),
9359                                                      CaseDef{TestType::RAY_PAYLOAD_IN, currentGeometryType,
9360                                                              AccelerationStructureLayout::ONE_TL_ONE_BL_ONE_GEOMETRY});
9361         miscGroupPtr->addChild(newTestCasePtr);
9362     }
9363 
9364     {
9365         // Tests usage of various variables inside a shader record block using std430 layout
9366         auto newTestCaseSTD430_1Ptr =
9367             new RayTracingTestCase(testCtx, "shaderRecordSTD430_1", CaseDef(TestType::SHADER_RECORD_BLOCK_STD430_1));
9368         auto newTestCaseSTD430_2Ptr =
9369             new RayTracingTestCase(testCtx, "shaderRecordSTD430_2", CaseDef(TestType::SHADER_RECORD_BLOCK_STD430_2));
9370         auto newTestCaseSTD430_3Ptr =
9371             new RayTracingTestCase(testCtx, "shaderRecordSTD430_3", CaseDef(TestType::SHADER_RECORD_BLOCK_STD430_3));
9372         auto newTestCaseSTD430_4Ptr =
9373             new RayTracingTestCase(testCtx, "shaderRecordSTD430_4", CaseDef(TestType::SHADER_RECORD_BLOCK_STD430_4));
9374         auto newTestCaseSTD430_5Ptr =
9375             new RayTracingTestCase(testCtx, "shaderRecordSTD430_5", CaseDef(TestType::SHADER_RECORD_BLOCK_STD430_5));
9376         auto newTestCaseSTD430_6Ptr =
9377             new RayTracingTestCase(testCtx, "shaderRecordSTD430_6", CaseDef(TestType::SHADER_RECORD_BLOCK_STD430_6));
9378 
9379         // Tests usage of various variables inside a shader record block using scalar layout
9380         auto newTestCaseScalar_1Ptr =
9381             new RayTracingTestCase(testCtx, "shaderRecordScalar_1", CaseDef(TestType::SHADER_RECORD_BLOCK_SCALAR_1));
9382         auto newTestCaseScalar_2Ptr =
9383             new RayTracingTestCase(testCtx, "shaderRecordScalar_2", CaseDef(TestType::SHADER_RECORD_BLOCK_SCALAR_2));
9384         auto newTestCaseScalar_3Ptr =
9385             new RayTracingTestCase(testCtx, "shaderRecordScalar_3", CaseDef(TestType::SHADER_RECORD_BLOCK_SCALAR_3));
9386         auto newTestCaseScalar_4Ptr =
9387             new RayTracingTestCase(testCtx, "shaderRecordScalar_4", CaseDef(TestType::SHADER_RECORD_BLOCK_SCALAR_4));
9388         auto newTestCaseScalar_5Ptr =
9389             new RayTracingTestCase(testCtx, "shaderRecordScalar_5", CaseDef(TestType::SHADER_RECORD_BLOCK_SCALAR_5));
9390         auto newTestCaseScalar_6Ptr =
9391             new RayTracingTestCase(testCtx, "shaderRecordScalar_6", CaseDef(TestType::SHADER_RECORD_BLOCK_SCALAR_6));
9392 
9393         // Tests usage of various variables inside a shader record block using scalar layout and explicit offset qualifiers
9394         auto newTestCaseExplicitScalarOffset_1Ptr =
9395             new RayTracingTestCase(testCtx, "shaderRecordExplicitScalarOffset_1",
9396                                    CaseDef(TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_1));
9397         auto newTestCaseExplicitScalarOffset_2Ptr =
9398             new RayTracingTestCase(testCtx, "shaderRecordExplicitScalarOffset_2",
9399                                    CaseDef(TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_2));
9400         auto newTestCaseExplicitScalarOffset_3Ptr =
9401             new RayTracingTestCase(testCtx, "shaderRecordExplicitScalarOffset_3",
9402                                    CaseDef(TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_3));
9403         auto newTestCaseExplicitScalarOffset_4Ptr =
9404             new RayTracingTestCase(testCtx, "shaderRecordExplicitScalarOffset_4",
9405                                    CaseDef(TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_4));
9406         auto newTestCaseExplicitScalarOffset_5Ptr =
9407             new RayTracingTestCase(testCtx, "shaderRecordExplicitScalarOffset_5",
9408                                    CaseDef(TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_5));
9409         auto newTestCaseExplicitScalarOffset_6Ptr =
9410             new RayTracingTestCase(testCtx, "shaderRecordExplicitScalarOffset_6",
9411                                    CaseDef(TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_6));
9412 
9413         // Tests usage of various variables inside a shader record block using std430 layout and explicit offset qualifiers
9414         auto newTestCaseExplicitSTD430Offset_1Ptr =
9415             new RayTracingTestCase(testCtx, "shaderRecordExplicitSTD430Offset_1",
9416                                    CaseDef(TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_1));
9417         auto newTestCaseExplicitSTD430Offset_2Ptr =
9418             new RayTracingTestCase(testCtx, "shaderRecordExplicitSTD430Offset_2",
9419                                    CaseDef(TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_2));
9420         auto newTestCaseExplicitSTD430Offset_3Ptr =
9421             new RayTracingTestCase(testCtx, "shaderRecordExplicitSTD430Offset_3",
9422                                    CaseDef(TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_3));
9423         auto newTestCaseExplicitSTD430Offset_4Ptr =
9424             new RayTracingTestCase(testCtx, "shaderRecordExplicitSTD430Offset_4",
9425                                    CaseDef(TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_4));
9426         auto newTestCaseExplicitSTD430Offset_5Ptr =
9427             new RayTracingTestCase(testCtx, "shaderRecordExplicitSTD430Offset_5",
9428                                    CaseDef(TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_5));
9429         auto newTestCaseExplicitSTD430Offset_6Ptr =
9430             new RayTracingTestCase(testCtx, "shaderRecordExplicitSTD430Offset_6",
9431                                    CaseDef(TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_6));
9432         miscGroupPtr->addChild(newTestCaseSTD430_1Ptr);
9433         miscGroupPtr->addChild(newTestCaseSTD430_2Ptr);
9434         miscGroupPtr->addChild(newTestCaseSTD430_3Ptr);
9435         miscGroupPtr->addChild(newTestCaseSTD430_4Ptr);
9436         miscGroupPtr->addChild(newTestCaseSTD430_5Ptr);
9437         miscGroupPtr->addChild(newTestCaseSTD430_6Ptr);
9438 
9439         miscGroupPtr->addChild(newTestCaseScalar_1Ptr);
9440         miscGroupPtr->addChild(newTestCaseScalar_2Ptr);
9441         miscGroupPtr->addChild(newTestCaseScalar_3Ptr);
9442         miscGroupPtr->addChild(newTestCaseScalar_4Ptr);
9443         miscGroupPtr->addChild(newTestCaseScalar_5Ptr);
9444         miscGroupPtr->addChild(newTestCaseScalar_6Ptr);
9445 
9446         miscGroupPtr->addChild(newTestCaseExplicitScalarOffset_1Ptr);
9447         miscGroupPtr->addChild(newTestCaseExplicitScalarOffset_2Ptr);
9448         miscGroupPtr->addChild(newTestCaseExplicitScalarOffset_3Ptr);
9449         miscGroupPtr->addChild(newTestCaseExplicitScalarOffset_4Ptr);
9450         miscGroupPtr->addChild(newTestCaseExplicitScalarOffset_5Ptr);
9451         miscGroupPtr->addChild(newTestCaseExplicitScalarOffset_6Ptr);
9452 
9453         miscGroupPtr->addChild(newTestCaseExplicitSTD430Offset_1Ptr);
9454         miscGroupPtr->addChild(newTestCaseExplicitSTD430Offset_2Ptr);
9455         miscGroupPtr->addChild(newTestCaseExplicitSTD430Offset_3Ptr);
9456         miscGroupPtr->addChild(newTestCaseExplicitSTD430Offset_4Ptr);
9457         miscGroupPtr->addChild(newTestCaseExplicitSTD430Offset_5Ptr);
9458         miscGroupPtr->addChild(newTestCaseExplicitSTD430Offset_6Ptr);
9459     }
9460 
9461     for (auto currentGeometryType = GeometryType::FIRST; currentGeometryType != GeometryType::COUNT;
9462          currentGeometryType      = static_cast<GeometryType>(static_cast<uint32_t>(currentGeometryType) + 1))
9463     {
9464         const std::string newTestCaseName =
9465             "recursiveTraces_" + de::toString(getSuffixForGeometryType(currentGeometryType)) + "_";
9466 
9467         // 0 recursion levels.
9468         {
9469             // Verifies that relevant shader stages can correctly read large ray payloads provided by raygen shader stage.
9470             auto newTestCasePtr =
9471                 new RayTracingTestCase(testCtx, (newTestCaseName + "0").data(),
9472                                        CaseDef{TestType::RECURSIVE_TRACES_0, currentGeometryType,
9473                                                AccelerationStructureLayout::ONE_TL_ONE_BL_ONE_GEOMETRY});
9474 
9475             miscGroupPtr->addChild(newTestCasePtr);
9476         }
9477 
9478         // TODO: for (uint32_t nLevels = 1; nLevels <= 29; ++nLevels)
9479         for (uint32_t nLevels = 1; nLevels <= 15; ++nLevels)
9480         {
9481             // Verifies that relevant shader stages can correctly read large ray payloads provided by raygen shader stage.
9482             auto newTestCasePtr = new RayTracingTestCase(
9483                 testCtx, (newTestCaseName + de::toString(nLevels)).data(),
9484                 CaseDef{static_cast<TestType>(static_cast<uint32_t>(TestType::RECURSIVE_TRACES_1) + (nLevels - 1)),
9485                         currentGeometryType, AccelerationStructureLayout::ONE_TL_ONE_BL_ONE_GEOMETRY});
9486 
9487             miscGroupPtr->addChild(newTestCasePtr);
9488         }
9489     }
9490 
9491     {
9492         // Verifies that OpIgnoreIntersectionKHR works as per spec (static invocations).
9493         auto newTestCase1Ptr = new RayTracingTestCase(
9494             testCtx, "OpIgnoreIntersectionKHR_AnyHitStatically",
9495             CaseDef{static_cast<TestType>(static_cast<uint32_t>(TestType::IGNORE_ANY_HIT_STATICALLY)),
9496                     GeometryType::TRIANGLES, AccelerationStructureLayout::COUNT});
9497         // Verifies that OpIgnoreIntersectionKHR works as per spec (dynamic invocations).
9498         auto newTestCase2Ptr = new RayTracingTestCase(
9499             testCtx, "OpIgnoreIntersectionKHR_AnyHitDynamically",
9500             CaseDef{static_cast<TestType>(static_cast<uint32_t>(TestType::IGNORE_ANY_HIT_DYNAMICALLY)),
9501                     GeometryType::TRIANGLES, AccelerationStructureLayout::COUNT});
9502         // Verifies that OpTerminateRayKHR works as per spec (static invocations).
9503         auto newTestCase3Ptr = new RayTracingTestCase(
9504             testCtx, "OpTerminateRayKHR_AnyHitStatically",
9505             CaseDef{static_cast<TestType>(static_cast<uint32_t>(TestType::TERMINATE_ANY_HIT_STATICALLY)),
9506                     GeometryType::TRIANGLES, AccelerationStructureLayout::COUNT});
9507         // Verifies that OpTerminateRayKHR works as per spec (dynamic invocations).
9508         auto newTestCase4Ptr = new RayTracingTestCase(
9509             testCtx, "OpTerminateRayKHR_AnyHitDynamically",
9510             CaseDef{static_cast<TestType>(static_cast<uint32_t>(TestType::TERMINATE_ANY_HIT_DYNAMICALLY)),
9511                     GeometryType::TRIANGLES, AccelerationStructureLayout::COUNT});
9512         // Verifies that OpTerminateRayKHR works as per spec (static invocations).
9513         auto newTestCase5Ptr = new RayTracingTestCase(
9514             testCtx, "OpTerminateRayKHR_IntersectionStatically",
9515             CaseDef{static_cast<TestType>(static_cast<uint32_t>(TestType::TERMINATE_INTERSECTION_STATICALLY)),
9516                     GeometryType::AABB, AccelerationStructureLayout::COUNT});
9517         // Verifies that OpTerminateRayKHR works as per spec (dynamic invocations).
9518         auto newTestCase6Ptr = new RayTracingTestCase(
9519             testCtx, "OpTerminateRayKHR_IntersectionDynamically",
9520             CaseDef{static_cast<TestType>(static_cast<uint32_t>(TestType::TERMINATE_INTERSECTION_DYNAMICALLY)),
9521                     GeometryType::AABB, AccelerationStructureLayout::COUNT});
9522 
9523         miscGroupPtr->addChild(newTestCase1Ptr);
9524         miscGroupPtr->addChild(newTestCase2Ptr);
9525         miscGroupPtr->addChild(newTestCase3Ptr);
9526         miscGroupPtr->addChild(newTestCase4Ptr);
9527         miscGroupPtr->addChild(newTestCase5Ptr);
9528         miscGroupPtr->addChild(newTestCase6Ptr);
9529     }
9530 
9531     {
9532         addFunctionCaseWithPrograms(miscGroupPtr.get(), "null_miss", checkRTPipelineSupport, initBasicHitBufferPrograms,
9533                                     nullMissInstance);
9534         addFunctionCaseWithPrograms(miscGroupPtr.get(), "reuse_creation_buffer_top", checkReuseCreationBufferSupport,
9535                                     initReuseCreationBufferPrograms, reuseCreationBufferInstance, true /*top*/);
9536         addFunctionCaseWithPrograms(miscGroupPtr.get(), "reuse_creation_buffer_bottom", checkReuseCreationBufferSupport,
9537                                     initReuseCreationBufferPrograms, reuseCreationBufferInstance, false /*top*/);
9538     }
9539 
9540     return miscGroupPtr.release();
9541 }
9542 
9543 } // namespace RayTracing
9544 } // namespace vkt
9545