1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2020 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Ray Tracing Acceleration Structures tests
22  *//*--------------------------------------------------------------------*/
23 
24 #include "vktRayTracingAccelerationStructuresTests.hpp"
25 
26 #include "vkDefs.hpp"
27 #include "deClock.h"
28 #include "deRandom.h"
29 
30 #include "vktTestCase.hpp"
31 #include "vktTestGroupUtil.hpp"
32 #include "vkCmdUtil.hpp"
33 #include "vkObjUtil.hpp"
34 #include "vkBuilderUtil.hpp"
35 #include "vkBarrierUtil.hpp"
36 #include "vkBufferWithMemory.hpp"
37 #include "vkImageWithMemory.hpp"
38 #include "vkTypeUtil.hpp"
39 #include "vkImageUtil.hpp"
40 #include "vkRayTracingUtil.hpp"
41 #include "tcuVectorUtil.hpp"
42 #include "tcuTexture.hpp"
43 #include "tcuTestLog.hpp"
44 #include "tcuImageCompare.hpp"
45 #include "tcuFloat.hpp"
46 #include "deModularCounter.hpp"
47 
48 #include <cmath>
49 #include <cstddef>
50 #include <set>
51 #include <limits>
52 #include <iostream>
53 
54 namespace vkt
55 {
56 namespace RayTracing
57 {
58 namespace
59 {
60 using namespace vk;
61 using namespace vkt;
62 using namespace tcu;
63 
64 static const VkFlags ALL_RAY_TRACING_STAGES = VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
65                                               VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | VK_SHADER_STAGE_MISS_BIT_KHR |
66                                               VK_SHADER_STAGE_INTERSECTION_BIT_KHR | VK_SHADER_STAGE_CALLABLE_BIT_KHR;
67 
// Kind of geometry stored in the bottom level acceleration structures.
enum class BottomTestType
{
    TRIANGLES, // Triangle geometry (VK_GEOMETRY_TYPE_TRIANGLES_KHR).
    AABBS,     // Axis-aligned bounding boxes (VK_GEOMETRY_TYPE_AABBS_KHR).
};
73 
// How the instances of the top level acceleration structure relate to the bottom level ones.
enum class TopTestType
{
    IDENTICAL_INSTANCES = 0, // Same instance data everywhere; bottom geometries carry different vertices/aabbs.
    DIFFERENT_INSTANCES = 1, // Instances differ from each other (e.g. by their transform matrix).
    UPDATED_INSTANCES   = 2,
    MIX_INSTANCES       = 3,
};
81 
// Which acceleration structure level an OperationType is applied to.
enum OperationTarget
{
    OT_NONE                = 0,
    OT_TOP_ACCELERATION    = 1,
    OT_BOTTOM_ACCELERATION = 2
};
88 
// Extra operation exercised on the acceleration structure selected by OperationTarget.
enum OperationType
{
    OP_NONE            = 0,
    OP_COPY            = 1,
    OP_COMPACT         = 2,
    OP_SERIALIZE       = 3,
    OP_UPDATE          = 4,
    OP_UPDATE_IN_PLACE = 5
};
98 
// Selects which culling-related VkGeometryInstanceFlagsKHR bits are set on the instances.
enum class InstanceCullFlags
{
    NONE             = 0, // No instance flags.
    CULL_DISABLE     = 1, // Triangle facing cull disable bit only.
    COUNTERCLOCKWISE = 2, // Triangle front counterclockwise bit only.
    ALL              = 3, // Both of the bits above.
};
106 
// Variants of "empty" acceleration structures; NOT_EMPTY is the regular case.
enum class EmptyAccelerationStructureCase
{
    NOT_EMPTY,
    INACTIVE_TRIANGLES,
    INACTIVE_INSTANCES,
    NO_GEOMETRIES_BOTTOM, // geometryCount zero when building.
    NO_PRIMITIVES_BOTTOM, // primitiveCount zero when building.
    NO_PRIMITIVES_TOP,    // primitiveCount zero when building.
};
116 
// Instance custom index testing mode. Any value other than NONE makes the instances carry a
// custom index derived from INSTANCE_CUSTOM_INDEX_BASE; the enumerator names the shader stage involved.
enum class InstanceCustomIndexCase
{
    NONE,
    CLOSEST_HIT,
    ANY_HIT,
    INTERSECTION,
};
124 
// Which part of the geometry data is modified in acceleration structure update tests.
enum class UpdateCase
{
    NONE      = 0,
    VERTICES  = 1,
    INDICES   = 2,
    TRANSFORM = 3
};
132 
// Default size used by the acceleration structure tests.
constexpr uint32_t RTAS_DEFAULT_SIZE = 8u;

// Base value for instance custom indices. Bit 23 is set, i.e. the most significant bit of the
// value when it is stored in 24 bits, so that an implementation sign-extending the instance
// custom index by mistake would be caught.
constexpr uint32_t INSTANCE_CUSTOM_INDEX_BASE = 0x807f00u;
138 
139 struct TestParams;
140 
141 class TestConfiguration
142 {
143 public:
~TestConfiguration()144     virtual ~TestConfiguration()
145     {
146     }
147 
148     virtual std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> initBottomAccelerationStructures(
149         Context &context, TestParams &testParams) = 0;
150     virtual de::MovePtr<TopLevelAccelerationStructure> initTopAccelerationStructure(
151         Context &context, TestParams &testParams,
152         std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> &bottomLevelAccelerationStructures) = 0;
153     virtual void initRayTracingShaders(de::MovePtr<RayTracingPipeline> &rayTracingPipeline, Context &context,
154                                        TestParams &testParams)                                           = 0;
155     virtual void initShaderBindingTables(de::MovePtr<RayTracingPipeline> &rayTracingPipeline, Context &context,
156                                          TestParams &testParams, VkPipeline pipeline, uint32_t shaderGroupHandleSize,
157                                          uint32_t shaderGroupBaseAlignment,
158                                          de::MovePtr<BufferWithMemory> &raygenShaderBindingTable,
159                                          de::MovePtr<BufferWithMemory> &hitShaderBindingTable,
160                                          de::MovePtr<BufferWithMemory> &missShaderBindingTable)          = 0;
161     virtual bool verifyImage(BufferWithMemory *resultBuffer, Context &context, TestParams &testParams)   = 0;
162     virtual VkFormat getResultImageFormat()                                                              = 0;
163     virtual size_t getResultImageFormatSize()                                                            = 0;
164     virtual VkClearValue getClearValue()                                                                 = 0;
165 };
166 
167 struct TestParams
168 {
169     vk::VkAccelerationStructureBuildTypeKHR buildType; // are we making AS on CPU or GPU
170     VkFormat vertexFormat;
171     bool padVertices;
172     VkIndexType indexType;
173     BottomTestType bottomTestType; // what kind of geometry is stored in bottom AS
174     InstanceCullFlags cullFlags;   // Flags for instances, if needed.
175     bool bottomUsesAOP;            // does bottom AS use arrays, or arrays of pointers
176     bool bottomGeneric;            // Bottom created as generic AS type.
177     bool bottomUnboundedCreation;  // Bottom created with unbounded buffer memory.
178     TopTestType topTestType;   // If instances are identical then bottom geometries must have different vertices/aabbs
179     bool topUsesAOP;           // does top AS use arrays, or arrays of pointers
180     bool topGeneric;           // Top created as generic AS type.
181     bool topUnboundedCreation; // Top created with unbounded buffer memory.
182     VkBuildAccelerationStructureFlagsKHR buildFlags;
183     OperationTarget operationTarget;
184     OperationType operationType;
185     uint32_t width;
186     uint32_t height;
187     de::SharedPtr<TestConfiguration> testConfiguration;
188     uint32_t workerThreadsCount;
189     EmptyAccelerationStructureCase emptyASCase;
190     InstanceCustomIndexCase instanceCustomIndexCase;
191     bool useCullMask;
192     uint32_t cullMask;
193     UpdateCase updateCase;
194 };
195 
getShaderGroupSize(const InstanceInterface & vki,const VkPhysicalDevice physicalDevice)196 uint32_t getShaderGroupSize(const InstanceInterface &vki, const VkPhysicalDevice physicalDevice)
197 {
198     de::MovePtr<RayTracingProperties> rayTracingPropertiesKHR;
199 
200     rayTracingPropertiesKHR = makeRayTracingProperties(vki, physicalDevice);
201     return rayTracingPropertiesKHR->getShaderGroupHandleSize();
202 }
203 
getShaderGroupBaseAlignment(const InstanceInterface & vki,const VkPhysicalDevice physicalDevice)204 uint32_t getShaderGroupBaseAlignment(const InstanceInterface &vki, const VkPhysicalDevice physicalDevice)
205 {
206     de::MovePtr<RayTracingProperties> rayTracingPropertiesKHR;
207 
208     rayTracingPropertiesKHR = makeRayTracingProperties(vki, physicalDevice);
209     return rayTracingPropertiesKHR->getShaderGroupBaseAlignment();
210 }
211 
makeImageCreateInfo(uint32_t width,uint32_t height,VkFormat format)212 VkImageCreateInfo makeImageCreateInfo(uint32_t width, uint32_t height, VkFormat format)
213 {
214     const VkImageCreateInfo imageCreateInfo = {
215         VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
216         DE_NULL,                             // const void* pNext;
217         (VkImageCreateFlags)0u,              // VkImageCreateFlags flags;
218         VK_IMAGE_TYPE_2D,                    // VkImageType imageType;
219         format,                              // VkFormat format;
220         makeExtent3D(width, height, 1u),     // VkExtent3D extent;
221         1u,                                  // uint32_t mipLevels;
222         1u,                                  // uint32_t arrayLayers;
223         VK_SAMPLE_COUNT_1_BIT,               // VkSampleCountFlagBits samples;
224         VK_IMAGE_TILING_OPTIMAL,             // VkImageTiling tiling;
225         VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
226             VK_IMAGE_USAGE_TRANSFER_DST_BIT, // VkImageUsageFlags usage;
227         VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
228         0u,                                  // uint32_t queueFamilyIndexCount;
229         DE_NULL,                             // const uint32_t* pQueueFamilyIndices;
230         VK_IMAGE_LAYOUT_UNDEFINED            // VkImageLayout initialLayout;
231     };
232 
233     return imageCreateInfo;
234 }
235 
makeQueryPool(const DeviceInterface & vk,const VkDevice device,const VkQueryType queryType,uint32_t queryCount)236 Move<VkQueryPool> makeQueryPool(const DeviceInterface &vk, const VkDevice device, const VkQueryType queryType,
237                                 uint32_t queryCount)
238 {
239     const VkQueryPoolCreateInfo queryPoolCreateInfo = {
240         VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, // sType
241         DE_NULL,                                  // pNext
242         (VkQueryPoolCreateFlags)0,                // flags
243         queryType,                                // queryType
244         queryCount,                               // queryCount
245         0u,                                       // pipelineStatistics
246     };
247     return createQueryPool(vk, device, &queryPoolCreateInfo);
248 }
249 
getCullFlags(InstanceCullFlags flags)250 VkGeometryInstanceFlagsKHR getCullFlags(InstanceCullFlags flags)
251 {
252     VkGeometryInstanceFlagsKHR cullFlags = 0u;
253 
254     if (flags == InstanceCullFlags::CULL_DISABLE || flags == InstanceCullFlags::ALL)
255         cullFlags |= VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR;
256 
257     if (flags == InstanceCullFlags::COUNTERCLOCKWISE || flags == InstanceCullFlags::ALL)
258         cullFlags |= VK_GEOMETRY_INSTANCE_TRIANGLE_FRONT_COUNTERCLOCKWISE_BIT_KHR;
259 
260     return cullFlags;
261 }
262 
263 class CheckerboardConfiguration : public TestConfiguration
264 {
265 public:
266     std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> initBottomAccelerationStructures(
267         Context &context, TestParams &testParams) override;
268     de::MovePtr<TopLevelAccelerationStructure> initTopAccelerationStructure(
269         Context &context, TestParams &testParams,
270         std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> &bottomLevelAccelerationStructures) override;
271     void initRayTracingShaders(de::MovePtr<RayTracingPipeline> &rayTracingPipeline, Context &context,
272                                TestParams &testParams) override;
273     void initShaderBindingTables(de::MovePtr<RayTracingPipeline> &rayTracingPipeline, Context &context,
274                                  TestParams &testParams, VkPipeline pipeline, uint32_t shaderGroupHandleSize,
275                                  uint32_t shaderGroupBaseAlignment,
276                                  de::MovePtr<BufferWithMemory> &raygenShaderBindingTable,
277                                  de::MovePtr<BufferWithMemory> &hitShaderBindingTable,
278                                  de::MovePtr<BufferWithMemory> &missShaderBindingTable) override;
279     bool verifyImage(BufferWithMemory *resultBuffer, Context &context, TestParams &testParams) override;
280     VkFormat getResultImageFormat() override;
281     size_t getResultImageFormatSize() override;
282     VkClearValue getClearValue() override;
283 };
284 
285 std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> CheckerboardConfiguration::
initBottomAccelerationStructures(Context & context,TestParams & testParams)286     initBottomAccelerationStructures(Context &context, TestParams &testParams)
287 {
288     DE_UNREF(context);
289 
290     // Cull flags can only be used with triangles.
291     DE_ASSERT(testParams.cullFlags == InstanceCullFlags::NONE ||
292               testParams.bottomTestType == BottomTestType::TRIANGLES);
293 
294     // Checkerboard configuration does not support empty geometry tests.
295     DE_ASSERT(testParams.emptyASCase == EmptyAccelerationStructureCase::NOT_EMPTY);
296 
297     std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> result;
298 
299     const auto instanceFlags = getCullFlags(testParams.cullFlags);
300 
301     tcu::Vec3 v0(0.0, 1.0, 0.0);
302     tcu::Vec3 v1(0.0, 0.0, 0.0);
303     tcu::Vec3 v2(1.0, 1.0, 0.0);
304     tcu::Vec3 v3(1.0, 0.0, 0.0);
305 
306     if (testParams.topTestType == TopTestType::DIFFERENT_INSTANCES)
307     {
308         de::MovePtr<BottomLevelAccelerationStructure> bottomLevelAccelerationStructure =
309             makeBottomLevelAccelerationStructure();
310         bottomLevelAccelerationStructure->setGeometryCount(1u);
311         de::SharedPtr<RaytracedGeometryBase> geometry;
312         if (testParams.bottomTestType == BottomTestType::TRIANGLES)
313         {
314             geometry = makeRaytracedGeometry(VK_GEOMETRY_TYPE_TRIANGLES_KHR, testParams.vertexFormat,
315                                              testParams.indexType, testParams.padVertices);
316             if (testParams.indexType == VK_INDEX_TYPE_NONE_KHR)
317             {
318                 if (instanceFlags == 0u)
319                 {
320                     geometry->addVertex(v0);
321                     geometry->addVertex(v1);
322                     geometry->addVertex(v2);
323                     geometry->addVertex(v2);
324                     geometry->addVertex(v1);
325                     geometry->addVertex(v3);
326                 }
327                 else // Counterclockwise so the flags will be needed for the geometry to be visible.
328                 {
329                     geometry->addVertex(v2);
330                     geometry->addVertex(v1);
331                     geometry->addVertex(v0);
332                     geometry->addVertex(v3);
333                     geometry->addVertex(v1);
334                     geometry->addVertex(v2);
335                 }
336             }
337             else // m_data.indexType != VK_INDEX_TYPE_NONE_KHR
338             {
339                 geometry->addVertex(v0);
340                 geometry->addVertex(v1);
341                 geometry->addVertex(v2);
342                 geometry->addVertex(v3);
343 
344                 if (instanceFlags == 0u)
345                 {
346                     geometry->addIndex(0);
347                     geometry->addIndex(1);
348                     geometry->addIndex(2);
349                     geometry->addIndex(2);
350                     geometry->addIndex(1);
351                     geometry->addIndex(3);
352                 }
353                 else // Counterclockwise so the flags will be needed for the geometry to be visible.
354                 {
355                     geometry->addIndex(2);
356                     geometry->addIndex(1);
357                     geometry->addIndex(0);
358                     geometry->addIndex(3);
359                     geometry->addIndex(1);
360                     geometry->addIndex(2);
361                 }
362             }
363         }
364         else // m_data.bottomTestType == BTT_AABBS
365         {
366             geometry = makeRaytracedGeometry(VK_GEOMETRY_TYPE_AABBS_KHR, testParams.vertexFormat, testParams.indexType,
367                                              testParams.padVertices);
368 
369             if (!testParams.padVertices)
370             {
371                 // Single AABB.
372                 geometry->addVertex(tcu::Vec3(0.0f, 0.0f, -0.1f));
373                 geometry->addVertex(tcu::Vec3(1.0f, 1.0f, 0.1f));
374             }
375             else
376             {
377                 // Multiple AABBs covering the same space.
378                 geometry->addVertex(tcu::Vec3(0.0f, 0.0f, -0.1f));
379                 geometry->addVertex(tcu::Vec3(0.5f, 0.5f, 0.1f));
380 
381                 geometry->addVertex(tcu::Vec3(0.5f, 0.5f, -0.1f));
382                 geometry->addVertex(tcu::Vec3(1.0f, 1.0f, 0.1f));
383 
384                 geometry->addVertex(tcu::Vec3(0.0f, 0.5f, -0.1f));
385                 geometry->addVertex(tcu::Vec3(0.5f, 1.0f, 0.1f));
386 
387                 geometry->addVertex(tcu::Vec3(0.5f, 0.0f, -0.1f));
388                 geometry->addVertex(tcu::Vec3(1.0f, 0.5f, 0.1f));
389             }
390         }
391 
392         bottomLevelAccelerationStructure->addGeometry(geometry);
393 
394         if (testParams.instanceCustomIndexCase == InstanceCustomIndexCase::ANY_HIT)
395             geometry->setGeometryFlags(VK_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION_BIT_KHR);
396 
397         result.push_back(de::SharedPtr<BottomLevelAccelerationStructure>(bottomLevelAccelerationStructure.release()));
398     }
399     else // m_data.topTestType == TTT_IDENTICAL_INSTANCES
400     {
401         // triangle and aabb tests use geometries/aabbs with different vertex positions and the same identity matrix in each instance data
402         for (uint32_t y = 0; y < testParams.height; ++y)
403             for (uint32_t x = 0; x < testParams.width; ++x)
404             {
405                 // let's build a chessboard of geometries
406                 if (((x + y) % 2) == 0)
407                     continue;
408                 tcu::Vec3 xyz((float)x, (float)y, 0.0f);
409 
410                 de::MovePtr<BottomLevelAccelerationStructure> bottomLevelAccelerationStructure =
411                     makeBottomLevelAccelerationStructure();
412                 bottomLevelAccelerationStructure->setGeometryCount(1u);
413 
414                 de::SharedPtr<RaytracedGeometryBase> geometry;
415                 if (testParams.bottomTestType == BottomTestType::TRIANGLES)
416                 {
417                     geometry = makeRaytracedGeometry(VK_GEOMETRY_TYPE_TRIANGLES_KHR, testParams.vertexFormat,
418                                                      testParams.indexType, testParams.padVertices);
419                     if (testParams.indexType == VK_INDEX_TYPE_NONE_KHR)
420                     {
421                         if (instanceFlags == 0u)
422                         {
423                             geometry->addVertex(xyz + v0);
424                             geometry->addVertex(xyz + v1);
425                             geometry->addVertex(xyz + v2);
426                             geometry->addVertex(xyz + v2);
427                             geometry->addVertex(xyz + v1);
428                             geometry->addVertex(xyz + v3);
429                         }
430                         else // Counterclockwise so the flags will be needed for the geometry to be visible.
431                         {
432                             geometry->addVertex(xyz + v2);
433                             geometry->addVertex(xyz + v1);
434                             geometry->addVertex(xyz + v0);
435                             geometry->addVertex(xyz + v3);
436                             geometry->addVertex(xyz + v1);
437                             geometry->addVertex(xyz + v2);
438                         }
439                     }
440                     else
441                     {
442                         geometry->addVertex(xyz + v0);
443                         geometry->addVertex(xyz + v1);
444                         geometry->addVertex(xyz + v2);
445                         geometry->addVertex(xyz + v3);
446 
447                         if (instanceFlags == 0u)
448                         {
449                             geometry->addIndex(0);
450                             geometry->addIndex(1);
451                             geometry->addIndex(2);
452                             geometry->addIndex(2);
453                             geometry->addIndex(1);
454                             geometry->addIndex(3);
455                         }
456                         else // Counterclockwise so the flags will be needed for the geometry to be visible.
457                         {
458                             geometry->addIndex(2);
459                             geometry->addIndex(1);
460                             geometry->addIndex(0);
461                             geometry->addIndex(3);
462                             geometry->addIndex(1);
463                             geometry->addIndex(2);
464                         }
465                     }
466                 }
467                 else // testParams.bottomTestType == BTT_AABBS
468                 {
469                     geometry = makeRaytracedGeometry(VK_GEOMETRY_TYPE_AABBS_KHR, testParams.vertexFormat,
470                                                      testParams.indexType, testParams.padVertices);
471 
472                     if (!testParams.padVertices)
473                     {
474                         // Single AABB.
475                         geometry->addVertex(xyz + tcu::Vec3(0.0f, 0.0f, -0.1f));
476                         geometry->addVertex(xyz + tcu::Vec3(1.0f, 1.0f, 0.1f));
477                     }
478                     else
479                     {
480                         // Multiple AABBs covering the same space.
481                         geometry->addVertex(xyz + tcu::Vec3(0.0f, 0.0f, -0.1f));
482                         geometry->addVertex(xyz + tcu::Vec3(0.5f, 0.5f, 0.1f));
483 
484                         geometry->addVertex(xyz + tcu::Vec3(0.5f, 0.5f, -0.1f));
485                         geometry->addVertex(xyz + tcu::Vec3(1.0f, 1.0f, 0.1f));
486 
487                         geometry->addVertex(xyz + tcu::Vec3(0.0f, 0.5f, -0.1f));
488                         geometry->addVertex(xyz + tcu::Vec3(0.5f, 1.0f, 0.1f));
489 
490                         geometry->addVertex(xyz + tcu::Vec3(0.5f, 0.0f, -0.1f));
491                         geometry->addVertex(xyz + tcu::Vec3(1.0f, 0.5f, 0.1f));
492                     }
493                 }
494 
495                 bottomLevelAccelerationStructure->addGeometry(geometry);
496 
497                 if (testParams.instanceCustomIndexCase == InstanceCustomIndexCase::ANY_HIT)
498                     geometry->setGeometryFlags(VK_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION_BIT_KHR);
499 
500                 result.push_back(
501                     de::SharedPtr<BottomLevelAccelerationStructure>(bottomLevelAccelerationStructure.release()));
502             }
503     }
504 
505     return result;
506 }
507 
initTopAccelerationStructure(Context & context,TestParams & testParams,std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> & bottomLevelAccelerationStructures)508 de::MovePtr<TopLevelAccelerationStructure> CheckerboardConfiguration::initTopAccelerationStructure(
509     Context &context, TestParams &testParams,
510     std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> &bottomLevelAccelerationStructures)
511 {
512     // Checkerboard configuration does not support empty geometry tests.
513     DE_ASSERT(testParams.emptyASCase == EmptyAccelerationStructureCase::NOT_EMPTY);
514 
515     DE_UNREF(context);
516 
517     const auto instanceCount = testParams.width * testParams.height / 2u;
518     const auto instanceFlags = getCullFlags(testParams.cullFlags);
519 
520     de::MovePtr<TopLevelAccelerationStructure> result = makeTopLevelAccelerationStructure();
521     result->setInstanceCount(instanceCount);
522 
523     if (testParams.topTestType == TopTestType::DIFFERENT_INSTANCES)
524     {
525 
526         for (uint32_t y = 0; y < testParams.height; ++y)
527             for (uint32_t x = 0; x < testParams.width; ++x)
528             {
529                 if (((x + y) % 2) == 0)
530                     continue;
531                 const VkTransformMatrixKHR transformMatrixKHR = {{
532                     //  float matrix[3][4];
533                     {1.0f, 0.0f, 0.0f, (float)x},
534                     {0.0f, 1.0f, 0.0f, (float)y},
535                     {0.0f, 0.0f, 1.0f, 0.0f},
536                 }};
537                 const uint32_t instanceCustomIndex =
538                     ((testParams.instanceCustomIndexCase != InstanceCustomIndexCase::NONE) ?
539                          (INSTANCE_CUSTOM_INDEX_BASE + x + y) :
540                          0u);
541                 result->addInstance(bottomLevelAccelerationStructures[0], transformMatrixKHR, instanceCustomIndex,
542                                     0xFFu, 0u, instanceFlags);
543             }
544     }
545     else // testParams.topTestType == TTT_IDENTICAL_INSTANCES
546     {
547         uint32_t currentInstanceIndex = 0;
548 
549         for (uint32_t y = 0; y < testParams.height; ++y)
550             for (uint32_t x = 0; x < testParams.width; ++x)
551             {
552                 if (((x + y) % 2) == 0)
553                     continue;
554                 const uint32_t instanceCustomIndex =
555                     ((testParams.instanceCustomIndexCase != InstanceCustomIndexCase::NONE) ?
556                          (INSTANCE_CUSTOM_INDEX_BASE + x + y) :
557                          0u);
558 
559                 if (testParams.useCullMask)
560                 {
561                     result->addInstance(bottomLevelAccelerationStructures[currentInstanceIndex++], identityMatrix3x4,
562                                         instanceCustomIndex, testParams.cullMask, 0u, instanceFlags);
563                 }
564                 else
565                 {
566                     result->addInstance(bottomLevelAccelerationStructures[currentInstanceIndex++], identityMatrix3x4,
567                                         instanceCustomIndex, 0xFFu, 0u, instanceFlags);
568                 }
569             }
570     }
571 
572     return result;
573 }
574 
initRayTracingShaders(de::MovePtr<RayTracingPipeline> & rayTracingPipeline,Context & context,TestParams & testParams)575 void CheckerboardConfiguration::initRayTracingShaders(de::MovePtr<RayTracingPipeline> &rayTracingPipeline,
576                                                       Context &context, TestParams &testParams)
577 {
578     DE_UNREF(testParams);
579     const DeviceInterface &vkd = context.getDeviceInterface();
580     const VkDevice device      = context.getDevice();
581 
582     const bool useAnyHit      = (testParams.instanceCustomIndexCase == InstanceCustomIndexCase::ANY_HIT);
583     const auto hitShaderStage = (useAnyHit ? VK_SHADER_STAGE_ANY_HIT_BIT_KHR : VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR);
584     const auto hitShaderName  = (useAnyHit ? "ahit" : "chit");
585 
586     rayTracingPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR,
587                                   createShaderModule(vkd, device, context.getBinaryCollection().get("rgen"), 0), 0);
588     rayTracingPipeline->addShader(
589         hitShaderStage, createShaderModule(vkd, device, context.getBinaryCollection().get(hitShaderName), 0), 1);
590     rayTracingPipeline->addShader(
591         hitShaderStage, createShaderModule(vkd, device, context.getBinaryCollection().get(hitShaderName), 0), 2);
592     if (testParams.bottomTestType == BottomTestType::AABBS)
593         rayTracingPipeline->addShader(VK_SHADER_STAGE_INTERSECTION_BIT_KHR,
594                                       createShaderModule(vkd, device, context.getBinaryCollection().get("isect"), 0),
595                                       2);
596     rayTracingPipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR,
597                                   createShaderModule(vkd, device, context.getBinaryCollection().get("miss"), 0), 3);
598 }
599 
initShaderBindingTables(de::MovePtr<RayTracingPipeline> & rayTracingPipeline,Context & context,TestParams & testParams,VkPipeline pipeline,uint32_t shaderGroupHandleSize,uint32_t shaderGroupBaseAlignment,de::MovePtr<BufferWithMemory> & raygenShaderBindingTable,de::MovePtr<BufferWithMemory> & hitShaderBindingTable,de::MovePtr<BufferWithMemory> & missShaderBindingTable)600 void CheckerboardConfiguration::initShaderBindingTables(de::MovePtr<RayTracingPipeline> &rayTracingPipeline,
601                                                         Context &context, TestParams &testParams, VkPipeline pipeline,
602                                                         uint32_t shaderGroupHandleSize,
603                                                         uint32_t shaderGroupBaseAlignment,
604                                                         de::MovePtr<BufferWithMemory> &raygenShaderBindingTable,
605                                                         de::MovePtr<BufferWithMemory> &hitShaderBindingTable,
606                                                         de::MovePtr<BufferWithMemory> &missShaderBindingTable)
607 {
608     const DeviceInterface &vkd = context.getDeviceInterface();
609     const VkDevice device      = context.getDevice();
610     Allocator &allocator       = context.getDefaultAllocator();
611 
612     raygenShaderBindingTable = rayTracingPipeline->createShaderBindingTable(
613         vkd, device, pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 0, 1);
614     if (testParams.bottomTestType == BottomTestType::AABBS)
615         hitShaderBindingTable = rayTracingPipeline->createShaderBindingTable(
616             vkd, device, pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 2, 1);
617     else // testParams.bottomTestType == BTT_TRIANGLES
618         hitShaderBindingTable = rayTracingPipeline->createShaderBindingTable(
619             vkd, device, pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 1, 1);
620     missShaderBindingTable = rayTracingPipeline->createShaderBindingTable(
621         vkd, device, pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 3, 1);
622 }
623 
// Returns num with the order of its 32 bits reversed: bit 0 becomes bit 31, bit 1 becomes
// bit 30 and so on.
uint32_t bitfieldReverse(uint32_t num)
{
    uint32_t reversed = 0u;

    for (uint32_t bit = 0u; bit < 32u; ++bit)
    {
        // Unsigned shifts on purpose: shifting a signed 1 into bit 31 is not well defined
        // before C++20.
        if ((num & (1u << bit)) != 0u)
            reversed |= 1u << (31u - bit);
    }

    return reversed;
}
635 
verifyImage(BufferWithMemory * resultBuffer,Context & context,TestParams & testParams)636 bool CheckerboardConfiguration::verifyImage(BufferWithMemory *resultBuffer, Context &context, TestParams &testParams)
637 {
638     // Checkerboard configuration does not support empty geometry tests.
639     DE_ASSERT(testParams.emptyASCase == EmptyAccelerationStructureCase::NOT_EMPTY);
640 
641     DE_UNREF(context);
642     const auto *bufferPtr = (int32_t *)resultBuffer->getAllocation().getHostPtr();
643     uint32_t pos          = 0;
644     uint32_t failures     = 0;
645 
646     // verify results - each test case should generate checkerboard pattern
647     for (uint32_t y = 0; y < testParams.height; ++y)
648         for (uint32_t x = 0; x < testParams.width; ++x)
649         {
650             // The hit value should match the shader code.
651             if (testParams.useCullMask)
652             {
653                 const int32_t hitValue = testParams.cullMask & 0x000000FFu; // only 8 last bits are used by the cullMask
654                 const int32_t expectedResult =
655                     ((x + y) % 2) ? hitValue : bitfieldReverse(testParams.cullMask & 0x000000FFu);
656 
657                 if (bufferPtr[pos] != expectedResult)
658                     failures++;
659             }
660             else
661             {
662                 const int32_t hitValue       = ((testParams.instanceCustomIndexCase != InstanceCustomIndexCase::NONE) ?
663                                                     static_cast<int32_t>(INSTANCE_CUSTOM_INDEX_BASE + x + y) :
664                                                     2);
665                 const int32_t expectedResult = ((x + y) % 2) ? hitValue : 1;
666 
667                 if (bufferPtr[pos] != expectedResult)
668                     failures++;
669             }
670 
671             ++pos;
672         }
673     return failures == 0;
674 }
675 
getResultImageFormat()676 VkFormat CheckerboardConfiguration::getResultImageFormat()
677 {
678     return VK_FORMAT_R32_SINT;
679 }
680 
getResultImageFormatSize()681 size_t CheckerboardConfiguration::getResultImageFormatSize()
682 {
683     return sizeof(uint32_t);
684 }
685 
getClearValue()686 VkClearValue CheckerboardConfiguration::getClearValue()
687 {
688     return makeClearValueColorU32(0xFF, 0u, 0u, 0u);
689 }
690 
691 class SingleTriangleConfiguration : public TestConfiguration
692 {
693 public:
694     std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> initBottomAccelerationStructures(
695         Context &context, TestParams &testParams) override;
696     de::MovePtr<TopLevelAccelerationStructure> initTopAccelerationStructure(
697         Context &context, TestParams &testParams,
698         std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> &bottomLevelAccelerationStructures) override;
699     void initRayTracingShaders(de::MovePtr<RayTracingPipeline> &rayTracingPipeline, Context &context,
700                                TestParams &testParams) override;
701     void initShaderBindingTables(de::MovePtr<RayTracingPipeline> &rayTracingPipeline, Context &context,
702                                  TestParams &testParams, VkPipeline pipeline, uint32_t shaderGroupHandleSize,
703                                  uint32_t shaderGroupBaseAlignment,
704                                  de::MovePtr<BufferWithMemory> &raygenShaderBindingTable,
705                                  de::MovePtr<BufferWithMemory> &hitShaderBindingTable,
706                                  de::MovePtr<BufferWithMemory> &missShaderBindingTable) override;
707     bool verifyImage(BufferWithMemory *resultBuffer, Context &context, TestParams &testParams) override;
708     VkFormat getResultImageFormat() override;
709     size_t getResultImageFormatSize() override;
710     VkClearValue getClearValue() override;
711 
712     // well, actually we have 2 triangles, but we ignore the first one ( see raygen shader for this configuration )
713     const std::vector<tcu::Vec3> vertices = {
714         tcu::Vec3(0.0f, 0.0f, -0.1f), tcu::Vec3(-0.1f, 0.0f, 0.0f), tcu::Vec3(0.0f, -0.1f, 0.0f),
715         tcu::Vec3(0.0f, 0.0f, 0.0f),  tcu::Vec3(0.5f, 0.0f, -0.5f), tcu::Vec3(0.0f, 0.5f, -0.5f),
716     };
717 
718     const std::vector<uint32_t> indices = {3, 4, 5};
719     // Different vertex configurations of a triangle whose parameter x is set to NaN during inactive_triangles tests
720     const bool nanConfig[7][3] = {
721         {true, true, true},  {true, false, false}, {false, true, false}, {false, false, true},
722         {true, true, false}, {false, true, true},  {true, false, true},
723     };
724 };
725 
726 std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> SingleTriangleConfiguration::
initBottomAccelerationStructures(Context & context,TestParams & testParams)727     initBottomAccelerationStructures(Context &context, TestParams &testParams)
728 {
729     DE_UNREF(context);
730 
731     // No other cases supported for the single triangle configuration.
732     DE_ASSERT(testParams.instanceCustomIndexCase == InstanceCustomIndexCase::NONE);
733 
734     std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> result;
735 
736     de::MovePtr<BottomLevelAccelerationStructure> bottomLevelAccelerationStructure =
737         makeBottomLevelAccelerationStructure();
738 
739     unsigned int geometryCount = testParams.emptyASCase == EmptyAccelerationStructureCase::INACTIVE_TRIANGLES ? 4U : 1U;
740 
741     if (testParams.emptyASCase == EmptyAccelerationStructureCase::INACTIVE_TRIANGLES)
742     {
743         bottomLevelAccelerationStructure->setGeometryCount(geometryCount);
744 
745         de::SharedPtr<RaytracedGeometryBase> geometry;
746         geometry = makeRaytracedGeometry(VK_GEOMETRY_TYPE_TRIANGLES_KHR, testParams.vertexFormat, testParams.indexType);
747 
748         for (unsigned int i = 0; i < geometryCount; i++)
749         {
750             auto customVertices(vertices);
751 
752             const auto nanValue = tcu::Float32::nan().asFloat();
753 
754             if (nanConfig[i][0])
755                 customVertices[3].x() = nanValue;
756             if (nanConfig[i][1])
757                 customVertices[4].x() = nanValue;
758             if (nanConfig[i][2])
759                 customVertices[5].x() = nanValue;
760 
761             for (auto it = begin(customVertices), eit = end(customVertices); it != eit; ++it)
762                 geometry->addVertex(*it);
763 
764             if (testParams.indexType != VK_INDEX_TYPE_NONE_KHR)
765             {
766                 for (auto it = begin(indices), eit = end(indices); it != eit; ++it)
767                     geometry->addIndex(*it);
768             }
769             bottomLevelAccelerationStructure->addGeometry(geometry);
770         }
771     }
772     else
773     {
774         bottomLevelAccelerationStructure->setGeometryCount(geometryCount);
775 
776         de::SharedPtr<RaytracedGeometryBase> geometry;
777         geometry = makeRaytracedGeometry(VK_GEOMETRY_TYPE_TRIANGLES_KHR, testParams.vertexFormat, testParams.indexType);
778 
779         for (auto it = begin(vertices), eit = end(vertices); it != eit; ++it)
780             geometry->addVertex(*it);
781 
782         if (testParams.indexType != VK_INDEX_TYPE_NONE_KHR)
783         {
784             for (auto it = begin(indices), eit = end(indices); it != eit; ++it)
785                 geometry->addIndex(*it);
786         }
787         bottomLevelAccelerationStructure->addGeometry(geometry);
788     }
789 
790     result.push_back(de::SharedPtr<BottomLevelAccelerationStructure>(bottomLevelAccelerationStructure.release()));
791 
792     return result;
793 }
794 
initTopAccelerationStructure(Context & context,TestParams & testParams,std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> & bottomLevelAccelerationStructures)795 de::MovePtr<TopLevelAccelerationStructure> SingleTriangleConfiguration::initTopAccelerationStructure(
796     Context &context, TestParams &testParams,
797     std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> &bottomLevelAccelerationStructures)
798 {
799     DE_UNREF(context);
800     DE_UNREF(testParams);
801 
802     // Unsupported in this configuration.
803     DE_ASSERT(testParams.instanceCustomIndexCase == InstanceCustomIndexCase::NONE);
804 
805     de::MovePtr<TopLevelAccelerationStructure> result = makeTopLevelAccelerationStructure();
806     result->setInstanceCount(1u);
807 
808     result->addInstance(bottomLevelAccelerationStructures[0]);
809 
810     return result;
811 }
812 
initRayTracingShaders(de::MovePtr<RayTracingPipeline> & rayTracingPipeline,Context & context,TestParams & testParams)813 void SingleTriangleConfiguration::initRayTracingShaders(de::MovePtr<RayTracingPipeline> &rayTracingPipeline,
814                                                         Context &context, TestParams &testParams)
815 {
816     DE_UNREF(testParams);
817     const DeviceInterface &vkd = context.getDeviceInterface();
818     const VkDevice device      = context.getDevice();
819 
820     rayTracingPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR,
821                                   createShaderModule(vkd, device, context.getBinaryCollection().get("rgen_depth"), 0),
822                                   0);
823     rayTracingPipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,
824                                   createShaderModule(vkd, device, context.getBinaryCollection().get("chit_depth"), 0),
825                                   1);
826     rayTracingPipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR,
827                                   createShaderModule(vkd, device, context.getBinaryCollection().get("miss_depth"), 0),
828                                   2);
829 }
830 
initShaderBindingTables(de::MovePtr<RayTracingPipeline> & rayTracingPipeline,Context & context,TestParams & testParams,VkPipeline pipeline,uint32_t shaderGroupHandleSize,uint32_t shaderGroupBaseAlignment,de::MovePtr<BufferWithMemory> & raygenShaderBindingTable,de::MovePtr<BufferWithMemory> & hitShaderBindingTable,de::MovePtr<BufferWithMemory> & missShaderBindingTable)831 void SingleTriangleConfiguration::initShaderBindingTables(de::MovePtr<RayTracingPipeline> &rayTracingPipeline,
832                                                           Context &context, TestParams &testParams, VkPipeline pipeline,
833                                                           uint32_t shaderGroupHandleSize,
834                                                           uint32_t shaderGroupBaseAlignment,
835                                                           de::MovePtr<BufferWithMemory> &raygenShaderBindingTable,
836                                                           de::MovePtr<BufferWithMemory> &hitShaderBindingTable,
837                                                           de::MovePtr<BufferWithMemory> &missShaderBindingTable)
838 {
839     DE_UNREF(testParams);
840     const DeviceInterface &vkd = context.getDeviceInterface();
841     const VkDevice device      = context.getDevice();
842     Allocator &allocator       = context.getDefaultAllocator();
843 
844     raygenShaderBindingTable = rayTracingPipeline->createShaderBindingTable(
845         vkd, device, pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 0, 1);
846     hitShaderBindingTable = rayTracingPipeline->createShaderBindingTable(
847         vkd, device, pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 1, 1);
848     missShaderBindingTable = rayTracingPipeline->createShaderBindingTable(
849         vkd, device, pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 2, 1);
850 }
851 
pointInTriangle2D(const tcu::Vec3 & p,const tcu::Vec3 & p0,const tcu::Vec3 & p1,const tcu::Vec3 & p2)852 bool pointInTriangle2D(const tcu::Vec3 &p, const tcu::Vec3 &p0, const tcu::Vec3 &p1, const tcu::Vec3 &p2)
853 {
854     float s = p0.y() * p2.x() - p0.x() * p2.y() + (p2.y() - p0.y()) * p.x() + (p0.x() - p2.x()) * p.y();
855     float t = p0.x() * p1.y() - p0.y() * p1.x() + (p0.y() - p1.y()) * p.x() + (p1.x() - p0.x()) * p.y();
856 
857     if ((s < 0) != (t < 0))
858         return false;
859 
860     float a = -p1.y() * p2.x() + p0.y() * (p2.x() - p1.x()) + p0.x() * (p1.y() - p2.y()) + p1.x() * p2.y();
861 
862     return a < 0 ? (s <= 0 && s + t >= a) : (s >= 0 && s + t <= a);
863 }
864 
verifyImage(BufferWithMemory * resultBuffer,Context & context,TestParams & testParams)865 bool SingleTriangleConfiguration::verifyImage(BufferWithMemory *resultBuffer, Context &context, TestParams &testParams)
866 {
867     tcu::TextureFormat imageFormat  = vk::mapVkFormat(getResultImageFormat());
868     tcu::TextureFormat vertexFormat = vk::mapVkFormat(testParams.vertexFormat);
869     tcu::ConstPixelBufferAccess resultAccess(imageFormat, testParams.width, testParams.height, 1,
870                                              resultBuffer->getAllocation().getHostPtr());
871 
872     std::vector<float> reference(testParams.width * testParams.height);
873     tcu::PixelBufferAccess referenceAccess(imageFormat, testParams.width, testParams.height, 1, reference.data());
874 
875     // verify results
876     tcu::Vec3 v0          = vertices[3];
877     tcu::Vec3 v1          = vertices[4];
878     tcu::Vec3 v2          = vertices[5];
879     const int numChannels = tcu::getNumUsedChannels(vertexFormat.order);
880     if (numChannels < 3)
881     {
882         v0.z() = 0.0f;
883         v1.z() = 0.0f;
884         v2.z() = 0.0f;
885     }
886     tcu::Vec3 abc = tcu::cross((v2 - v0), (v1 - v0));
887 
888     for (uint32_t j = 0; j < testParams.height; ++j)
889     {
890         float y = 0.1f + 0.2f * float(j) / float(testParams.height - 1);
891         for (uint32_t i = 0; i < testParams.width; ++i)
892         {
893             float x         = 0.1f + 0.2f * float(i) / float(testParams.width - 1);
894             float z         = (abc.x() * x + abc.y() * y) / abc.z();
895             bool inTriangle = pointInTriangle2D(tcu::Vec3(x, y, z), v0, v1, v2);
896             float refValue =
897                 ((inTriangle && testParams.emptyASCase == EmptyAccelerationStructureCase::NOT_EMPTY) ? 1.0f + z : 0.0f);
898             referenceAccess.setPixel(tcu::Vec4(refValue, 0.0f, 0.0f, 1.0f), i, j);
899         }
900     }
901     return tcu::floatThresholdCompare(context.getTestContext().getLog(), "Result comparison", "", referenceAccess,
902                                       resultAccess, tcu::Vec4(0.01f), tcu::COMPARE_LOG_EVERYTHING);
903 }
904 
getResultImageFormat()905 VkFormat SingleTriangleConfiguration::getResultImageFormat()
906 {
907     return VK_FORMAT_R32_SFLOAT;
908 }
909 
getResultImageFormatSize()910 size_t SingleTriangleConfiguration::getResultImageFormatSize()
911 {
912     return sizeof(float);
913 }
914 
getClearValue()915 VkClearValue SingleTriangleConfiguration::getClearValue()
916 {
917     return makeClearValueColorF32(32.0f, 0.0f, 0.0f, 0.0f);
918 }
919 
920 class UpdateableASConfiguration : public TestConfiguration
921 {
922 public:
923     std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> initBottomAccelerationStructures(
924         Context &context, TestParams &testParams) override;
925     de::MovePtr<TopLevelAccelerationStructure> initTopAccelerationStructure(
926         Context &context, TestParams &testParams,
927         std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> &bottomLevelAccelerationStructures) override;
928     void initRayTracingShaders(de::MovePtr<RayTracingPipeline> &rayTracingPipeline, Context &context,
929                                TestParams &testParams) override;
930     void initShaderBindingTables(de::MovePtr<RayTracingPipeline> &rayTracingPipeline, Context &context,
931                                  TestParams &testParams, VkPipeline pipeline, uint32_t shaderGroupHandleSize,
932                                  uint32_t shaderGroupBaseAlignment,
933                                  de::MovePtr<BufferWithMemory> &raygenShaderBindingTable,
934                                  de::MovePtr<BufferWithMemory> &hitShaderBindingTable,
935                                  de::MovePtr<BufferWithMemory> &missShaderBindingTable) override;
936     bool verifyImage(BufferWithMemory *resultBuffer, Context &context, TestParams &testParams) override;
937     VkFormat getResultImageFormat() override;
938     size_t getResultImageFormatSize() override;
939     VkClearValue getClearValue() override;
940 
941     // two triangles: one in the front we will replace with one in the back after updating
942     // update vertex: build with vertices[0], update vertices with vertices[1]
943     // update index: build with vertices[0], updade indices with indices[1]
944     const std::vector<tcu::Vec3> vertices = {
945         tcu::Vec3(0.0f, 0.0f, 0.0f),  tcu::Vec3(0.5f, 0.0f, 0.0f),  tcu::Vec3(0.0f, 0.5f, 0.0f),
946         tcu::Vec3(0.0f, 0.0f, -0.5f), tcu::Vec3(0.5f, 0.0f, -0.5f), tcu::Vec3(0.0f, 0.5f, -0.5f),
947     };
948 
949     const std::vector<uint32_t> indices = {0, 1, 2};
950 };
951 
952 std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> UpdateableASConfiguration::
initBottomAccelerationStructures(Context & context,TestParams & testParams)953     initBottomAccelerationStructures(Context &context, TestParams &testParams)
954 {
955     DE_UNREF(context);
956 
957     // No other cases supported for the single triangle configuration.
958     DE_ASSERT(testParams.instanceCustomIndexCase == InstanceCustomIndexCase::NONE);
959 
960     std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> result;
961 
962     {
963         de::MovePtr<BottomLevelAccelerationStructure> bottomLevelAccelerationStructure =
964             makeBottomLevelAccelerationStructure();
965 
966         unsigned int geometryCount = 1U;
967 
968         bottomLevelAccelerationStructure->setGeometryCount(geometryCount);
969 
970         de::SharedPtr<RaytracedGeometryBase> geometry;
971         geometry = makeRaytracedGeometry(VK_GEOMETRY_TYPE_TRIANGLES_KHR, testParams.vertexFormat, testParams.indexType);
972 
973         for (auto it = begin(vertices), eit = end(vertices); it != eit; ++it)
974             geometry->addVertex(*it);
975 
976         if (testParams.indexType != VK_INDEX_TYPE_NONE_KHR)
977         {
978             for (auto it = begin(indices), eit = end(indices); it != eit; ++it)
979                 geometry->addIndex(*it);
980         }
981         bottomLevelAccelerationStructure->addGeometry(geometry);
982 
983         result.push_back(de::SharedPtr<BottomLevelAccelerationStructure>(bottomLevelAccelerationStructure.release()));
984     }
985     return result;
986 }
987 
initTopAccelerationStructure(Context & context,TestParams & testParams,std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> & bottomLevelAccelerationStructures)988 de::MovePtr<TopLevelAccelerationStructure> UpdateableASConfiguration::initTopAccelerationStructure(
989     Context &context, TestParams &testParams,
990     std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> &bottomLevelAccelerationStructures)
991 {
992     DE_UNREF(context);
993     DE_UNREF(testParams);
994 
995     // Unsupported in this configuration.
996     DE_ASSERT(testParams.instanceCustomIndexCase == InstanceCustomIndexCase::NONE);
997 
998     de::MovePtr<TopLevelAccelerationStructure> result = makeTopLevelAccelerationStructure();
999     result->setInstanceCount(1u);
1000 
1001     result->addInstance(bottomLevelAccelerationStructures[0]);
1002 
1003     return result;
1004 }
1005 
initRayTracingShaders(de::MovePtr<RayTracingPipeline> & rayTracingPipeline,Context & context,TestParams & testParams)1006 void UpdateableASConfiguration::initRayTracingShaders(de::MovePtr<RayTracingPipeline> &rayTracingPipeline,
1007                                                       Context &context, TestParams &testParams)
1008 {
1009     DE_UNREF(testParams);
1010     const DeviceInterface &vkd = context.getDeviceInterface();
1011     const VkDevice device      = context.getDevice();
1012 
1013     rayTracingPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR,
1014                                   createShaderModule(vkd, device, context.getBinaryCollection().get("rgen_depth"), 0),
1015                                   0);
1016     rayTracingPipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,
1017                                   createShaderModule(vkd, device, context.getBinaryCollection().get("chit_depth"), 0),
1018                                   1);
1019     rayTracingPipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR,
1020                                   createShaderModule(vkd, device, context.getBinaryCollection().get("miss_depth"), 0),
1021                                   2);
1022 }
1023 
initShaderBindingTables(de::MovePtr<RayTracingPipeline> & rayTracingPipeline,Context & context,TestParams & testParams,VkPipeline pipeline,uint32_t shaderGroupHandleSize,uint32_t shaderGroupBaseAlignment,de::MovePtr<BufferWithMemory> & raygenShaderBindingTable,de::MovePtr<BufferWithMemory> & hitShaderBindingTable,de::MovePtr<BufferWithMemory> & missShaderBindingTable)1024 void UpdateableASConfiguration::initShaderBindingTables(de::MovePtr<RayTracingPipeline> &rayTracingPipeline,
1025                                                         Context &context, TestParams &testParams, VkPipeline pipeline,
1026                                                         uint32_t shaderGroupHandleSize,
1027                                                         uint32_t shaderGroupBaseAlignment,
1028                                                         de::MovePtr<BufferWithMemory> &raygenShaderBindingTable,
1029                                                         de::MovePtr<BufferWithMemory> &hitShaderBindingTable,
1030                                                         de::MovePtr<BufferWithMemory> &missShaderBindingTable)
1031 {
1032     DE_UNREF(testParams);
1033     const DeviceInterface &vkd = context.getDeviceInterface();
1034     const VkDevice device      = context.getDevice();
1035     Allocator &allocator       = context.getDefaultAllocator();
1036 
1037     raygenShaderBindingTable = rayTracingPipeline->createShaderBindingTable(
1038         vkd, device, pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 0, 1);
1039     hitShaderBindingTable = rayTracingPipeline->createShaderBindingTable(
1040         vkd, device, pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 1, 1);
1041     missShaderBindingTable = rayTracingPipeline->createShaderBindingTable(
1042         vkd, device, pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 2, 1);
1043 }
1044 
verifyImage(BufferWithMemory * resultBuffer,Context & context,TestParams & testParams)1045 bool UpdateableASConfiguration::verifyImage(BufferWithMemory *resultBuffer, Context &context, TestParams &testParams)
1046 {
1047     tcu::TextureFormat imageFormat  = vk::mapVkFormat(getResultImageFormat());
1048     tcu::TextureFormat vertexFormat = vk::mapVkFormat(testParams.vertexFormat);
1049     tcu::ConstPixelBufferAccess resultAccess(imageFormat, testParams.width, testParams.height, 1,
1050                                              resultBuffer->getAllocation().getHostPtr());
1051 
1052     std::vector<float> reference(testParams.width * testParams.height);
1053     tcu::PixelBufferAccess referenceAccess(imageFormat, testParams.width, testParams.height, 1, reference.data());
1054 
1055     // verify results
1056     tcu::Vec3 v0          = vertices[3];
1057     tcu::Vec3 v1          = vertices[4];
1058     tcu::Vec3 v2          = vertices[5];
1059     const int numChannels = tcu::getNumUsedChannels(vertexFormat.order);
1060     if (numChannels < 3)
1061     {
1062         v0.z() = 0.0f;
1063         v1.z() = 0.0f;
1064         v2.z() = 0.0f;
1065     }
1066 
1067     for (uint32_t j = 0; j < testParams.height; ++j)
1068     {
1069         float y = 0.1f + 0.2f * float(j) / float(testParams.height - 1);
1070         for (uint32_t i = 0; i < testParams.width; ++i)
1071         {
1072             float x         = 0.1f + 0.2f * float(i) / float(testParams.width - 1);
1073             float z         = v0.z();
1074             bool inTriangle = pointInTriangle2D(tcu::Vec3(x, y, z), v0, v1, v2);
1075             float refValue =
1076                 ((inTriangle && testParams.emptyASCase == EmptyAccelerationStructureCase::NOT_EMPTY) ? 1.0f - z : 0.0f);
1077             referenceAccess.setPixel(tcu::Vec4(refValue, 0.0f, 0.0f, 1.0f), i, j);
1078         }
1079     }
1080     return tcu::floatThresholdCompare(context.getTestContext().getLog(), "Result comparison", "", referenceAccess,
1081                                       resultAccess, tcu::Vec4(0.01f), tcu::COMPARE_LOG_EVERYTHING);
1082 }
1083 
getResultImageFormat()1084 VkFormat UpdateableASConfiguration::getResultImageFormat()
1085 {
1086     return VK_FORMAT_R32_SFLOAT;
1087 }
1088 
getResultImageFormatSize()1089 size_t UpdateableASConfiguration::getResultImageFormatSize()
1090 {
1091     return sizeof(float);
1092 }
1093 
getClearValue()1094 VkClearValue UpdateableASConfiguration::getClearValue()
1095 {
1096     return makeClearValueColorF32(32.0f, 0.0f, 0.0f, 0.0f);
1097 }
1098 
commonASTestsCheckSupport(Context & context)1099 void commonASTestsCheckSupport(Context &context)
1100 {
1101     context.requireInstanceFunctionality("VK_KHR_get_physical_device_properties2");
1102     context.requireDeviceFunctionality("VK_KHR_acceleration_structure");
1103     context.requireDeviceFunctionality("VK_KHR_ray_tracing_pipeline");
1104 
1105     const VkPhysicalDeviceRayTracingPipelineFeaturesKHR &rayTracingPipelineFeaturesKHR =
1106         context.getRayTracingPipelineFeatures();
1107     if (rayTracingPipelineFeaturesKHR.rayTracingPipeline == false)
1108         TCU_THROW(NotSupportedError, "Requires VkPhysicalDeviceRayTracingPipelineFeaturesKHR.rayTracingPipeline");
1109 
1110     const VkPhysicalDeviceAccelerationStructureFeaturesKHR &accelerationStructureFeaturesKHR =
1111         context.getAccelerationStructureFeatures();
1112     if (accelerationStructureFeaturesKHR.accelerationStructure == false)
1113         TCU_THROW(TestError, "VK_KHR_ray_tracing_pipeline requires "
1114                              "VkPhysicalDeviceAccelerationStructureFeaturesKHR.accelerationStructure");
1115 }
1116 
1117 class RayTracingASBasicTestCase : public TestCase
1118 {
1119 public:
1120     RayTracingASBasicTestCase(tcu::TestContext &context, const char *name, const TestParams &data);
1121     ~RayTracingASBasicTestCase(void);
1122 
1123     void checkSupport(Context &context) const override;
1124     void initPrograms(SourceCollections &programCollection) const override;
1125     TestInstance *createInstance(Context &context) const override;
1126 
1127 protected:
1128     TestParams m_data;
1129 };
1130 
1131 // Same as RayTracingASBasicTestCase but it will only initialize programs for SingleTriangleConfiguration and use hand-tuned SPIR-V
1132 // assembly.
1133 class RayTracingASFuncArgTestCase : public RayTracingASBasicTestCase
1134 {
1135 public:
1136     RayTracingASFuncArgTestCase(tcu::TestContext &context, const char *name, const TestParams &data);
~RayTracingASFuncArgTestCase(void)1137     ~RayTracingASFuncArgTestCase(void)
1138     {
1139     }
1140 
1141     void initPrograms(SourceCollections &programCollection) const override;
1142 };
1143 
1144 class RayTracingASBasicTestInstance : public TestInstance
1145 {
1146 public:
1147     RayTracingASBasicTestInstance(Context &context, const TestParams &data);
1148     ~RayTracingASBasicTestInstance(void) = default;
1149     tcu::TestStatus iterate(void) override;
1150 
1151 protected:
1152     bool iterateNoWorkers(void);
1153     bool iterateWithWorkers(void);
1154     de::MovePtr<BufferWithMemory> runTest(const uint32_t workerThreadsCount);
1155 
1156 private:
1157     TestParams m_data;
1158 };
1159 
RayTracingASBasicTestCase(tcu::TestContext & context,const char * name,const TestParams & data)1160 RayTracingASBasicTestCase::RayTracingASBasicTestCase(tcu::TestContext &context, const char *name,
1161                                                      const TestParams &data)
1162     : vkt::TestCase(context, name)
1163     , m_data(data)
1164 {
1165 }
1166 
~RayTracingASBasicTestCase(void)1167 RayTracingASBasicTestCase::~RayTracingASBasicTestCase(void)
1168 {
1169 }
1170 
checkSupport(Context & context) const1171 void RayTracingASBasicTestCase::checkSupport(Context &context) const
1172 {
1173     commonASTestsCheckSupport(context);
1174 
1175     const VkPhysicalDeviceAccelerationStructureFeaturesKHR &accelerationStructureFeaturesKHR =
1176         context.getAccelerationStructureFeatures();
1177     if (m_data.buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR &&
1178         accelerationStructureFeaturesKHR.accelerationStructureHostCommands == false)
1179         TCU_THROW(NotSupportedError,
1180                   "Requires VkPhysicalDeviceAccelerationStructureFeaturesKHR.accelerationStructureHostCommands");
1181 
1182     if (m_data.useCullMask)
1183         context.requireDeviceFunctionality("VK_KHR_ray_tracing_maintenance1");
1184 
1185     // Check supported vertex format.
1186     checkAccelerationStructureVertexBufferFormat(context.getInstanceInterface(), context.getPhysicalDevice(),
1187                                                  m_data.vertexFormat);
1188 }
1189 
initPrograms(SourceCollections & programCollection) const1190 void RayTracingASBasicTestCase::initPrograms(SourceCollections &programCollection) const
1191 {
1192     bool storeInRGen = false;
1193     bool storeInAHit = false;
1194     bool storeInCHit = false;
1195     bool storeInISec = false;
1196 
1197     switch (m_data.instanceCustomIndexCase)
1198     {
1199     case InstanceCustomIndexCase::NONE:
1200         storeInRGen = true;
1201         break;
1202     case InstanceCustomIndexCase::CLOSEST_HIT:
1203         storeInCHit = true;
1204         break;
1205     case InstanceCustomIndexCase::ANY_HIT:
1206         storeInAHit = true;
1207         break;
1208     case InstanceCustomIndexCase::INTERSECTION:
1209         storeInISec = true;
1210         break;
1211     default:
1212         DE_ASSERT(false);
1213         break;
1214     }
1215 
1216     const std::string imageDeclaration = "layout(r32i, set = 0, binding = 0) uniform iimage2D result;\n";
1217     const std::string storeCustomIndex =
1218         "  imageStore(result, ivec2(gl_LaunchIDEXT.xy), ivec4(gl_InstanceCustomIndexEXT, 0, 0, 1));\n";
1219     const std::string storeCullMask =
1220         "  imageStore(result, ivec2(gl_LaunchIDEXT.xy), ivec4(gl_CullMaskEXT, 0, 0, 1));\n";
1221     const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);
1222 
1223     {
1224         std::stringstream css;
1225         css << "#version 460 core\n"
1226             << "#extension GL_EXT_ray_tracing : require\n"
1227             << "layout(location = 0) rayPayloadEXT ivec4 hitValue;\n";
1228 
1229         if (storeInRGen)
1230             css << imageDeclaration;
1231 
1232         css << "layout(set = 0, binding = 1) uniform accelerationStructureEXT topLevelAS;\n"
1233             << "\n"
1234             << "void main()\n"
1235             << "{\n"
1236             << "  float tmin      = 0.0;\n"
1237             << "  float tmax      = 1.0;\n"
1238             << "  vec3  origin    = vec3(float(gl_LaunchIDEXT.x) + 0.5f, float(gl_LaunchIDEXT.y) + 0.5f, 0.5);\n"
1239             << "  vec3  direction = vec3(0.0,0.0,-1.0);\n"
1240             << "  hitValue        = ivec4(0,0,0,0);\n"
1241             << "  traceRayEXT(topLevelAS, "
1242             << ((m_data.cullFlags == InstanceCullFlags::NONE) ? "0, " : "gl_RayFlagsCullBackFacingTrianglesEXT, ")
1243             << m_data.cullMask << ", 0, 0, 0, origin, tmin, direction, tmax, 0);\n";
1244 
1245         if (storeInRGen)
1246             css << "  imageStore(result, ivec2(gl_LaunchIDEXT.xy), hitValue);\n";
1247 
1248         css << "}\n";
1249 
1250         programCollection.glslSources.add("rgen") << glu::RaygenSource(updateRayTracingGLSL(css.str())) << buildOptions;
1251     }
1252 
1253     {
1254         std::stringstream css;
1255         css << "#version 460 core\n"
1256             << "#extension GL_EXT_ray_tracing : require\n"
1257             << ((m_data.useCullMask) ? "#extension GL_EXT_ray_cull_mask : require\n" : "\n")
1258             << "layout(location = 0) rayPayloadInEXT ivec4 hitValue;\n";
1259 
1260         if (storeInCHit)
1261             css << imageDeclaration;
1262 
1263         css << "void main()\n"
1264             << "{\n"
1265             << "  hitValue = ivec4(2,0,0,1);\n";
1266 
1267         if (storeInCHit)
1268         {
1269             if (m_data.useCullMask)
1270             {
1271                 css << storeCullMask;
1272             }
1273             else
1274             {
1275                 css << storeCustomIndex;
1276             }
1277         }
1278 
1279         css << "}\n";
1280 
1281         programCollection.glslSources.add("chit")
1282             << glu::ClosestHitSource(updateRayTracingGLSL(css.str())) << buildOptions;
1283     }
1284 
1285     if (storeInAHit)
1286     {
1287         std::stringstream css;
1288         css << "#version 460 core\n"
1289             << "#extension GL_EXT_ray_tracing : require\n"
1290             << ((m_data.useCullMask) ? "#extension GL_EXT_ray_cull_mask : require\n" : "\n") << imageDeclaration
1291             << "void main()\n"
1292             << "{\n"
1293             << ((m_data.useCullMask) ? storeCullMask : storeCustomIndex) << "}\n";
1294 
1295         programCollection.glslSources.add("ahit") << glu::AnyHitSource(updateRayTracingGLSL(css.str())) << buildOptions;
1296     }
1297 
1298     {
1299         std::stringstream css;
1300         css << "#version 460 core\n"
1301             << "#extension GL_EXT_ray_tracing : require\n"
1302             << ((m_data.useCullMask) ? "#extension GL_EXT_ray_cull_mask : require\n" : "\n")
1303             << "hitAttributeEXT ivec4 hitAttribute;\n";
1304 
1305         if (storeInISec)
1306             css << imageDeclaration;
1307 
1308         css << "void main()\n"
1309             << "{\n"
1310             << "  hitAttribute = ivec4(0,0,0,0);\n"
1311             << "  reportIntersectionEXT(0.5f, 0);\n";
1312         if (storeInISec)
1313         {
1314             if (m_data.useCullMask)
1315             {
1316                 css << storeCullMask;
1317             }
1318             else
1319             {
1320                 css << storeCustomIndex;
1321             }
1322         }
1323 
1324         css << "}\n";
1325 
1326         programCollection.glslSources.add("isect")
1327             << glu::IntersectionSource(updateRayTracingGLSL(css.str())) << buildOptions;
1328     }
1329 
1330     {
1331         std::stringstream css;
1332         css << "#version 460 core\n"
1333             << "#extension GL_EXT_ray_tracing : require\n"
1334             << ((m_data.useCullMask) ? "#extension GL_EXT_ray_cull_mask : require\n" : "\n")
1335             << "layout(location = 0) rayPayloadInEXT ivec4 hitValue;\n";
1336 
1337         if (!storeInRGen)
1338             css << imageDeclaration;
1339 
1340         css << "void main()\n"
1341             << "{\n"
1342             << "  hitValue = ivec4(1,0,0,1);\n";
1343         if (!storeInRGen)
1344         {
1345             if (m_data.useCullMask)
1346             {
1347                 css << "  imageStore(result, ivec2(gl_LaunchIDEXT.xy), ivec4(bitfieldReverse(uint(gl_CullMaskEXT)), 0, "
1348                        "0, 1)); \n";
1349             }
1350             else
1351             {
1352                 css << "  imageStore(result, ivec2(gl_LaunchIDEXT.xy), hitValue);\n";
1353             }
1354         }
1355 
1356         css << "}\n";
1357 
1358         programCollection.glslSources.add("miss") << glu::MissSource(updateRayTracingGLSL(css.str())) << buildOptions;
1359     }
1360 
1361     {
1362         std::stringstream css;
1363         css << "#version 460 core\n"
1364                "#extension GL_EXT_ray_tracing : require\n"
1365                "layout(location = 0) rayPayloadEXT vec4 hitValue;\n"
1366                "layout(r32f, set = 0, binding = 0) uniform image2D result;\n"
1367                "layout(set = 0, binding = 1) uniform accelerationStructureEXT topLevelAS;\n"
1368                "\n"
1369                "vec3 calculateOrigin(vec3 zeroOrigin, vec3 xAxis, vec3 yAxis)\n"
1370                "{\n"
1371                "  return zeroOrigin + (float(gl_LaunchIDEXT.x)/float(gl_LaunchSizeEXT.x-1)) * xAxis + "
1372                "(float(gl_LaunchIDEXT.y)/float(gl_LaunchSizeEXT.y-1)) * yAxis;\n"
1373                "}\n"
1374                "\n"
1375                "void main()\n"
1376                "{\n"
1377                "  float tmin      = 0.0;\n"
1378                "  float tmax      = 2.0;\n"
1379                "  vec3  origin    = calculateOrigin( vec3(0.1,0.1,1.0), vec3(0.2,0.0,0.0), vec3(0.0,0.2,0.0) );\n"
1380                "  vec3  direction = vec3(0.0,0.0,-1.0);\n"
1381                "  hitValue        = vec4(0.0,0.0,0.0,0.0);\n"
1382                "  traceRayEXT(topLevelAS, 0, 0xFF, 0, 0, 0, origin, tmin, direction, tmax, 0);\n"
1383                "  imageStore(result, ivec2(gl_LaunchIDEXT.xy), hitValue);\n"
1384                "}\n";
1385         programCollection.glslSources.add("rgen_depth")
1386             << glu::RaygenSource(updateRayTracingGLSL(css.str())) << buildOptions;
1387     }
1388 
1389     {
1390         std::stringstream css;
1391         css << "#version 460 core\n"
1392                "#extension GL_EXT_ray_tracing : require\n"
1393                "layout(location = 0) rayPayloadInEXT vec4 hitValue;\n"
1394                "void main()\n"
1395                "{\n"
1396                "  hitValue = vec4(gl_RayTmaxEXT,0.0,0.0,1.0);\n"
1397                "}\n";
1398 
1399         programCollection.glslSources.add("chit_depth")
1400             << glu::ClosestHitSource(updateRayTracingGLSL(css.str())) << buildOptions;
1401     }
1402 
1403     {
1404         std::stringstream css;
1405         css << "#version 460 core\n"
1406                "#extension GL_EXT_ray_tracing : require\n"
1407                "layout(location = 0) rayPayloadInEXT vec4 hitValue;\n"
1408                "void main()\n"
1409                "{\n"
1410                "  hitValue = vec4(0.0,0.0,0.0,1.0);\n"
1411                "}\n";
1412 
1413         programCollection.glslSources.add("miss_depth")
1414             << glu::MissSource(updateRayTracingGLSL(css.str())) << buildOptions;
1415     }
1416 }
1417 
createInstance(Context & context) const1418 TestInstance *RayTracingASBasicTestCase::createInstance(Context &context) const
1419 {
1420     return new RayTracingASBasicTestInstance(context, m_data);
1421 }
1422 
RayTracingASFuncArgTestCase(tcu::TestContext & context,const char * name,const TestParams & data)1423 RayTracingASFuncArgTestCase::RayTracingASFuncArgTestCase(tcu::TestContext &context, const char *name,
1424                                                          const TestParams &data)
1425     : RayTracingASBasicTestCase(context, name, data)
1426 {
1427 }
1428 
initPrograms(SourceCollections & programCollection) const1429 void RayTracingASFuncArgTestCase::initPrograms(SourceCollections &programCollection) const
1430 {
1431     const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);
1432     const vk::SpirVAsmBuildOptions spvBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, true);
1433 
1434     {
1435         // The SPIR-V assembly below is based on the following GLSL code. Some
1436         // modifications have been made to make traceRaysBottomWrapper take a bare
1437         // acceleration structure as its argument instead of a pointer to it, so we can
1438         // test passing a pointer and a bare value in the same test.
1439         //
1440         //    #version 460 core
1441         //    #extension GL_EXT_ray_tracing : require
1442         // layout(location = 0) rayPayloadEXT vec4 hitValue;
1443         // layout(r32f, set = 0, binding = 0) uniform image2D result;
1444         // layout(set = 0, binding = 1) uniform accelerationStructureEXT topLevelAS;
1445         //
1446         //    void traceRaysBottomWrapper(
1447         //      accelerationStructureEXT topLevel,
1448         //      uint rayFlags,
1449         //      uint cullMask,
1450         //      uint sbtRecordOffset,
1451         //      uint sbtRecordStride,
1452         //      uint missIndex,
1453         //      vec3 origin,
1454         //      float Tmin,
1455         //      vec3 direction,
1456         //      float Tmax)
1457         //    {
1458         //   traceRayEXT(topLevel, rayFlags, cullMask, sbtRecordOffset, sbtRecordStride, missIndex, origin, Tmin, direction, Tmax, 0);
1459         //    }
1460         //
1461         //    void traceRaysTopWrapper(
1462         //      accelerationStructureEXT topLevel,
1463         //      uint rayFlags,
1464         //      uint cullMask,
1465         //      uint sbtRecordOffset,
1466         //      uint sbtRecordStride,
1467         //      uint missIndex,
1468         //      vec3 origin,
1469         //      float Tmin,
1470         //      vec3 direction,
1471         //      float Tmax)
1472         //    {
1473         //   traceRaysBottomWrapper(topLevel, rayFlags, cullMask, sbtRecordOffset, sbtRecordStride, missIndex, origin, Tmin, direction, Tmax);
1474         //    }
1475         //
1476         //    vec3 calculateOrigin(vec3 zeroOrigin, vec3 xAxis, vec3 yAxis)
1477         //    {
1478         //   return zeroOrigin + (float(gl_LaunchIDEXT.x)/float(gl_LaunchSizeEXT.x-1)) * xAxis + (float(gl_LaunchIDEXT.y)/float(gl_LaunchSizeEXT.y-1)) * yAxis;
1479         //    }
1480         //
1481         //    void main()
1482         //    {
1483         //   float tmin      = 0.0;
1484         //   float tmax      = 2.0;
1485         //   vec3  origin    = calculateOrigin( vec3(0.1,0.1,1.0), vec3(0.2,0.0,0.0), vec3(0.0,0.2,0.0) );
1486         //   vec3  direction = vec3(0.0,0.0,-1.0);
1487         //   hitValue        = vec4(0.0,0.0,0.0,0.0);
1488         //   traceRaysTopWrapper(topLevelAS, 0, 0xFF, 0, 0, 0, origin, tmin, direction, tmax);
1489         //   imageStore(result, ivec2(gl_LaunchIDEXT.xy), hitValue);
1490         //    }
1491 
1492         std::ostringstream rgen;
1493         rgen
1494             << "; SPIR-V\n"
1495             << "; Version: 1.4\n"
1496             << "; Generator: Khronos Glslang Reference Front End; 10\n"
1497             << "; Bound: 156\n"
1498             << "; Schema: 0\n"
1499             << "OpCapability RayTracingKHR\n"
1500             << "OpExtension \"SPV_KHR_ray_tracing\"\n"
1501             << "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1502             << "OpMemoryModel Logical GLSL450\n"
1503             << "OpEntryPoint RayGenerationKHR %4 \"main\" %59 %82 %88 %130 %148\n"
1504             << "OpDecorate %59 Location 0\n"
1505             << "OpDecorate %82 BuiltIn LaunchIdKHR\n"
1506             << "OpDecorate %88 BuiltIn LaunchSizeKHR\n"
1507             << "OpDecorate %130 DescriptorSet 0\n"
1508             << "OpDecorate %130 Binding 1\n"
1509             << "OpDecorate %148 DescriptorSet 0\n"
1510             << "OpDecorate %148 Binding 0\n"
1511             << "%2 = OpTypeVoid\n"
1512             << "%3 = OpTypeFunction %2\n"
1513 
1514             // This is the bare type.
1515             << "%6 = OpTypeAccelerationStructureKHR\n"
1516 
1517             // This is the pointer type.
1518             << "%7 = OpTypePointer UniformConstant %6\n"
1519 
1520             << "%8 = OpTypeInt 32 0\n"
1521             << "%9 = OpTypePointer Function %8\n"
1522             << "%10 = OpTypeFloat 32\n"
1523             << "%11 = OpTypeVector %10 3\n"
1524             << "%12 = OpTypePointer Function %11\n"
1525             << "%13 = OpTypePointer Function %10\n"
1526 
1527             // This is the type for traceRaysTopWrapper and also the original traceRaysBottomWrapper.
1528             << "%14 = OpTypeFunction %2 %7 %9 %9 %9 %9 %9 %12 %13 %12 %13\n"
1529 
1530             // This is the modified type to take a bare AS as the first argument, for the modified version of traceRaysBottomWrapper.
1531             << "%14b = OpTypeFunction %2 %6 %9 %9 %9 %9 %9 %12 %13 %12 %13\n"
1532 
1533             << "%39 = OpTypeFunction %11 %12 %12 %12\n"
1534             << "%55 = OpTypeInt 32 1\n"
1535             << "%56 = OpConstant %55 0\n"
1536             << "%57 = OpTypeVector %10 4\n"
1537             << "%58 = OpTypePointer RayPayloadKHR %57\n"
1538             << "%59 = OpVariable %58 RayPayloadKHR\n"
1539             << "%80 = OpTypeVector %8 3\n"
1540             << "%81 = OpTypePointer Input %80\n"
1541             << "%82 = OpVariable %81 Input\n"
1542             << "%83 = OpConstant %8 0\n"
1543             << "%84 = OpTypePointer Input %8\n"
1544             << "%88 = OpVariable %81 Input\n"
1545             << "%91 = OpConstant %8 1\n"
1546             << "%112 = OpConstant %10 0\n"
1547             << "%114 = OpConstant %10 2\n"
1548             << "%116 = OpConstant %10 0.100000001\n"
1549             << "%117 = OpConstant %10 1\n"
1550             << "%118 = OpConstantComposite %11 %116 %116 %117\n"
1551             << "%119 = OpConstant %10 0.200000003\n"
1552             << "%120 = OpConstantComposite %11 %119 %112 %112\n"
1553             << "%121 = OpConstantComposite %11 %112 %119 %112\n"
1554             << "%127 = OpConstant %10 -1\n"
1555             << "%128 = OpConstantComposite %11 %112 %112 %127\n"
1556             << "%129 = OpConstantComposite %57 %112 %112 %112 %112\n"
1557             << "%130 = OpVariable %7 UniformConstant\n"
1558             << "%131 = OpConstant %8 255\n"
1559             << "%146 = OpTypeImage %10 2D 0 0 0 2 R32f\n"
1560             << "%147 = OpTypePointer UniformConstant %146\n"
1561             << "%148 = OpVariable %147 UniformConstant\n"
1562             << "%150 = OpTypeVector %8 2\n"
1563             << "%153 = OpTypeVector %55 2\n"
1564 
1565             // This is main().
1566             << "%4 = OpFunction %2 None %3\n"
1567             << "%5 = OpLabel\n"
1568             << "%111 = OpVariable %13 Function\n"
1569             << "%113 = OpVariable %13 Function\n"
1570             << "%115 = OpVariable %12 Function\n"
1571             << "%122 = OpVariable %12 Function\n"
1572             << "%123 = OpVariable %12 Function\n"
1573             << "%124 = OpVariable %12 Function\n"
1574             << "%126 = OpVariable %12 Function\n"
1575             << "%132 = OpVariable %9 Function\n"
1576             << "%133 = OpVariable %9 Function\n"
1577             << "%134 = OpVariable %9 Function\n"
1578             << "%135 = OpVariable %9 Function\n"
1579             << "%136 = OpVariable %9 Function\n"
1580             << "%137 = OpVariable %12 Function\n"
1581             << "%139 = OpVariable %13 Function\n"
1582             << "%141 = OpVariable %12 Function\n"
1583             << "%143 = OpVariable %13 Function\n"
1584             << "OpStore %111 %112\n"
1585             << "OpStore %113 %114\n"
1586             << "OpStore %122 %118\n"
1587             << "OpStore %123 %120\n"
1588             << "OpStore %124 %121\n"
1589             << "%125 = OpFunctionCall %11 %43 %122 %123 %124\n"
1590             << "OpStore %115 %125\n"
1591             << "OpStore %126 %128\n"
1592             << "OpStore %59 %129\n"
1593             << "OpStore %132 %83\n"
1594             << "OpStore %133 %131\n"
1595             << "OpStore %134 %83\n"
1596             << "OpStore %135 %83\n"
1597             << "OpStore %136 %83\n"
1598             << "%138 = OpLoad %11 %115\n"
1599             << "OpStore %137 %138\n"
1600             << "%140 = OpLoad %10 %111\n"
1601             << "OpStore %139 %140\n"
1602             << "%142 = OpLoad %11 %126\n"
1603             << "OpStore %141 %142\n"
1604             << "%144 = OpLoad %10 %113\n"
1605             << "OpStore %143 %144\n"
1606             << "%145 = OpFunctionCall %2 %37 %130 %132 %133 %134 %135 %136 %137 %139 %141 %143\n"
1607             << "%149 = OpLoad %146 %148\n"
1608             << "%151 = OpLoad %80 %82\n"
1609             << "%152 = OpVectorShuffle %150 %151 %151 0 1\n"
1610             << "%154 = OpBitcast %153 %152\n"
1611             << "%155 = OpLoad %57 %59\n"
1612             << "OpImageWrite %149 %154 %155\n"
1613             << "OpReturn\n"
1614             << "OpFunctionEnd\n"
1615 
1616             // This is traceRaysBottomWrapper, doing the OpTraceRayKHR call.
1617             // We have modified the type so it takes a bare AS as the first argument.
1618             // %25 = OpFunction %2 None %14
1619             << "%25 = OpFunction %2 None %14b\n"
1620 
1621             // Also the type of the first argument here.
1622             // %15 = OpFunctionParameter %7
1623             << "%15 = OpFunctionParameter %6\n"
1624 
1625             << "%16 = OpFunctionParameter %9\n"
1626             << "%17 = OpFunctionParameter %9\n"
1627             << "%18 = OpFunctionParameter %9\n"
1628             << "%19 = OpFunctionParameter %9\n"
1629             << "%20 = OpFunctionParameter %9\n"
1630             << "%21 = OpFunctionParameter %12\n"
1631             << "%22 = OpFunctionParameter %13\n"
1632             << "%23 = OpFunctionParameter %12\n"
1633             << "%24 = OpFunctionParameter %13\n"
1634             << "%26 = OpLabel\n"
1635 
1636             // We no longer need to dereference the pointer here.
1637             // %45 = OpLoad %6 %15
1638 
1639             << "%46 = OpLoad %8 %16\n"
1640             << "%47 = OpLoad %8 %17\n"
1641             << "%48 = OpLoad %8 %18\n"
1642             << "%49 = OpLoad %8 %19\n"
1643             << "%50 = OpLoad %8 %20\n"
1644             << "%51 = OpLoad %11 %21\n"
1645             << "%52 = OpLoad %10 %22\n"
1646             << "%53 = OpLoad %11 %23\n"
1647             << "%54 = OpLoad %10 %24\n"
1648 
1649             // And we can use the first argument here directly.
1650             // OpTraceRayKHR %45 %46 %47 %48 %49 %50 %51 %52 %53 %54 %59
1651             << "OpTraceRayKHR %15 %46 %47 %48 %49 %50 %51 %52 %53 %54 %59\n"
1652 
1653             << "OpReturn\n"
1654             << "OpFunctionEnd\n"
1655 
1656             // This is traceRaysTopWrapper, which calls traceRaysBottomWrapper.
1657             << "%37 = OpFunction %2 None %14\n"
1658 
1659             // First argument, pointer to AS.
1660             << "%27 = OpFunctionParameter %7\n"
1661 
1662             << "%28 = OpFunctionParameter %9\n"
1663             << "%29 = OpFunctionParameter %9\n"
1664             << "%30 = OpFunctionParameter %9\n"
1665             << "%31 = OpFunctionParameter %9\n"
1666             << "%32 = OpFunctionParameter %9\n"
1667             << "%33 = OpFunctionParameter %12\n"
1668             << "%34 = OpFunctionParameter %13\n"
1669             << "%35 = OpFunctionParameter %12\n"
1670             << "%36 = OpFunctionParameter %13\n"
1671             << "%38 = OpLabel\n"
1672             << "%60 = OpVariable %9 Function\n"
1673             << "%62 = OpVariable %9 Function\n"
1674             << "%64 = OpVariable %9 Function\n"
1675             << "%66 = OpVariable %9 Function\n"
1676             << "%68 = OpVariable %9 Function\n"
1677             << "%70 = OpVariable %12 Function\n"
1678             << "%72 = OpVariable %13 Function\n"
1679             << "%74 = OpVariable %12 Function\n"
1680             << "%76 = OpVariable %13 Function\n"
1681 
1682             // Dereference the pointer to pass the AS as the first argument.
1683             << "%27b = OpLoad %6 %27\n"
1684 
1685             << "%61 = OpLoad %8 %28\n"
1686             << "OpStore %60 %61\n"
1687             << "%63 = OpLoad %8 %29\n"
1688             << "OpStore %62 %63\n"
1689             << "%65 = OpLoad %8 %30\n"
1690             << "OpStore %64 %65\n"
1691             << "%67 = OpLoad %8 %31\n"
1692             << "OpStore %66 %67\n"
1693             << "%69 = OpLoad %8 %32\n"
1694             << "OpStore %68 %69\n"
1695             << "%71 = OpLoad %11 %33\n"
1696             << "OpStore %70 %71\n"
1697             << "%73 = OpLoad %10 %34\n"
1698             << "OpStore %72 %73\n"
1699             << "%75 = OpLoad %11 %35\n"
1700             << "OpStore %74 %75\n"
1701             << "%77 = OpLoad %10 %36\n"
1702             << "OpStore %76 %77\n"
1703 
1704             // %2 is void, %25 is traceRaysBottomWrapper and %27 was the first argument.
1705             // We need to pass the loaded AS instead.
1706             // %78 = OpFunctionCall %2 %25 %27 %60 %62 %64 %66 %68 %70 %72 %74 %76
1707             << "%78 = OpFunctionCall %2 %25 %27b %60 %62 %64 %66 %68 %70 %72 %74 %76\n"
1708 
1709             << "OpReturn\n"
1710             << "OpFunctionEnd\n"
1711 
1712             // This is calculateOrigin().
1713             << "%43 = OpFunction %11 None %39\n"
1714             << "%40 = OpFunctionParameter %12\n"
1715             << "%41 = OpFunctionParameter %12\n"
1716             << "%42 = OpFunctionParameter %12\n"
1717             << "%44 = OpLabel\n"
1718             << "%79 = OpLoad %11 %40\n"
1719             << "%85 = OpAccessChain %84 %82 %83\n"
1720             << "%86 = OpLoad %8 %85\n"
1721             << "%87 = OpConvertUToF %10 %86\n"
1722             << "%89 = OpAccessChain %84 %88 %83\n"
1723             << "%90 = OpLoad %8 %89\n"
1724             << "%92 = OpISub %8 %90 %91\n"
1725             << "%93 = OpConvertUToF %10 %92\n"
1726             << "%94 = OpFDiv %10 %87 %93\n"
1727             << "%95 = OpLoad %11 %41\n"
1728             << "%96 = OpVectorTimesScalar %11 %95 %94\n"
1729             << "%97 = OpFAdd %11 %79 %96\n"
1730             << "%98 = OpAccessChain %84 %82 %91\n"
1731             << "%99 = OpLoad %8 %98\n"
1732             << "%100 = OpConvertUToF %10 %99\n"
1733             << "%101 = OpAccessChain %84 %88 %91\n"
1734             << "%102 = OpLoad %8 %101\n"
1735             << "%103 = OpISub %8 %102 %91\n"
1736             << "%104 = OpConvertUToF %10 %103\n"
1737             << "%105 = OpFDiv %10 %100 %104\n"
1738             << "%106 = OpLoad %11 %42\n"
1739             << "%107 = OpVectorTimesScalar %11 %106 %105\n"
1740             << "%108 = OpFAdd %11 %97 %107\n"
1741             << "OpReturnValue %108\n"
1742             << "OpFunctionEnd\n";
1743 
1744         programCollection.spirvAsmSources.add("rgen_depth") << spvBuildOptions << rgen.str();
1745     }
1746 
1747     // chit_depth and miss_depth below have been left untouched.
1748 
1749     {
1750         std::stringstream css;
1751         css << "#version 460 core\n"
1752                "#extension GL_EXT_ray_tracing : require\n"
1753                "layout(location = 0) rayPayloadInEXT vec4 hitValue;\n"
1754                "void main()\n"
1755                "{\n"
1756                "  hitValue = vec4(gl_RayTmaxEXT,0.0,0.0,1.0);\n"
1757                "}\n";
1758 
1759         programCollection.glslSources.add("chit_depth")
1760             << glu::ClosestHitSource(updateRayTracingGLSL(css.str())) << buildOptions;
1761     }
1762 
1763     {
1764         std::stringstream css;
1765         css << "#version 460 core\n"
1766                "#extension GL_EXT_ray_tracing : require\n"
1767                "layout(location = 0) rayPayloadInEXT vec4 hitValue;\n"
1768                "void main()\n"
1769                "{\n"
1770                "  hitValue = vec4(0.0,0.0,0.0,1.0);\n"
1771                "}\n";
1772 
1773         programCollection.glslSources.add("miss_depth")
1774             << glu::MissSource(updateRayTracingGLSL(css.str())) << buildOptions;
1775     }
1776 }
1777 
RayTracingASBasicTestInstance(Context & context,const TestParams & data)1778 RayTracingASBasicTestInstance::RayTracingASBasicTestInstance(Context &context, const TestParams &data)
1779     : vkt::TestInstance(context)
1780     , m_data(data)
1781 {
1782 }
1783 
runTest(const uint32_t workerThreadsCount)1784 de::MovePtr<BufferWithMemory> RayTracingASBasicTestInstance::runTest(const uint32_t workerThreadsCount)
1785 {
1786     const InstanceInterface &vki            = m_context.getInstanceInterface();
1787     const DeviceInterface &vkd              = m_context.getDeviceInterface();
1788     const VkDevice device                   = m_context.getDevice();
1789     const VkPhysicalDevice physicalDevice   = m_context.getPhysicalDevice();
1790     const uint32_t queueFamilyIndex         = m_context.getUniversalQueueFamilyIndex();
1791     const VkQueue queue                     = m_context.getUniversalQueue();
1792     Allocator &allocator                    = m_context.getDefaultAllocator();
1793     const uint32_t pixelCount               = m_data.width * m_data.height;
1794     const uint32_t shaderGroupHandleSize    = getShaderGroupSize(vki, physicalDevice);
1795     const uint32_t shaderGroupBaseAlignment = getShaderGroupBaseAlignment(vki, physicalDevice);
1796     const bool htCopy                       = (workerThreadsCount != 0) && (m_data.operationType == OP_COPY);
1797     const bool htSerialize                  = (workerThreadsCount != 0) && (m_data.operationType == OP_SERIALIZE);
1798 
1799     const Move<VkDescriptorSetLayout> descriptorSetLayout =
1800         DescriptorSetLayoutBuilder()
1801             .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, ALL_RAY_TRACING_STAGES)
1802             .addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, ALL_RAY_TRACING_STAGES)
1803             .build(vkd, device);
1804     const Move<VkDescriptorPool> descriptorPool =
1805         DescriptorPoolBuilder()
1806             .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
1807             .addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR)
1808             .build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1809     const Move<VkDescriptorSet> descriptorSet   = makeDescriptorSet(vkd, device, *descriptorPool, *descriptorSetLayout);
1810     const Move<VkPipelineLayout> pipelineLayout = makePipelineLayout(vkd, device, descriptorSetLayout.get());
1811 
1812     de::MovePtr<RayTracingPipeline> rayTracingPipeline = de::newMovePtr<RayTracingPipeline>();
1813     m_data.testConfiguration->initRayTracingShaders(rayTracingPipeline, m_context, m_data);
1814     Move<VkPipeline> pipeline = rayTracingPipeline->createPipeline(vkd, device, *pipelineLayout);
1815 
1816     de::MovePtr<BufferWithMemory> raygenShaderBindingTable;
1817     de::MovePtr<BufferWithMemory> hitShaderBindingTable;
1818     de::MovePtr<BufferWithMemory> missShaderBindingTable;
1819     m_data.testConfiguration->initShaderBindingTables(
1820         rayTracingPipeline, m_context, m_data, *pipeline, shaderGroupHandleSize, shaderGroupBaseAlignment,
1821         raygenShaderBindingTable, hitShaderBindingTable, missShaderBindingTable);
1822 
1823     const VkStridedDeviceAddressRegionKHR raygenShaderBindingTableRegion =
1824         makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, raygenShaderBindingTable->get(), 0),
1825                                           shaderGroupHandleSize, shaderGroupHandleSize);
1826     const VkStridedDeviceAddressRegionKHR missShaderBindingTableRegion =
1827         makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, missShaderBindingTable->get(), 0),
1828                                           shaderGroupHandleSize, shaderGroupHandleSize);
1829     const VkStridedDeviceAddressRegionKHR hitShaderBindingTableRegion =
1830         makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, hitShaderBindingTable->get(), 0),
1831                                           shaderGroupHandleSize, shaderGroupHandleSize);
1832     const VkStridedDeviceAddressRegionKHR callableShaderBindingTableRegion =
1833         makeStridedDeviceAddressRegionKHR(DE_NULL, 0, 0);
1834 
1835     const VkFormat imageFormat              = m_data.testConfiguration->getResultImageFormat();
1836     const VkImageCreateInfo imageCreateInfo = makeImageCreateInfo(m_data.width, m_data.height, imageFormat);
1837     const VkImageSubresourceRange imageSubresourceRange =
1838         makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0, 1u);
1839     const de::MovePtr<ImageWithMemory> image = de::MovePtr<ImageWithMemory>(
1840         new ImageWithMemory(vkd, device, allocator, imageCreateInfo, MemoryRequirement::Any));
1841     const Move<VkImageView> imageView =
1842         makeImageView(vkd, device, **image, VK_IMAGE_VIEW_TYPE_2D, imageFormat, imageSubresourceRange);
1843 
1844     const VkBufferCreateInfo resultBufferCreateInfo = makeBufferCreateInfo(
1845         pixelCount * m_data.testConfiguration->getResultImageFormatSize(), VK_BUFFER_USAGE_TRANSFER_DST_BIT);
1846     const VkImageSubresourceLayers resultBufferImageSubresourceLayers =
1847         makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
1848     const VkBufferImageCopy resultBufferImageRegion =
1849         makeBufferImageCopy(makeExtent3D(m_data.width, m_data.height, 1u), resultBufferImageSubresourceLayers);
1850     de::MovePtr<BufferWithMemory> resultBuffer = de::MovePtr<BufferWithMemory>(
1851         new BufferWithMemory(vkd, device, allocator, resultBufferCreateInfo, MemoryRequirement::HostVisible));
1852 
1853     const VkDescriptorImageInfo descriptorImageInfo =
1854         makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
1855 
1856     const Move<VkCommandPool> cmdPool = createCommandPool(vkd, device, 0, queueFamilyIndex);
1857     const Move<VkCommandBuffer> cmdBuffer =
1858         allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1859 
1860     std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> bottomLevelAccelerationStructures;
1861     de::MovePtr<TopLevelAccelerationStructure> topLevelAccelerationStructure;
1862     std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> bottomLevelAccelerationStructureCopies;
1863     de::MovePtr<TopLevelAccelerationStructure> topLevelAccelerationStructureCopy;
1864     std::vector<de::SharedPtr<SerialStorage>> bottomSerialized;
1865     std::vector<de::SharedPtr<SerialStorage>> topSerialized;
1866     std::vector<VkDeviceSize> accelerationCompactedSizes;
1867     std::vector<VkDeviceSize> accelerationSerialSizes;
1868     Move<VkQueryPool> m_queryPoolCompact;
1869     Move<VkQueryPool> m_queryPoolSerial;
1870 
1871     beginCommandBuffer(vkd, *cmdBuffer, 0u);
1872     {
1873         const VkImageMemoryBarrier preImageBarrier =
1874             makeImageMemoryBarrier(0u, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
1875                                    VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, **image, imageSubresourceRange);
1876         cmdPipelineImageMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
1877                                       VK_PIPELINE_STAGE_TRANSFER_BIT, &preImageBarrier);
1878         const VkClearValue clearValue = m_data.testConfiguration->getClearValue();
1879         vkd.cmdClearColorImage(*cmdBuffer, **image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clearValue.color, 1,
1880                                &imageSubresourceRange);
1881         const VkImageMemoryBarrier postImageBarrier = makeImageMemoryBarrier(
1882             VK_ACCESS_TRANSFER_WRITE_BIT,
1883             VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR,
1884             VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL, **image, imageSubresourceRange);
1885         cmdPipelineImageMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT,
1886                                       VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, &postImageBarrier);
1887 
1888         // build bottom level acceleration structures and their copies ( only when we are testing copying bottom level acceleration structures )
1889         bool bottomCompact = m_data.operationType == OP_COMPACT && m_data.operationTarget == OT_BOTTOM_ACCELERATION;
1890         bool bottomSerial  = m_data.operationType == OP_SERIALIZE && m_data.operationTarget == OT_BOTTOM_ACCELERATION;
1891         const bool buildWithoutGeom   = (m_data.emptyASCase == EmptyAccelerationStructureCase::NO_GEOMETRIES_BOTTOM);
1892         const bool bottomNoPrimitives = (m_data.emptyASCase == EmptyAccelerationStructureCase::NO_PRIMITIVES_BOTTOM);
1893         const bool topNoPrimitives    = (m_data.emptyASCase == EmptyAccelerationStructureCase::NO_PRIMITIVES_TOP);
1894         const bool inactiveInstances  = (m_data.emptyASCase == EmptyAccelerationStructureCase::INACTIVE_INSTANCES);
1895         bottomLevelAccelerationStructures =
1896             m_data.testConfiguration->initBottomAccelerationStructures(m_context, m_data);
1897         VkBuildAccelerationStructureFlagsKHR allowCompactionFlag =
1898             VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR;
1899         VkBuildAccelerationStructureFlagsKHR emptyCompactionFlag = VkBuildAccelerationStructureFlagsKHR(0);
1900         VkBuildAccelerationStructureFlagsKHR bottomCompactFlags =
1901             (bottomCompact ? allowCompactionFlag : emptyCompactionFlag);
1902         VkBuildAccelerationStructureFlagsKHR bottomBuildFlags = m_data.buildFlags | bottomCompactFlags;
1903         std::vector<VkAccelerationStructureKHR> accelerationStructureHandles;
1904         std::vector<VkDeviceSize> bottomBlasCompactSize;
1905         std::vector<VkDeviceSize> bottomBlasSerialSize;
1906 
1907         for (auto &blas : bottomLevelAccelerationStructures)
1908         {
1909             blas->setBuildType(m_data.buildType);
1910             blas->setBuildFlags(bottomBuildFlags);
1911             blas->setUseArrayOfPointers(m_data.bottomUsesAOP);
1912             blas->setCreateGeneric(m_data.bottomGeneric);
1913             blas->setCreationBufferUnbounded(m_data.bottomUnboundedCreation);
1914             blas->setBuildWithoutGeometries(buildWithoutGeom);
1915             blas->setBuildWithoutPrimitives(bottomNoPrimitives);
1916             blas->createAndBuild(vkd, device, *cmdBuffer, allocator);
1917             accelerationStructureHandles.push_back(*(blas->getPtr()));
1918         }
1919 
1920         if (m_data.operationType == OP_COMPACT)
1921         {
1922             uint32_t queryCount = (m_data.operationTarget == OT_BOTTOM_ACCELERATION) ?
1923                                       uint32_t(bottomLevelAccelerationStructures.size()) :
1924                                       1u;
1925             if (m_data.buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1926                 m_queryPoolCompact =
1927                     makeQueryPool(vkd, device, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR, queryCount);
1928             if (m_data.operationTarget == OT_BOTTOM_ACCELERATION)
1929                 queryAccelerationStructureSize(
1930                     vkd, device, *cmdBuffer, accelerationStructureHandles, m_data.buildType, m_queryPoolCompact.get(),
1931                     VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR, 0u, bottomBlasCompactSize);
1932         }
1933         if (m_data.operationType == OP_SERIALIZE)
1934         {
1935             uint32_t queryCount = (m_data.operationTarget == OT_BOTTOM_ACCELERATION) ?
1936                                       uint32_t(bottomLevelAccelerationStructures.size()) :
1937                                       1u;
1938             if (m_data.buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
1939                 m_queryPoolSerial =
1940                     makeQueryPool(vkd, device, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, queryCount);
1941             if (m_data.operationTarget == OT_BOTTOM_ACCELERATION)
1942                 queryAccelerationStructureSize(
1943                     vkd, device, *cmdBuffer, accelerationStructureHandles, m_data.buildType, m_queryPoolSerial.get(),
1944                     VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, 0u, bottomBlasSerialSize);
1945         }
1946 
        // If the AS is built on the GPU and we are going to make a compacted copy of it or serialize / deserialize it, the query results have to be downloaded to the CPU first
1948         if ((m_data.buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR) && (bottomCompact || bottomSerial))
1949         {
1950             endCommandBuffer(vkd, *cmdBuffer);
1951 
1952             submitCommandsAndWait(vkd, device, queue, cmdBuffer.get());
1953 
1954             if (bottomCompact)
1955                 VK_CHECK(vkd.getQueryPoolResults(
1956                     device, *m_queryPoolCompact, 0u, uint32_t(bottomBlasCompactSize.size()),
1957                     sizeof(VkDeviceSize) * bottomBlasCompactSize.size(), bottomBlasCompactSize.data(),
1958                     sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
1959             if (bottomSerial)
1960                 VK_CHECK(vkd.getQueryPoolResults(device, *m_queryPoolSerial, 0u, uint32_t(bottomBlasSerialSize.size()),
1961                                                  sizeof(VkDeviceSize) * bottomBlasSerialSize.size(),
1962                                                  bottomBlasSerialSize.data(), sizeof(VkDeviceSize),
1963                                                  VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
1964 
1965             vkd.resetCommandPool(device, *cmdPool, VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT);
1966             beginCommandBuffer(vkd, *cmdBuffer, 0u);
1967         }
1968 
1969         auto bottomLevelAccelerationStructuresPtr = &bottomLevelAccelerationStructures;
1970         if (m_data.operationType != OP_NONE && m_data.operationTarget == OT_BOTTOM_ACCELERATION)
1971         {
1972             switch (m_data.operationType)
1973             {
1974             case OP_COPY:
1975             {
1976                 for (size_t i = 0; i < bottomLevelAccelerationStructures.size(); ++i)
1977                 {
1978                     de::MovePtr<BottomLevelAccelerationStructure> asCopy = makeBottomLevelAccelerationStructure();
1979                     asCopy->setDeferredOperation(htCopy, workerThreadsCount);
1980                     asCopy->setBuildType(m_data.buildType);
1981                     asCopy->setBuildFlags(m_data.buildFlags);
1982                     asCopy->setUseArrayOfPointers(m_data.bottomUsesAOP);
1983                     asCopy->setCreateGeneric(m_data.bottomGeneric);
1984                     asCopy->setCreationBufferUnbounded(m_data.bottomUnboundedCreation);
1985                     asCopy->setBuildWithoutGeometries(buildWithoutGeom);
1986                     asCopy->setBuildWithoutPrimitives(bottomNoPrimitives);
1987                     asCopy->createAndCopyFrom(vkd, device, *cmdBuffer, allocator,
1988                                               bottomLevelAccelerationStructures[i].get(), 0u, 0u);
1989                     bottomLevelAccelerationStructureCopies.push_back(
1990                         de::SharedPtr<BottomLevelAccelerationStructure>(asCopy.release()));
1991                 }
1992                 break;
1993             }
1994             case OP_COMPACT:
1995             {
1996                 for (size_t i = 0; i < bottomLevelAccelerationStructures.size(); ++i)
1997                 {
1998                     de::MovePtr<BottomLevelAccelerationStructure> asCopy = makeBottomLevelAccelerationStructure();
1999                     asCopy->setBuildType(m_data.buildType);
2000                     asCopy->setBuildFlags(m_data.buildFlags);
2001                     asCopy->setUseArrayOfPointers(m_data.bottomUsesAOP);
2002                     asCopy->setCreateGeneric(m_data.bottomGeneric);
2003                     asCopy->setCreationBufferUnbounded(m_data.bottomUnboundedCreation);
2004                     asCopy->setBuildWithoutGeometries(buildWithoutGeom);
2005                     asCopy->setBuildWithoutPrimitives(bottomNoPrimitives);
2006                     asCopy->createAndCopyFrom(vkd, device, *cmdBuffer, allocator,
2007                                               bottomLevelAccelerationStructures[i].get(), bottomBlasCompactSize[i], 0u);
2008                     bottomLevelAccelerationStructureCopies.push_back(
2009                         de::SharedPtr<BottomLevelAccelerationStructure>(asCopy.release()));
2010                 }
2011                 break;
2012             }
2013             case OP_SERIALIZE:
2014             {
2015                 //bottomLevelAccelerationStructureCopies = m_data.testConfiguration->initBottomAccelerationStructures(m_context, m_data);
2016                 for (size_t i = 0; i < bottomLevelAccelerationStructures.size(); ++i)
2017                 {
2018                     de::SharedPtr<SerialStorage> storage(
2019                         new SerialStorage(vkd, device, allocator, m_data.buildType, bottomBlasSerialSize[i]));
2020 
2021                     bottomLevelAccelerationStructures[i]->setDeferredOperation(htSerialize, workerThreadsCount);
2022                     bottomLevelAccelerationStructures[i]->serialize(vkd, device, *cmdBuffer, storage.get());
2023                     bottomSerialized.push_back(storage);
2024 
2025                     if (m_data.buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2026                     {
2027                         endCommandBuffer(vkd, *cmdBuffer);
2028 
2029                         submitCommandsAndWait(vkd, device, queue, cmdBuffer.get());
2030 
2031                         vkd.resetCommandPool(device, *cmdPool, VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT);
2032                         beginCommandBuffer(vkd, *cmdBuffer, 0u);
2033                     }
2034 
2035                     de::MovePtr<BottomLevelAccelerationStructure> asCopy = makeBottomLevelAccelerationStructure();
2036                     asCopy->setBuildType(m_data.buildType);
2037                     asCopy->setBuildFlags(m_data.buildFlags);
2038                     asCopy->setUseArrayOfPointers(m_data.bottomUsesAOP);
2039                     asCopy->setCreateGeneric(m_data.bottomGeneric);
2040                     asCopy->setCreationBufferUnbounded(m_data.bottomUnboundedCreation);
2041                     asCopy->setBuildWithoutGeometries(buildWithoutGeom);
2042                     asCopy->setBuildWithoutPrimitives(bottomNoPrimitives);
2043                     asCopy->setDeferredOperation(htSerialize, workerThreadsCount);
2044                     asCopy->createAndDeserializeFrom(vkd, device, *cmdBuffer, allocator, storage.get(), 0u);
2045                     bottomLevelAccelerationStructureCopies.push_back(
2046                         de::SharedPtr<BottomLevelAccelerationStructure>(asCopy.release()));
2047                 }
2048                 break;
2049             }
2050             default:
2051                 DE_ASSERT(false);
2052             }
2053             bottomLevelAccelerationStructuresPtr = &bottomLevelAccelerationStructureCopies;
2054         }
2055 
2056         // build top level acceleration structures and their copies ( only when we are testing copying top level acceleration structures )
2057         bool topCompact = m_data.operationType == OP_COMPACT && m_data.operationTarget == OT_TOP_ACCELERATION;
2058         bool topSerial  = m_data.operationType == OP_SERIALIZE && m_data.operationTarget == OT_TOP_ACCELERATION;
2059         VkBuildAccelerationStructureFlagsKHR topCompactFlags = (topCompact ? allowCompactionFlag : emptyCompactionFlag);
2060         VkBuildAccelerationStructureFlagsKHR topBuildFlags   = m_data.buildFlags | topCompactFlags;
2061         std::vector<VkAccelerationStructureKHR> topLevelStructureHandles;
2062         std::vector<VkDeviceSize> topBlasCompactSize;
2063         std::vector<VkDeviceSize> topBlasSerialSize;
2064 
2065         topLevelAccelerationStructure = m_data.testConfiguration->initTopAccelerationStructure(
2066             m_context, m_data, *bottomLevelAccelerationStructuresPtr);
2067         topLevelAccelerationStructure->setBuildType(m_data.buildType);
2068         topLevelAccelerationStructure->setBuildFlags(topBuildFlags);
2069         topLevelAccelerationStructure->setBuildWithoutPrimitives(topNoPrimitives);
2070         topLevelAccelerationStructure->setUseArrayOfPointers(m_data.topUsesAOP);
2071         topLevelAccelerationStructure->setCreateGeneric(m_data.topGeneric);
2072         topLevelAccelerationStructure->setCreationBufferUnbounded(m_data.topUnboundedCreation);
2073         topLevelAccelerationStructure->setInactiveInstances(inactiveInstances);
2074         topLevelAccelerationStructure->createAndBuild(vkd, device, *cmdBuffer, allocator);
2075         topLevelStructureHandles.push_back(*(topLevelAccelerationStructure->getPtr()));
2076 
2077         if (topCompact)
2078             queryAccelerationStructureSize(
2079                 vkd, device, *cmdBuffer, topLevelStructureHandles, m_data.buildType, m_queryPoolCompact.get(),
2080                 VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR, 0u, topBlasCompactSize);
2081         if (topSerial)
2082             queryAccelerationStructureSize(
2083                 vkd, device, *cmdBuffer, topLevelStructureHandles, m_data.buildType, m_queryPoolSerial.get(),
2084                 VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, 0u, topBlasSerialSize);
2085 
        // If the AS is built on the GPU and we are going to make a compacted copy of it or serialize / deserialize it, the query results have to be downloaded to the CPU first
2087         if ((m_data.buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR) && (topCompact || topSerial))
2088         {
2089             endCommandBuffer(vkd, *cmdBuffer);
2090 
2091             submitCommandsAndWait(vkd, device, queue, cmdBuffer.get());
2092 
2093             if (topCompact)
2094                 VK_CHECK(vkd.getQueryPoolResults(device, *m_queryPoolCompact, 0u, uint32_t(topBlasCompactSize.size()),
2095                                                  sizeof(VkDeviceSize) * topBlasCompactSize.size(),
2096                                                  topBlasCompactSize.data(), sizeof(VkDeviceSize),
2097                                                  VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
2098             if (topSerial)
2099                 VK_CHECK(vkd.getQueryPoolResults(device, *m_queryPoolSerial, 0u, uint32_t(topBlasSerialSize.size()),
2100                                                  sizeof(VkDeviceSize) * topBlasSerialSize.size(),
2101                                                  topBlasSerialSize.data(), sizeof(VkDeviceSize),
2102                                                  VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
2103 
2104             vkd.resetCommandPool(device, *cmdPool, VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT);
2105             beginCommandBuffer(vkd, *cmdBuffer, 0u);
2106         }
2107 
2108         const TopLevelAccelerationStructure *topLevelRayTracedPtr = topLevelAccelerationStructure.get();
2109         if (m_data.operationType != OP_NONE && m_data.operationTarget == OT_TOP_ACCELERATION)
2110         {
2111             switch (m_data.operationType)
2112             {
2113             case OP_COPY:
2114             {
2115                 topLevelAccelerationStructureCopy = makeTopLevelAccelerationStructure();
2116                 topLevelAccelerationStructureCopy->setDeferredOperation(htCopy, workerThreadsCount);
2117                 topLevelAccelerationStructureCopy->setBuildType(m_data.buildType);
2118                 topLevelAccelerationStructureCopy->setBuildFlags(m_data.buildFlags);
2119                 topLevelAccelerationStructureCopy->setBuildWithoutPrimitives(topNoPrimitives);
2120                 topLevelAccelerationStructureCopy->setInactiveInstances(inactiveInstances);
2121                 topLevelAccelerationStructureCopy->setUseArrayOfPointers(m_data.topUsesAOP);
2122                 topLevelAccelerationStructureCopy->setCreateGeneric(m_data.topGeneric);
2123                 topLevelAccelerationStructureCopy->setCreationBufferUnbounded(m_data.topUnboundedCreation);
2124                 topLevelAccelerationStructureCopy->createAndCopyFrom(vkd, device, *cmdBuffer, allocator,
2125                                                                      topLevelAccelerationStructure.get(), 0u, 0u);
2126                 break;
2127             }
2128             case OP_COMPACT:
2129             {
2130                 topLevelAccelerationStructureCopy = makeTopLevelAccelerationStructure();
2131                 topLevelAccelerationStructureCopy->setBuildType(m_data.buildType);
2132                 topLevelAccelerationStructureCopy->setBuildFlags(m_data.buildFlags);
2133                 topLevelAccelerationStructureCopy->setBuildWithoutPrimitives(topNoPrimitives);
2134                 topLevelAccelerationStructureCopy->setInactiveInstances(inactiveInstances);
2135                 topLevelAccelerationStructureCopy->setUseArrayOfPointers(m_data.topUsesAOP);
2136                 topLevelAccelerationStructureCopy->setCreateGeneric(m_data.topGeneric);
2137                 topLevelAccelerationStructureCopy->setCreationBufferUnbounded(m_data.topUnboundedCreation);
2138                 topLevelAccelerationStructureCopy->createAndCopyFrom(
2139                     vkd, device, *cmdBuffer, allocator, topLevelAccelerationStructure.get(), topBlasCompactSize[0], 0u);
2140                 break;
2141             }
2142             case OP_SERIALIZE:
2143             {
2144                 de::SharedPtr<SerialStorage> storage = de::SharedPtr<SerialStorage>(
2145                     new SerialStorage(vkd, device, allocator, m_data.buildType, topBlasSerialSize[0]));
2146 
2147                 topLevelAccelerationStructure->setDeferredOperation(htSerialize, workerThreadsCount);
2148                 topLevelAccelerationStructure->serialize(vkd, device, *cmdBuffer, storage.get());
2149                 topSerialized.push_back(storage);
2150 
2151                 if (m_data.buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
2152                 {
2153                     endCommandBuffer(vkd, *cmdBuffer);
2154 
2155                     submitCommandsAndWait(vkd, device, queue, cmdBuffer.get());
2156 
2157                     vkd.resetCommandPool(device, *cmdPool, VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT);
2158                     beginCommandBuffer(vkd, *cmdBuffer, 0u);
2159                 }
2160 
2161                 topLevelAccelerationStructureCopy = makeTopLevelAccelerationStructure();
2162                 topLevelAccelerationStructureCopy->setBuildType(m_data.buildType);
2163                 topLevelAccelerationStructureCopy->setBuildFlags(m_data.buildFlags);
2164                 topLevelAccelerationStructureCopy->setBuildWithoutPrimitives(topNoPrimitives);
2165                 topLevelAccelerationStructureCopy->setInactiveInstances(inactiveInstances);
2166                 topLevelAccelerationStructureCopy->setUseArrayOfPointers(m_data.topUsesAOP);
2167                 topLevelAccelerationStructureCopy->setCreateGeneric(m_data.topGeneric);
2168                 topLevelAccelerationStructureCopy->setCreationBufferUnbounded(m_data.topUnboundedCreation);
2169                 topLevelAccelerationStructureCopy->setDeferredOperation(htSerialize, workerThreadsCount);
2170                 topLevelAccelerationStructureCopy->createAndDeserializeFrom(vkd, device, *cmdBuffer, allocator,
2171                                                                             storage.get(), 0u);
2172                 break;
2173             }
2174             case OP_UPDATE:
2175             {
2176                 topLevelAccelerationStructureCopy = m_data.testConfiguration->initTopAccelerationStructure(
2177                     m_context, m_data, *bottomLevelAccelerationStructuresPtr);
2178                 topLevelAccelerationStructureCopy->setBuildFlags(m_data.buildFlags);
2179                 topLevelAccelerationStructureCopy->create(vkd, device, allocator, 0u, 0u);
2180                 // Update AS based on topLevelAccelerationStructure
2181                 topLevelAccelerationStructureCopy->build(vkd, device, *cmdBuffer, topLevelAccelerationStructure.get());
2182                 break;
2183             }
2184             case OP_UPDATE_IN_PLACE:
2185             {
2186                 // Update in place
2187                 topLevelAccelerationStructure->build(vkd, device, *cmdBuffer, topLevelAccelerationStructure.get());
                // Make a copy
2189                 topLevelAccelerationStructureCopy = makeTopLevelAccelerationStructure();
2190                 topLevelAccelerationStructureCopy->setDeferredOperation(htCopy, workerThreadsCount);
2191                 topLevelAccelerationStructureCopy->setBuildType(m_data.buildType);
2192                 topLevelAccelerationStructureCopy->setBuildFlags(m_data.buildFlags);
2193                 topLevelAccelerationStructureCopy->setBuildWithoutPrimitives(topNoPrimitives);
2194                 topLevelAccelerationStructureCopy->setInactiveInstances(inactiveInstances);
2195                 topLevelAccelerationStructureCopy->setUseArrayOfPointers(m_data.topUsesAOP);
2196                 topLevelAccelerationStructureCopy->setCreateGeneric(m_data.topGeneric);
2197                 topLevelAccelerationStructureCopy->createAndCopyFrom(vkd, device, *cmdBuffer, allocator,
2198                                                                      topLevelAccelerationStructure.get(), 0u, 0u);
2199                 break;
2200             }
2201             default:
2202                 DE_ASSERT(false);
2203             }
2204             topLevelRayTracedPtr = topLevelAccelerationStructureCopy.get();
2205         }
2206 
2207         const VkMemoryBarrier preTraceMemoryBarrier =
2208             makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT);
2209         cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT,
2210                                  VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, &preTraceMemoryBarrier);
2211 
2212         VkWriteDescriptorSetAccelerationStructureKHR accelerationStructureWriteDescriptorSet = {
2213             VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, //  VkStructureType sType;
2214             DE_NULL,                                                           //  const void* pNext;
2215             1u,                                                                //  uint32_t accelerationStructureCount;
2216             topLevelRayTracedPtr->getPtr(), //  const VkAccelerationStructureKHR* pAccelerationStructures;
2217         };
2218 
2219         DescriptorSetUpdateBuilder()
2220             .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
2221                          VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descriptorImageInfo)
2222             .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
2223                          VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &accelerationStructureWriteDescriptorSet)
2224             .update(vkd, device);
2225 
2226         vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0, 1,
2227                                   &descriptorSet.get(), 0, DE_NULL);
2228 
2229         vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipeline);
2230 
2231         cmdTraceRays(vkd, *cmdBuffer, &raygenShaderBindingTableRegion, &missShaderBindingTableRegion,
2232                      &hitShaderBindingTableRegion, &callableShaderBindingTableRegion, m_data.width, m_data.height, 1);
2233 
2234         const VkMemoryBarrier postTraceMemoryBarrier =
2235             makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
2236         const VkMemoryBarrier postCopyMemoryBarrier =
2237             makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
2238         cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR,
2239                                  VK_PIPELINE_STAGE_TRANSFER_BIT, &postTraceMemoryBarrier);
2240 
2241         vkd.cmdCopyImageToBuffer(*cmdBuffer, **image, VK_IMAGE_LAYOUT_GENERAL, **resultBuffer, 1u,
2242                                  &resultBufferImageRegion);
2243 
2244         cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
2245                                  &postCopyMemoryBarrier);
2246     }
2247     endCommandBuffer(vkd, *cmdBuffer);
2248 
2249     submitCommandsAndWait(vkd, device, queue, cmdBuffer.get());
2250 
2251     invalidateMappedMemoryRange(vkd, device, resultBuffer->getAllocation().getMemory(),
2252                                 resultBuffer->getAllocation().getOffset(), pixelCount * sizeof(uint32_t));
2253 
2254     return resultBuffer;
2255 }
2256 
iterateNoWorkers(void)2257 bool RayTracingASBasicTestInstance::iterateNoWorkers(void)
2258 {
2259     // run test using arrays of pointers
2260     const de::MovePtr<BufferWithMemory> buffer = runTest(0);
2261 
2262     return m_data.testConfiguration->verifyImage(buffer.get(), m_context, m_data);
2263 }
2264 
iterateWithWorkers(void)2265 bool RayTracingASBasicTestInstance::iterateWithWorkers(void)
2266 {
2267     de::MovePtr<BufferWithMemory> singleThreadBufferCPU = runTest(0);
2268     const bool singleThreadValidation =
2269         m_data.testConfiguration->verifyImage(singleThreadBufferCPU.get(), m_context, m_data);
2270 
2271     de::MovePtr<BufferWithMemory> multiThreadBufferCPU = runTest(m_data.workerThreadsCount);
2272     const bool multiThreadValidation =
2273         m_data.testConfiguration->verifyImage(multiThreadBufferCPU.get(), m_context, m_data);
2274 
2275     const uint32_t result = singleThreadValidation && multiThreadValidation;
2276 
2277     return result;
2278 }
2279 
iterate(void)2280 tcu::TestStatus RayTracingASBasicTestInstance::iterate(void)
2281 {
2282     bool result;
2283 
2284     if (m_data.workerThreadsCount != 0)
2285         result = iterateWithWorkers();
2286     else
2287         result = iterateNoWorkers();
2288 
2289     if (result)
2290         return tcu::TestStatus::pass("Pass");
2291     else
2292         return tcu::TestStatus::fail("Fail");
2293 }
2294 
2295 // Tests dynamic indexing of acceleration structures
2296 class RayTracingASDynamicIndexingTestCase : public TestCase
2297 {
2298 public:
2299     RayTracingASDynamicIndexingTestCase(tcu::TestContext &context, const char *name);
2300     ~RayTracingASDynamicIndexingTestCase(void) = default;
2301 
2302     void checkSupport(Context &context) const override;
2303     void initPrograms(SourceCollections &programCollection) const override;
2304     TestInstance *createInstance(Context &context) const override;
2305 };
2306 
2307 class RayTracingASDynamicIndexingTestInstance : public TestInstance
2308 {
2309 public:
2310     RayTracingASDynamicIndexingTestInstance(Context &context);
2311     ~RayTracingASDynamicIndexingTestInstance(void) = default;
2312     tcu::TestStatus iterate(void) override;
2313 };
2314 
RayTracingASDynamicIndexingTestCase(tcu::TestContext & context,const char * name)2315 RayTracingASDynamicIndexingTestCase::RayTracingASDynamicIndexingTestCase(tcu::TestContext &context, const char *name)
2316     : TestCase(context, name)
2317 {
2318 }
2319 
checkSupport(Context & context) const2320 void RayTracingASDynamicIndexingTestCase::checkSupport(Context &context) const
2321 {
2322     commonASTestsCheckSupport(context);
2323     context.requireDeviceFunctionality("VK_EXT_descriptor_indexing");
2324 }
2325 
initPrograms(SourceCollections & programCollection) const2326 void RayTracingASDynamicIndexingTestCase::initPrograms(SourceCollections &programCollection) const
2327 {
2328     const vk::SpirVAsmBuildOptions spvBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, true);
2329     const vk::ShaderBuildOptions glslBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);
2330 
    // The raygen shader is defined in SPIR-V because it requires passing a pointer to a TLAS that was read from an SSBO;
    // the original SPIR-V code was generated from the following GLSL code, but the resulting SPIR-V code was then modified.
2333     //
2334     // #version 460 core
2335     // #extension GL_EXT_ray_tracing : require
2336     // #extension GL_EXT_nonuniform_qualifier : enable
2337     // #define ARRAY_SIZE 500
2338     // layout(location = 0) rayPayloadEXT uvec2 payload;    // offset and flag indicating if we are using descriptors or pointers
2339 
2340     // layout(set = 0, binding = 0) uniform accelerationStructureEXT tlasArray[ARRAY_SIZE];
2341     // layout(set = 0, binding = 1) readonly buffer topLevelASPointers {
2342     //     uvec2 ptr[];
2343     // } tlasPointers;
2344     // layout(set = 0, binding = 2) readonly buffer topLevelASIndices {
2345     //     uint idx[];
2346     // } tlasIndices;
2347     // layout(set = 0, binding = 3, std430) writeonly buffer Result {
2348     //     uint value[];
2349     // } result;
2350 
2351     // void main()
2352     // {
2353     //   float tmin            = 0.0;\n"
2354     //   float tmax            = 2.0;\n"
2355     //   vec3  origin          = vec3(0.25f, 0.5f, 1.0);\n"
2356     //   vec3  direction       = vec3(0.0,0.0,-1.0);\n"
2357     //   uint  activeTlasIndex = gl_LaunchIDEXT.x;\n"
2358     //   uint  activeTlasCount = gl_LaunchSizeEXT.x;\n"
2359     //   uint  tlasIndex       = tlasIndices.idx[nonuniformEXT(activeTlasIndex)];\n"
2360 
2361     //   atomicAdd(result.value[nonuniformEXT(activeTlasIndex)], 2);\n"
2362     //   payload = uvec2(activeTlasIndex + activeTlasCount.x, 0);\n"
2363     //   traceRayEXT(tlasArray[nonuniformEXT(tlasIndex)], gl_RayFlagsCullBackFacingTrianglesEXT, 0xFF, 0, 0, 0, origin, tmin, direction, tmax, 0);\n"
2364 
2365     //   atomicAdd(result.value[nonuniformEXT(activeTlasIndex + activeTlasCount * 2)], 5);\n"
2366     //   payload = uvec2(activeTlasIndex + activeTlasCount * 3, 1);\n"
2367     //   traceRayEXT(tlasArray[nonuniformEXT(tlasIndex)], gl_RayFlagsCullBackFacingTrianglesEXT, 0xFF, 0, 0, 0, origin, tmin, direction, tmax, 0);                // used to generate initial spirv
2368     //   //traceRayEXT(*tlasPointers.ptr[nonuniformEXT(tlasIndex)], gl_RayFlagsCullBackFacingTrianglesEXT, 0xFF, 0, 0, 0, origin, tmin, direction, tmax, 0);    // not available in glsl but should be done in spirv
2369     // };
2370 
2371     const std::string rgenSource =
2372         "OpCapability RayTracingKHR\n"
2373         "OpCapability ShaderNonUniform\n"
2374         "OpExtension \"SPV_EXT_descriptor_indexing\"\n"
2375         "OpExtension \"SPV_KHR_ray_tracing\"\n"
2376         "%1 = OpExtInstImport \"GLSL.std.450\"\n"
2377         "OpMemoryModel Logical GLSL450\n"
2378         "OpEntryPoint RayGenerationKHR %4 \"main\" %27 %33 %var_tlas_indices %var_result %60 %var_as_arr_ptr "
2379         "%var_as_pointers_ssbo\n"
2380         "OpDecorate %27 BuiltIn LaunchIdNV\n"
2381         "OpDecorate %33 BuiltIn LaunchSizeNV\n"
2382         "OpDecorate %37 ArrayStride 4\n"
2383         "OpMemberDecorate %38 0 NonWritable\n"
2384         "OpMemberDecorate %38 0 Offset 0\n"
2385         "OpDecorate %38 Block\n"
2386         "OpDecorate %var_tlas_indices DescriptorSet 0\n"
2387         "OpDecorate %var_tlas_indices Binding 2\n"
2388         "OpDecorate %44 NonUniform\n"
2389         "OpDecorate %46 NonUniform\n"
2390         "OpDecorate %47 NonUniform\n"
2391         "OpDecorate %48 ArrayStride 4\n"
2392         "OpMemberDecorate %49 0 NonReadable\n"
2393         "OpMemberDecorate %49 0 Offset 0\n"
2394         "OpDecorate %49 Block\n"
2395         "OpDecorate %var_result DescriptorSet 0\n"
2396         "OpDecorate %var_result Binding 3\n"
2397         "OpDecorate %53 NonUniform\n"
2398         "OpDecorate %60 Location 0\n"
2399         "OpDecorate %var_as_arr_ptr DescriptorSet 0\n"
2400         "OpDecorate %var_as_arr_ptr Binding 0\n"
2401         "OpDecorate %71 NonUniform\n"
2402         "OpDecorate %73 NonUniform\n"
2403         "OpDecorate %74 NonUniform\n"
2404         "OpDecorate %85 NonUniform\n"
2405         "OpDecorate %as_index NonUniform\n"
2406         "OpDecorate %as_device_addres NonUniform\n"
2407         "OpDecorate %104 ArrayStride 8\n"
2408         "OpMemberDecorate %105 0 NonWritable\n"
2409         "OpMemberDecorate %105 0 Offset 0\n"
2410         "OpDecorate %105 Block\n"
2411         "OpDecorate %var_as_pointers_ssbo DescriptorSet 0\n"
2412         "OpDecorate %var_as_pointers_ssbo Binding 1\n"
2413         // types, constants and variables
2414         "%2 = OpTypeVoid\n"
2415         "%3 = OpTypeFunction %2\n"
2416         "%6 = OpTypeFloat 32\n"
2417         "%7 = OpTypePointer Function %6\n"
2418         "%9 = OpConstant %6 0\n"
2419         "%11 = OpConstant %6 2\n"
2420         "%12 = OpTypeVector %6 3\n"
2421         "%13 = OpTypePointer Function %12\n"
2422         "%15 = OpConstant %6 0.25\n"
2423         "%16 = OpConstant %6 0.5\n"
2424         "%17 = OpConstant %6 1\n"
2425         "%18 = OpConstantComposite %12 %15 %16 %17\n"
2426         "%20 = OpConstant %6 -1\n"
2427         "%21 = OpConstantComposite %12 %9 %9 %20\n"
2428         "%type_uint32 = OpTypeInt 32 0\n"
2429         "%23 = OpTypePointer Function %type_uint32\n"
2430         "%25 = OpTypeVector %type_uint32 3\n"
2431         "%26 = OpTypePointer Input %25\n"
2432         "%27 = OpVariable %26 Input\n"
2433         "%28 = OpConstant %type_uint32 0\n"
2434         "%29 = OpTypePointer Input %type_uint32\n"
2435         "%33 = OpVariable %26 Input\n"
2436         "%37 = OpTypeRuntimeArray %type_uint32\n"
2437         "%38 = OpTypeStruct %37\n"
2438         "%39 = OpTypePointer StorageBuffer %38\n"
2439         "%var_tlas_indices = OpVariable %39 StorageBuffer\n"
2440         "%type_int32 = OpTypeInt 32 1\n"
2441         "%c_int32_0 = OpConstant %type_int32 0\n"
2442         "%45 = OpTypePointer StorageBuffer %type_uint32\n"
2443         "%48 = OpTypeRuntimeArray %type_uint32\n"
2444         "%49 = OpTypeStruct %48\n"
2445         "%50 = OpTypePointer StorageBuffer %49\n"
2446         "%var_result = OpVariable %50 StorageBuffer\n"
2447         "%55 = OpConstant %type_uint32 2\n"
2448         "%56 = OpConstant %type_uint32 1\n"
2449         "%58 = OpTypeVector %type_uint32 2\n"
2450         "%59 = OpTypePointer RayPayloadNV %58\n"
2451         "%60 = OpVariable %59 RayPayloadNV\n"
2452         "%type_as = OpTypeAccelerationStructureKHR\n"
2453         "%66 = OpConstant %type_uint32 500\n"
2454         "%67 = OpTypeArray %type_as %66\n"
2455         "%68 = OpTypePointer UniformConstant %67\n"
2456         "%var_as_arr_ptr = OpVariable %68 UniformConstant\n"
2457         "%72 = OpTypePointer UniformConstant %type_as\n"
2458         "%75 = OpConstant %type_uint32 16\n"
2459         "%76 = OpConstant %type_uint32 255\n"
2460         "%87 = OpConstant %type_uint32 5\n"
2461         "%91 = OpConstant %type_uint32 3\n"
2462 
2463         // <changed_section>
2464         "%104 = OpTypeRuntimeArray %58\n"
2465         "%105 = OpTypeStruct %104\n"
2466         "%106 = OpTypePointer StorageBuffer %105\n"
2467         "%var_as_pointers_ssbo = OpVariable %106 StorageBuffer\n"
2468         "%type_uint64_ssbo_ptr = OpTypePointer StorageBuffer %58\n"
2469         // </changed_section>
2470 
2471         // void main()
2472         "%4 = OpFunction %2 None %3\n"
2473         "%5 = OpLabel\n"
2474         "%8 = OpVariable %7 Function\n"
2475         "%10 = OpVariable %7 Function\n"
2476         "%14 = OpVariable %13 Function\n"
2477         "%19 = OpVariable %13 Function\n"
2478         "%24 = OpVariable %23 Function\n"
2479         "%32 = OpVariable %23 Function\n"
2480         "%36 = OpVariable %23 Function\n"
2481         "OpStore %8 %9\n"
2482         "OpStore %10 %11\n"
2483         "OpStore %14 %18\n"
2484         "OpStore %19 %21\n"
2485         "%30 = OpAccessChain %29 %27 %28\n"
2486         "%31 = OpLoad %type_uint32 %30\n"
2487         "OpStore %24 %31\n"
2488         "%34 = OpAccessChain %29 %33 %28\n"
2489         "%35 = OpLoad %type_uint32 %34\n"
2490         "OpStore %32 %35\n"
2491         "%43 = OpLoad %type_uint32 %24\n"
2492         "%44 = OpCopyObject %type_uint32 %43\n"
2493         "%46 = OpAccessChain %45 %var_tlas_indices %c_int32_0 %44\n"
2494         "%47 = OpLoad %type_uint32 %46\n"
2495         "OpStore %36 %47\n"
2496         // atomicAdd
2497         "%52 = OpLoad %type_uint32 %24\n"
2498         "%53 = OpCopyObject %type_uint32 %52\n"
2499         "%54 = OpAccessChain %45 %var_result %c_int32_0 %53\n"
2500         "%57 = OpAtomicIAdd %type_uint32 %54 %56 %28 %55\n"
2501         // setup payload
2502         "%61 = OpLoad %type_uint32 %24\n"
2503         "%62 = OpLoad %type_uint32 %32\n"
2504         "%63 = OpIAdd %type_uint32 %61 %62\n"
2505         "%64 = OpCompositeConstruct %58 %63 %28\n"
2506         "OpStore %60 %64\n"
2507         // trace rays using tlas from array
2508         "%70 = OpLoad %type_uint32 %36\n"
2509         "%71 = OpCopyObject %type_uint32 %70\n"
2510         "%73 = OpAccessChain %72 %var_as_arr_ptr %71\n"
2511         "%74 = OpLoad %type_as %73\n"
2512         "%77 = OpLoad %12 %14\n"
2513         "%78 = OpLoad %6 %8\n"
2514         "%79 = OpLoad %12 %19\n"
2515         "%80 = OpLoad %6 %10\n"
2516         "OpTraceRayKHR %74 %75 %76 %28 %28 %28 %77 %78 %79 %80 %60\n"
2517         // atomicAdd
2518         "%81 = OpLoad %type_uint32 %24\n"
2519         "%82 = OpLoad %type_uint32 %32\n"
2520         "%83 = OpIMul %type_uint32 %82 %55\n"
2521         "%84 = OpIAdd %type_uint32 %81 %83\n"
2522         "%85 = OpCopyObject %type_uint32 %84\n"
2523         "%86 = OpAccessChain %45 %var_result %c_int32_0 %85\n"
2524         "%88 = OpAtomicIAdd %type_uint32 %86 %56 %28 %87\n"
2525         // setup payload
2526         "%89 = OpLoad %type_uint32 %24\n"
2527         "%90 = OpLoad %type_uint32 %32\n"
2528         "%92 = OpIMul %type_uint32 %90 %91\n"
2529         "%93 = OpIAdd %type_uint32 %89 %92\n"
2530         "%94 = OpCompositeConstruct %58 %93 %56\n"
2531         "OpStore %60 %94\n"
2532         // trace rays using pointers to tlas
2533         "%95 = OpLoad %type_uint32 %36\n"
2534         "%as_index = OpCopyObject %type_uint32 %95\n"
2535 
2536         // <changed_section> OLD
2537         "%as_device_addres_ptr = OpAccessChain %type_uint64_ssbo_ptr %var_as_pointers_ssbo %c_int32_0 "
2538         "%as_index\n"
2539         "%as_device_addres = OpLoad %58 %as_device_addres_ptr\n"
2540         "%as_to_use = OpConvertUToAccelerationStructureKHR %type_as %as_device_addres\n"
2541         // </changed_section>
2542 
2543         "%99 = OpLoad %12 %14\n"
2544         "%100 = OpLoad %6 %8\n"
2545         "%101 = OpLoad %12 %19\n"
2546         "%102 = OpLoad %6 %10\n"
2547         "OpTraceRayKHR %as_to_use %75 %76 %28 %28 %28 %99 %100 %101 %102 %60\n"
2548         "OpReturn\n"
2549         "OpFunctionEnd\n";
2550     programCollection.spirvAsmSources.add("rgen") << rgenSource << spvBuildOptions;
2551 
2552     std::string chitSource = "#version 460 core\n"
2553                              "#extension GL_EXT_ray_tracing : require\n"
2554                              "#extension GL_EXT_nonuniform_qualifier : enable\n"
2555                              "layout(location = 0) rayPayloadInEXT uvec2 payload;\n"
2556                              "\n"
2557                              "layout(set = 0, binding = 3) writeonly buffer Result {\n"
2558                              "    uint value[];\n"
2559                              "} result;\n"
2560                              "void main()\n"
2561                              "{\n"
2562                              // payload.y is 0 or 1 so we will add 3 or 7 (just two prime numbers)
2563                              "    atomicAdd(result.value[nonuniformEXT(payload.x)], 3 + payload.y * 4);\n"
2564                              "}\n";
2565     programCollection.glslSources.add("chit") << glu::ClosestHitSource(chitSource) << glslBuildOptions;
2566 }
2567 
createInstance(Context & context) const2568 TestInstance *RayTracingASDynamicIndexingTestCase::createInstance(Context &context) const
2569 {
2570     return new RayTracingASDynamicIndexingTestInstance(context);
2571 }
2572 
RayTracingASDynamicIndexingTestInstance(Context & context)2573 RayTracingASDynamicIndexingTestInstance::RayTracingASDynamicIndexingTestInstance(Context &context)
2574     : vkt::TestInstance(context)
2575 {
2576 }
2577 
iterate(void)2578 tcu::TestStatus RayTracingASDynamicIndexingTestInstance::iterate(void)
2579 {
2580     const InstanceInterface &vki            = m_context.getInstanceInterface();
2581     const DeviceInterface &vkd              = m_context.getDeviceInterface();
2582     const VkDevice device                   = m_context.getDevice();
2583     const VkPhysicalDevice physicalDevice   = m_context.getPhysicalDevice();
2584     const uint32_t queueFamilyIndex         = m_context.getUniversalQueueFamilyIndex();
2585     const VkQueue queue                     = m_context.getUniversalQueue();
2586     Allocator &allocator                    = m_context.getDefaultAllocator();
2587     const uint32_t shaderGroupHandleSize    = getShaderGroupSize(vki, physicalDevice);
2588     const uint32_t shaderGroupBaseAlignment = getShaderGroupBaseAlignment(vki, physicalDevice);
2589     const uint32_t tlasCount                = 500; // changing this will require also changing shaders
2590     const uint32_t activeTlasCount          = 32;  // number of tlas out of <tlasCount> that will be active
2591 
2592     const Move<VkDescriptorSetLayout> descriptorSetLayout =
2593         DescriptorSetLayoutBuilder()
2594             .addArrayBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, tlasCount, ALL_RAY_TRACING_STAGES)
2595             .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
2596                               ALL_RAY_TRACING_STAGES) // pointers to all acceleration structures
2597             .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
2598                               ALL_RAY_TRACING_STAGES) // ssbo with indices of all acceleration structures
2599             .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, ALL_RAY_TRACING_STAGES) // ssbo with result values
2600             .build(vkd, device);
2601 
2602     const Move<VkDescriptorPool> descriptorPool =
2603         DescriptorPoolBuilder()
2604             .addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, tlasCount)
2605             .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
2606             .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
2607             .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
2608             .build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2609     const Move<VkDescriptorSet> descriptorSet = makeDescriptorSet(vkd, device, *descriptorPool, *descriptorSetLayout);
2610 
2611     de::MovePtr<RayTracingPipeline> rayTracingPipeline = de::newMovePtr<RayTracingPipeline>();
2612     rayTracingPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR,
2613                                   createShaderModule(vkd, device, m_context.getBinaryCollection().get("rgen"), 0), 0);
2614     rayTracingPipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,
2615                                   createShaderModule(vkd, device, m_context.getBinaryCollection().get("chit"), 0), 1);
2616 
2617     const Move<VkPipelineLayout> pipelineLayout = makePipelineLayout(vkd, device, descriptorSetLayout.get());
2618     Move<VkPipeline> pipeline                   = rayTracingPipeline->createPipeline(vkd, device, *pipelineLayout);
2619     de::MovePtr<BufferWithMemory> raygenShaderBindingTable = rayTracingPipeline->createShaderBindingTable(
2620         vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 0, 1);
2621     de::MovePtr<BufferWithMemory> hitShaderBindingTable = rayTracingPipeline->createShaderBindingTable(
2622         vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 1, 1);
2623 
2624     const VkStridedDeviceAddressRegionKHR raygenShaderBindingTableRegion =
2625         makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, raygenShaderBindingTable->get(), 0),
2626                                           shaderGroupHandleSize, shaderGroupHandleSize);
2627     const VkStridedDeviceAddressRegionKHR missShaderBindingTableRegion =
2628         makeStridedDeviceAddressRegionKHR(DE_NULL, 0, 0);
2629     const VkStridedDeviceAddressRegionKHR hitShaderBindingTableRegion =
2630         makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, hitShaderBindingTable->get(), 0),
2631                                           shaderGroupHandleSize, shaderGroupHandleSize);
2632     const VkStridedDeviceAddressRegionKHR callableShaderBindingTableRegion =
2633         makeStridedDeviceAddressRegionKHR(DE_NULL, 0, 0);
2634 
2635     const VkDeviceSize pointerBufferSize = tlasCount * sizeof(VkDeviceAddress);
2636     const VkBufferCreateInfo pointerBufferCreateInfo =
2637         makeBufferCreateInfo(pointerBufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2638     de::MovePtr<BufferWithMemory> pointerBuffer = de::MovePtr<BufferWithMemory>(
2639         new BufferWithMemory(vkd, device, allocator, pointerBufferCreateInfo,
2640                              MemoryRequirement::HostVisible | MemoryRequirement::DeviceAddress));
2641 
2642     const VkDeviceSize indicesBufferSize = activeTlasCount * sizeof(uint32_t);
2643     const VkBufferCreateInfo indicesBufferCreateInfo =
2644         makeBufferCreateInfo(indicesBufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2645     de::MovePtr<BufferWithMemory> indicesBuffer = de::MovePtr<BufferWithMemory>(
2646         new BufferWithMemory(vkd, device, allocator, indicesBufferCreateInfo, MemoryRequirement::HostVisible));
2647 
2648     const VkDeviceSize resultBufferSize = activeTlasCount * sizeof(uint32_t) * 4;
2649     const VkBufferCreateInfo resultBufferCreateInfo =
2650         makeBufferCreateInfo(resultBufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT);
2651     de::MovePtr<BufferWithMemory> resultBuffer = de::MovePtr<BufferWithMemory>(
2652         new BufferWithMemory(vkd, device, allocator, resultBufferCreateInfo, MemoryRequirement::HostVisible));
2653 
2654     const Move<VkCommandPool> cmdPool = createCommandPool(vkd, device, 0, queueFamilyIndex);
2655     const Move<VkCommandBuffer> cmdBuffer =
2656         allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2657 
2658     de::SharedPtr<BottomLevelAccelerationStructure> blas =
2659         de::SharedPtr<BottomLevelAccelerationStructure>(makeBottomLevelAccelerationStructure().release());
2660     std::vector<de::MovePtr<TopLevelAccelerationStructure>> tlasVect(tlasCount);
2661     std::vector<VkDeviceAddress> tlasPtrVect(tlasCount);
2662     std::vector<VkAccelerationStructureKHR> tlasVkVect;
2663 
2664     // randomly scatter active AS across the range
2665     deRandom rnd;
2666     deRandom_init(&rnd, 123);
2667     std::set<uint32_t> asIndicesSet;
2668     while (asIndicesSet.size() < activeTlasCount)
2669         asIndicesSet.insert(deRandom_getUint32(&rnd) % tlasCount);
2670 
2671     // fill indices buffer
2672     uint32_t helperIndex       = 0;
2673     auto &indicesBufferAlloc   = indicesBuffer->getAllocation();
2674     uint32_t *indicesBufferPtr = reinterpret_cast<uint32_t *>(indicesBufferAlloc.getHostPtr());
2675     std::for_each(asIndicesSet.begin(), asIndicesSet.end(),
2676                   [&helperIndex, indicesBufferPtr](const uint32_t &index) { indicesBufferPtr[helperIndex++] = index; });
2677     vk::flushAlloc(vkd, device, indicesBufferAlloc);
2678 
2679     // clear result buffer
2680     auto &resultBufferAlloc = resultBuffer->getAllocation();
2681     void *resultBufferPtr   = resultBufferAlloc.getHostPtr();
2682     deMemset(resultBufferPtr, 0, static_cast<size_t>(resultBufferSize));
2683     vk::flushAlloc(vkd, device, resultBufferAlloc);
2684 
2685     beginCommandBuffer(vkd, *cmdBuffer, 0u);
2686     {
2687         // build bottom level acceleration structure
2688         blas->setGeometryData(
2689             {
2690                 {0.0, 0.0, 0.0},
2691                 {1.0, 0.0, 0.0},
2692                 {0.0, 1.0, 0.0},
2693             },
2694             true, VK_GEOMETRY_OPAQUE_BIT_KHR);
2695 
2696         blas->createAndBuild(vkd, device, *cmdBuffer, allocator);
2697 
2698         // build top level acceleration structures
2699         for (uint32_t tlasIndex = 0; tlasIndex < tlasCount; ++tlasIndex)
2700         {
2701             auto &tlas = tlasVect[tlasIndex];
2702             tlas       = makeTopLevelAccelerationStructure();
2703             tlas->setInstanceCount(1);
2704             tlas->addInstance(blas);
2705             if (!asIndicesSet.count(tlasIndex))
2706             {
2707                 // tlas that are not in asIndicesSet should be empty but it is hard to do
2708                 // that with current cts utils so we are marking them as inactive instead
2709                 tlas->setInactiveInstances(true);
2710             }
2711             tlas->createAndBuild(vkd, device, *cmdBuffer, allocator);
2712 
2713             // get acceleration structure device address
2714             const VkAccelerationStructureDeviceAddressInfoKHR addressInfo = {
2715                 VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR, // VkStructureType                sType
2716                 DE_NULL,        // const void*                    pNext
2717                 *tlas->getPtr() // VkAccelerationStructureKHR    accelerationStructure
2718             };
2719             VkDeviceAddress vkda   = vkd.getAccelerationStructureDeviceAddressKHR(device, &addressInfo);
2720             tlasPtrVect[tlasIndex] = vkda;
2721         }
2722 
2723         // fill pointer buffer
2724         vkd.cmdUpdateBuffer(*cmdBuffer, **pointerBuffer, 0, pointerBufferSize, tlasPtrVect.data());
2725 
2726         // wait for data transfers
2727         const VkMemoryBarrier bufferUploadBarrier =
2728             makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
2729         cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT,
2730                                  VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, &bufferUploadBarrier, 1u);
2731 
2732         // wait for as build
2733         const VkMemoryBarrier asBuildBarrier = makeMemoryBarrier(VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR,
2734                                                                  VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR);
2735         cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
2736                                  VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, &asBuildBarrier, 1u);
2737 
2738         tlasVkVect.reserve(tlasCount);
2739         for (auto &tlas : tlasVect)
2740             tlasVkVect.push_back(*tlas->getPtr());
2741 
2742         VkWriteDescriptorSetAccelerationStructureKHR accelerationStructureWriteDescriptorSet = {
2743             VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, // VkStructureType sType;
2744             DE_NULL,                                                           // const void* pNext;
2745             tlasCount,                                                         // uint32_t accelerationStructureCount;
2746             tlasVkVect.data(), // const VkAccelerationStructureKHR* pAccelerationStructures;
2747         };
2748 
2749         const vk::VkDescriptorBufferInfo pointerBufferInfo =
2750             makeDescriptorBufferInfo(**pointerBuffer, 0u, VK_WHOLE_SIZE);
2751         const vk::VkDescriptorBufferInfo indicesBufferInfo =
2752             makeDescriptorBufferInfo(**indicesBuffer, 0u, VK_WHOLE_SIZE);
2753         const vk::VkDescriptorBufferInfo resultInfo = makeDescriptorBufferInfo(**resultBuffer, 0u, VK_WHOLE_SIZE);
2754 
2755         DescriptorSetUpdateBuilder()
2756             .writeArray(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
2757                         VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, tlasCount,
2758                         &accelerationStructureWriteDescriptorSet)
2759             .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
2760                          VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &pointerBufferInfo)
2761             .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(2u),
2762                          VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &indicesBufferInfo)
2763             .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(3u),
2764                          VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultInfo)
2765             .update(vkd, device);
2766 
2767         vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0, 1,
2768                                   &descriptorSet.get(), 0, DE_NULL);
2769 
2770         vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipeline);
2771 
2772         cmdTraceRays(vkd, *cmdBuffer, &raygenShaderBindingTableRegion, &missShaderBindingTableRegion,
2773                      &hitShaderBindingTableRegion, &callableShaderBindingTableRegion, activeTlasCount, 1, 1);
2774 
2775         const VkMemoryBarrier postTraceMemoryBarrier =
2776             makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
2777         cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR,
2778                                  VK_PIPELINE_STAGE_TRANSFER_BIT, &postTraceMemoryBarrier);
2779     }
2780     endCommandBuffer(vkd, *cmdBuffer);
2781 
2782     submitCommandsAndWait(vkd, device, queue, cmdBuffer.get());
2783 
2784     invalidateMappedMemoryRange(vkd, device, resultBuffer->getAllocation().getMemory(),
2785                                 resultBuffer->getAllocation().getOffset(), resultBufferSize);
2786 
2787     // verify result buffer
2788     uint32_t failures         = 0;
2789     const uint32_t *resultPtr = reinterpret_cast<uint32_t *>(resultBuffer->getAllocation().getHostPtr());
2790     for (uint32_t index = 0; index < activeTlasCount; ++index)
2791     {
2792         failures += (resultPtr[0 * activeTlasCount + index] != 2) + (resultPtr[1 * activeTlasCount + index] != 3) +
2793                     (resultPtr[2 * activeTlasCount + index] != 5) + (resultPtr[3 * activeTlasCount + index] != 7);
2794     }
2795 
2796     if (failures)
2797         return tcu::TestStatus::fail(de::toString(failures) + " failures, " +
2798                                      de::toString(4 * activeTlasCount - failures) + " are ok");
2799     return tcu::TestStatus::pass("Pass");
2800 }
2801 
// Tests the vkGetDeviceAccelerationStructureCompatibilityKHR routine
2803 class RayTracingDeviceASCompabilityKHRTestInstance : public TestInstance
2804 {
2805 public:
RayTracingDeviceASCompabilityKHRTestInstance(Context & context,const de::SharedPtr<TestParams> params)2806     RayTracingDeviceASCompabilityKHRTestInstance(Context &context, const de::SharedPtr<TestParams> params)
2807         : TestInstance(context)
2808         , m_params(params)
2809     {
2810     }
2811 
2812     tcu::TestStatus iterate(void) override;
2813 
2814 protected:
2815     template <class ASType>
2816     bool performTest(VkCommandPool cmdPool, VkCommandBuffer cmdBuffer,
2817                      const std::vector<de::SharedPtr<ASType>> sourceStructures,
2818                      const std::vector<VkDeviceSize> &copySizes, const std::vector<VkDeviceSize> &compactSizes);
2819 
2820     VkAccelerationStructureCompatibilityKHR getDeviceASCompatibilityKHR(const uint8_t *versionInfoData);
2821     std::string getUUIDsString(const uint8_t *header) const;
2822 
2823 private:
2824     const de::SharedPtr<TestParams> m_params;
2825 };
2826 
// Tests for updating bottom-level AS(s) address(es) in top-level AS's header
2828 class RayTracingHeaderBottomAddressTestInstance : public TestInstance
2829 {
2830 public:
RayTracingHeaderBottomAddressTestInstance(Context & context,const de::SharedPtr<TestParams> params)2831     RayTracingHeaderBottomAddressTestInstance(Context &context, const de::SharedPtr<TestParams> params)
2832         : TestInstance(context)
2833         , m_params(params)
2834     {
2835     }
2836     tcu::TestStatus iterate(void) override;
2837 
2838 protected:
2839     de::SharedPtr<TopLevelAccelerationStructure> prepareTopAccelerationStructure(const DeviceInterface &vk,
2840                                                                                  VkDevice device, Allocator &allocator,
2841                                                                                  VkCommandBuffer cmdBuffer);
2842 
2843     bool areAddressesTheSame(const std::vector<uint64_t> &addresses,
2844                              const SerialStorage::AccelerationStructureHeader *header);
2845 
2846     bool areAddressesDifferent(const std::vector<uint64_t> &addresses1, const std::vector<uint64_t> &addresses2);
2847 
2848 private:
2849     const de::SharedPtr<TestParams> m_params;
2850 };
2851 
2852 class RayTracingDeviceASCompabilityKHRTestCase : public TestCase
2853 {
2854 public:
RayTracingDeviceASCompabilityKHRTestCase(tcu::TestContext & ctx,const char * name,const de::SharedPtr<TestParams> params)2855     RayTracingDeviceASCompabilityKHRTestCase(tcu::TestContext &ctx, const char *name,
2856                                              const de::SharedPtr<TestParams> params)
2857         : TestCase(ctx, name)
2858         , m_params(params)
2859     {
2860     }
2861 
2862     void checkSupport(Context &context) const override;
createInstance(Context & context) const2863     TestInstance *createInstance(Context &context) const override
2864     {
2865         return new RayTracingDeviceASCompabilityKHRTestInstance(context, m_params);
2866     }
2867 
2868 private:
2869     de::SharedPtr<TestParams> m_params;
2870 };
2871 
2872 class RayTracingHeaderBottomAddressTestCase : public TestCase
2873 {
2874 public:
RayTracingHeaderBottomAddressTestCase(tcu::TestContext & ctx,const char * name,const de::SharedPtr<TestParams> params)2875     RayTracingHeaderBottomAddressTestCase(tcu::TestContext &ctx, const char *name,
2876                                           const de::SharedPtr<TestParams> params)
2877         : TestCase(ctx, name)
2878         , m_params(params)
2879     {
2880     }
2881 
2882     void checkSupport(Context &context) const override;
createInstance(Context & context) const2883     TestInstance *createInstance(Context &context) const override
2884     {
2885         return new RayTracingHeaderBottomAddressTestInstance(context, m_params);
2886     }
2887 
2888 private:
2889     de::SharedPtr<TestParams> m_params;
2890 };
2891 
checkSupport(Context & context) const2892 void RayTracingDeviceASCompabilityKHRTestCase ::checkSupport(Context &context) const
2893 {
2894     context.requireInstanceFunctionality("VK_KHR_get_physical_device_properties2");
2895     context.requireDeviceFunctionality("VK_KHR_acceleration_structure");
2896 
2897     const VkPhysicalDeviceAccelerationStructureFeaturesKHR &accelerationStructureFeaturesKHR =
2898         context.getAccelerationStructureFeatures();
2899     if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR &&
2900         accelerationStructureFeaturesKHR.accelerationStructureHostCommands == false)
2901         TCU_THROW(NotSupportedError,
2902                   "Requires VkPhysicalDeviceAccelerationStructureFeaturesKHR.accelerationStructureHostCommands");
2903 
2904     // Check supported vertex format.
2905     checkAccelerationStructureVertexBufferFormat(context.getInstanceInterface(), context.getPhysicalDevice(),
2906                                                  m_params->vertexFormat);
2907 }
2908 
checkSupport(Context & context) const2909 void RayTracingHeaderBottomAddressTestCase ::checkSupport(Context &context) const
2910 {
2911     context.requireDeviceFunctionality("VK_KHR_acceleration_structure");
2912 
2913     const VkPhysicalDeviceAccelerationStructureFeaturesKHR &accelerationStructureFeaturesKHR =
2914         context.getAccelerationStructureFeatures();
2915     if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR &&
2916         accelerationStructureFeaturesKHR.accelerationStructureHostCommands == false)
2917         TCU_THROW(NotSupportedError,
2918                   "Requires VkPhysicalDeviceAccelerationStructureFeaturesKHR.accelerationStructureHostCommands");
2919 
2920     // Check supported vertex format.
2921     checkAccelerationStructureVertexBufferFormat(context.getInstanceInterface(), context.getPhysicalDevice(),
2922                                                  m_params->vertexFormat);
2923 }
2924 
getDeviceASCompatibilityKHR(const uint8_t * versionInfoData)2925 VkAccelerationStructureCompatibilityKHR RayTracingDeviceASCompabilityKHRTestInstance::getDeviceASCompatibilityKHR(
2926     const uint8_t *versionInfoData)
2927 {
2928     const VkDevice device      = m_context.getDevice();
2929     const DeviceInterface &vkd = m_context.getDeviceInterface();
2930 
2931     VkAccelerationStructureCompatibilityKHR compability = VK_ACCELERATION_STRUCTURE_COMPATIBILITY_MAX_ENUM_KHR;
2932 
2933     const VkAccelerationStructureVersionInfoKHR versionInfo = {
2934         VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_VERSION_INFO_KHR, // sType
2935         DE_NULL,                                                   // pNext
2936         versionInfoData                                            // pVersionData
2937     };
2938 
2939     vkd.getDeviceAccelerationStructureCompatibilityKHR(device, &versionInfo, &compability);
2940 
2941     return compability;
2942 }
2943 
getUUIDsString(const uint8_t * header) const2944 std::string RayTracingDeviceASCompabilityKHRTestInstance::getUUIDsString(const uint8_t *header) const
2945 {
2946     std::stringstream ss;
2947 
2948     int offset         = 0;
2949     const int widths[] = {4, 2, 2, 2, 6};
2950 
2951     for (int h = 0; h < 2; ++h)
2952     {
2953         if (h)
2954             ss << ' ';
2955 
2956         for (int w = 0; w < DE_LENGTH_OF_ARRAY(widths); ++w)
2957         {
2958             if (w)
2959                 ss << '-';
2960 
2961             for (int i = 0; i < widths[w]; ++i)
2962                 ss << std::hex << std::uppercase << static_cast<int>(header[i + offset]);
2963 
2964             offset += widths[w];
2965         }
2966     }
2967 
2968     return ss.str();
2969 }
2970 
iterate(void)2971 tcu::TestStatus RayTracingDeviceASCompabilityKHRTestInstance::iterate(void)
2972 {
2973     const DeviceInterface &vkd      = m_context.getDeviceInterface();
2974     const VkDevice device           = m_context.getDevice();
2975     const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
2976     const VkQueue queue             = m_context.getUniversalQueue();
2977     Allocator &allocator            = m_context.getDefaultAllocator();
2978 
2979     const Move<VkCommandPool> cmdPool =
2980         createCommandPool(vkd, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex);
2981     const Move<VkCommandBuffer> cmdBuffer =
2982         allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2983 
2984     bool result = false;
2985 
2986     std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> bottomStructures;
2987     std::vector<VkAccelerationStructureKHR> bottomHandles;
2988     std::vector<de::SharedPtr<TopLevelAccelerationStructure>> topStructures;
2989     std::vector<VkAccelerationStructureKHR> topHandles;
2990     Move<VkQueryPool> queryPoolCompact;
2991     Move<VkQueryPool> queryPoolSerial;
2992     std::vector<VkDeviceSize> compactSizes;
2993     std::vector<VkDeviceSize> serialSizes;
2994 
2995     beginCommandBuffer(vkd, *cmdBuffer, 0u);
2996 
2997     bottomStructures = m_params->testConfiguration->initBottomAccelerationStructures(m_context, *m_params);
2998     for (auto &blas : bottomStructures)
2999     {
3000         blas->setBuildType(m_params->buildType);
3001         blas->setBuildFlags(VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR);
3002         blas->createAndBuild(vkd, device, *cmdBuffer, allocator);
3003         bottomHandles.push_back(*(blas->getPtr()));
3004     }
3005 
3006     if (m_params->operationTarget == OT_TOP_ACCELERATION)
3007     {
3008         de::MovePtr<TopLevelAccelerationStructure> tlas =
3009             m_params->testConfiguration->initTopAccelerationStructure(m_context, *m_params, bottomStructures);
3010         tlas->setBuildType(m_params->buildType);
3011         tlas->setBuildFlags(VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR);
3012         tlas->createAndBuild(vkd, device, *cmdBuffer, allocator);
3013         topHandles.push_back(*(tlas->getPtr()));
3014         topStructures.push_back(de::SharedPtr<TopLevelAccelerationStructure>(tlas.release()));
3015     }
3016 
3017     const uint32_t queryCount = uint32_t(
3018         (m_params->operationTarget == OT_BOTTOM_ACCELERATION) ? bottomStructures.size() : topStructures.size());
3019     const std::vector<VkAccelerationStructureKHR> &handles =
3020         (m_params->operationTarget == OT_BOTTOM_ACCELERATION) ? bottomHandles : topHandles;
3021 
3022     // query compact size
3023     if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3024         queryPoolCompact =
3025             makeQueryPool(vkd, device, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR, queryCount);
3026     queryAccelerationStructureSize(vkd, device, *cmdBuffer, handles, m_params->buildType, *queryPoolCompact,
3027                                    VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR, 0u, compactSizes);
3028 
3029     // query serialization size
3030     if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3031         queryPoolSerial =
3032             makeQueryPool(vkd, device, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, queryCount);
3033     queryAccelerationStructureSize(vkd, device, *cmdBuffer, handles, m_params->buildType, queryPoolSerial.get(),
3034                                    VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, 0u, serialSizes);
3035 
3036     endCommandBuffer(vkd, *cmdBuffer);
3037     submitCommandsAndWait(vkd, device, queue, cmdBuffer.get());
3038 
3039     if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3040     {
3041         VK_CHECK(vkd.getQueryPoolResults(device, *queryPoolCompact, 0u, queryCount, queryCount * sizeof(VkDeviceSize),
3042                                          compactSizes.data(), sizeof(VkDeviceSize),
3043                                          VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
3044         VK_CHECK(vkd.getQueryPoolResults(device, *queryPoolSerial, 0u, queryCount, queryCount * sizeof(VkDeviceSize),
3045                                          serialSizes.data(), sizeof(VkDeviceSize),
3046                                          VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
3047 
3048         vkd.resetCommandPool(device, *cmdPool, VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT);
3049     }
3050 
3051     if (m_params->operationTarget == OT_BOTTOM_ACCELERATION)
3052         result = performTest<BottomLevelAccelerationStructure>(*cmdPool, *cmdBuffer, bottomStructures, compactSizes,
3053                                                                serialSizes);
3054     else
3055         result =
3056             performTest<TopLevelAccelerationStructure>(*cmdPool, *cmdBuffer, topStructures, compactSizes, serialSizes);
3057 
3058     return result ? tcu::TestStatus::pass("") : tcu::TestStatus::fail("");
3059 }
3060 
3061 template <class ASType>
performTest(VkCommandPool cmdPool,VkCommandBuffer cmdBuffer,const std::vector<de::SharedPtr<ASType>> sourceStructures,const std::vector<VkDeviceSize> & compactSizes,const std::vector<VkDeviceSize> & serialSizes)3062 bool RayTracingDeviceASCompabilityKHRTestInstance::performTest(
3063     VkCommandPool cmdPool, VkCommandBuffer cmdBuffer, const std::vector<de::SharedPtr<ASType>> sourceStructures,
3064     const std::vector<VkDeviceSize> &compactSizes, const std::vector<VkDeviceSize> &serialSizes)
3065 {
3066     const VkQueue queue        = m_context.getUniversalQueue();
3067     const VkDevice device      = m_context.getDevice();
3068     const DeviceInterface &vkd = m_context.getDeviceInterface();
3069     Allocator &allocator       = m_context.getDefaultAllocator();
3070 
3071     const uint32_t sourceStructuresCount = uint32_t(sourceStructures.size());
3072 
3073     Move<VkQueryPool> queryPoolCompactSerial;
3074     std::vector<VkDeviceSize> compactSerialSizes;
3075 
3076     std::vector<VkAccelerationStructureKHR> compactHandles;
3077     std::vector<de::SharedPtr<ASType>> compactStructures;
3078 
3079     std::vector<de::SharedPtr<SerialStorage>> sourceSerialized;
3080     std::vector<de::SharedPtr<SerialStorage>> compactSerialized;
3081 
3082     // make compact copy of acceleration structure
3083     {
3084         beginCommandBuffer(vkd, cmdBuffer, 0u);
3085 
3086         for (size_t i = 0; i < sourceStructuresCount; ++i)
3087         {
3088             de::MovePtr<ASType> asCopy = makeAccelerationStructure<ASType>();
3089             asCopy->setBuildType(m_params->buildType);
3090             asCopy->createAndCopyFrom(vkd, device, cmdBuffer, allocator, sourceStructures[i].get(), compactSizes[i],
3091                                       0u);
3092             compactHandles.push_back(*(asCopy->getPtr()));
3093             compactStructures.push_back(de::SharedPtr<ASType>(asCopy.release()));
3094         }
3095 
3096         // query serialization size of compact acceleration structures
3097         if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3098             queryPoolCompactSerial = makeQueryPool(
3099                 vkd, device, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, sourceStructuresCount);
3100         queryAccelerationStructureSize(
3101             vkd, device, cmdBuffer, compactHandles, m_params->buildType, *queryPoolCompactSerial,
3102             VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, 0u, compactSerialSizes);
3103 
3104         endCommandBuffer(vkd, cmdBuffer);
3105         submitCommandsAndWait(vkd, device, queue, cmdBuffer);
3106 
3107         if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3108         {
3109             VK_CHECK(vkd.getQueryPoolResults(device, *queryPoolCompactSerial, 0u, sourceStructuresCount,
3110                                              (sourceStructuresCount * sizeof(VkDeviceSize)), compactSerialSizes.data(),
3111                                              sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
3112             vkd.resetCommandPool(device, cmdPool, VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT);
3113         }
3114     }
3115 
3116     // serialize both structures to memory
3117     {
3118         beginCommandBuffer(vkd, cmdBuffer, 0u);
3119 
3120         for (size_t i = 0; i < sourceStructuresCount; ++i)
3121         {
3122             sourceSerialized.push_back(de::SharedPtr<SerialStorage>(
3123                 new SerialStorage(vkd, device, allocator, m_params->buildType, serialSizes[i])));
3124             sourceStructures[i]->serialize(vkd, device, cmdBuffer, sourceSerialized.back().get());
3125 
3126             compactSerialized.push_back(de::SharedPtr<SerialStorage>(
3127                 new SerialStorage(vkd, device, allocator, m_params->buildType, compactSerialSizes[i])));
3128             compactStructures[i]->serialize(vkd, device, cmdBuffer, compactSerialized.back().get());
3129         }
3130 
3131         endCommandBuffer(vkd, cmdBuffer);
3132         submitCommandsAndWait(vkd, device, queue, cmdBuffer);
3133     }
3134 
3135     // verify compatibility
3136     bool result = true;
3137     for (size_t i = 0; result && (i < sourceStructuresCount); ++i)
3138     {
3139         const uint8_t *s_header = static_cast<const uint8_t *>(sourceSerialized[i]->getHostAddressConst().hostAddress);
3140         const uint8_t *c_header = static_cast<const uint8_t *>(compactSerialized[i]->getHostAddressConst().hostAddress);
3141 
3142         const auto s_compability = getDeviceASCompatibilityKHR(s_header);
3143         const auto c_compability = getDeviceASCompatibilityKHR(c_header);
3144 
3145         result &= ((s_compability == c_compability) &&
3146                    (s_compability == VK_ACCELERATION_STRUCTURE_COMPATIBILITY_COMPATIBLE_KHR));
3147 
3148         if (!result)
3149         {
3150             tcu::TestLog &log = m_context.getTestContext().getLog();
3151 
3152             log << tcu::TestLog::Message << getUUIDsString(s_header) << " serialized AS compability failed"
3153                 << tcu::TestLog::EndMessage;
3154             log << tcu::TestLog::Message << getUUIDsString(c_header) << " compact AS compability failed"
3155                 << tcu::TestLog::EndMessage;
3156         }
3157     }
3158 
3159     return result;
3160 }
3161 
prepareTopAccelerationStructure(const DeviceInterface & vk,VkDevice device,Allocator & allocator,VkCommandBuffer cmdBuffer)3162 de::SharedPtr<TopLevelAccelerationStructure> RayTracingHeaderBottomAddressTestInstance::prepareTopAccelerationStructure(
3163     const DeviceInterface &vk, VkDevice device, Allocator &allocator, VkCommandBuffer cmdBuffer)
3164 {
3165     const std::vector<tcu::Vec3> geometryData = {
3166         {0.0, 0.0, 0.0},
3167         {1.0, 0.0, 0.0},
3168         {0.0, 1.0, 0.0},
3169     };
3170 
3171     std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> bottoms;
3172 
3173     if (TopTestType::IDENTICAL_INSTANCES == m_params->topTestType)
3174     {
3175         auto blas = de::SharedPtr<BottomLevelAccelerationStructure>(makeBottomLevelAccelerationStructure().release());
3176         blas->setBuildType(m_params->buildType);
3177         blas->setGeometryData(geometryData, true, VK_GEOMETRY_OPAQUE_BIT_KHR);
3178         blas->createAndBuild(vk, device, cmdBuffer, allocator);
3179         for (uint32_t i = 0; i < m_params->width; ++i)
3180         {
3181             bottoms.emplace_back(blas);
3182         }
3183     }
3184     else if (TopTestType::DIFFERENT_INSTANCES == m_params->topTestType)
3185     {
3186         for (uint32_t i = 0; i < m_params->width; ++i)
3187         {
3188             auto blas =
3189                 de::SharedPtr<BottomLevelAccelerationStructure>(makeBottomLevelAccelerationStructure().release());
3190             blas->setBuildType(m_params->buildType);
3191             blas->setGeometryData(geometryData, true, VK_GEOMETRY_OPAQUE_BIT_KHR);
3192             blas->createAndBuild(vk, device, cmdBuffer, allocator);
3193             bottoms.emplace_back(blas);
3194         }
3195     }
3196     else // TTT_MIX_INSTANCES == m_params->topTestType
3197     {
3198         for (uint32_t i = 0; i < m_params->width; ++i)
3199         {
3200             {
3201                 auto blas1 =
3202                     de::SharedPtr<BottomLevelAccelerationStructure>(makeBottomLevelAccelerationStructure().release());
3203                 blas1->setBuildType(m_params->buildType);
3204                 blas1->setGeometryData(geometryData, true, VK_GEOMETRY_OPAQUE_BIT_KHR);
3205                 blas1->createAndBuild(vk, device, cmdBuffer, allocator);
3206                 bottoms.emplace_back(blas1);
3207             }
3208 
3209             {
3210                 auto blas2 =
3211                     de::SharedPtr<BottomLevelAccelerationStructure>(makeBottomLevelAccelerationStructure().release());
3212                 blas2->setBuildType(m_params->buildType);
3213                 blas2->setGeometryData(geometryData, true, VK_GEOMETRY_OPAQUE_BIT_KHR);
3214                 blas2->createAndBuild(vk, device, cmdBuffer, allocator);
3215                 bottoms.emplace_back(blas2);
3216             }
3217         }
3218     }
3219 
3220     const std::size_t instanceCount = bottoms.size();
3221 
3222     de::MovePtr<TopLevelAccelerationStructure> tlas = makeTopLevelAccelerationStructure();
3223     tlas->setBuildType(m_params->buildType);
3224     tlas->setInstanceCount(instanceCount);
3225 
3226     for (std::size_t i = 0; i < instanceCount; ++i)
3227     {
3228         const VkTransformMatrixKHR transformMatrixKHR = {{
3229             //  float matrix[3][4];
3230             {1.0f, 0.0f, 0.0f, (float)i},
3231             {0.0f, 1.0f, 0.0f, (float)i},
3232             {0.0f, 0.0f, 1.0f, 0.0f},
3233         }};
3234         tlas->addInstance(bottoms[i], transformMatrixKHR, 0, m_params->cullMask, 0u,
3235                           getCullFlags((m_params->cullFlags)));
3236     }
3237 
3238     tlas->createAndBuild(vk, device, cmdBuffer, allocator);
3239 
3240     return de::SharedPtr<TopLevelAccelerationStructure>(tlas.release());
3241 }
3242 
iterate(void)3243 tcu::TestStatus RayTracingHeaderBottomAddressTestInstance::iterate(void)
3244 {
3245     const DeviceInterface &vkd = m_context.getDeviceInterface();
3246     const VkDevice device      = m_context.getDevice();
3247     const uint32_t familyIndex = m_context.getUniversalQueueFamilyIndex();
3248     const VkQueue queue        = m_context.getUniversalQueue();
3249     Allocator &allocator       = m_context.getDefaultAllocator();
3250 
3251     const Move<VkCommandPool> cmdPool =
3252         createCommandPool(vkd, device, vk::VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, familyIndex);
3253     const Move<VkCommandBuffer> cmdBuffer =
3254         allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
3255 
3256     beginCommandBuffer(vkd, *cmdBuffer, 0);
3257     de::SharedPtr<TopLevelAccelerationStructure> src =
3258         prepareTopAccelerationStructure(vkd, device, allocator, *cmdBuffer);
3259     endCommandBuffer(vkd, *cmdBuffer);
3260     submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
3261 
3262     de::MovePtr<TopLevelAccelerationStructure> dst = makeTopLevelAccelerationStructure();
3263 
3264     const std::vector<uint64_t> inAddrs     = src->getSerializingAddresses(vkd, device);
3265     const std::vector<VkDeviceSize> inSizes = src->getSerializingSizes(vkd, device, queue, familyIndex);
3266 
3267     const SerialInfo serialInfo(inAddrs, inSizes);
3268     SerialStorage deepStorage(vkd, device, allocator, m_params->buildType, serialInfo);
3269 
3270     // make deep serialization - top-level AS width bottom-level structures that it owns
3271     vkd.resetCommandBuffer(*cmdBuffer, 0);
3272     beginCommandBuffer(vkd, *cmdBuffer, 0);
3273     src->serialize(vkd, device, *cmdBuffer, &deepStorage);
3274     endCommandBuffer(vkd, *cmdBuffer);
3275     submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
3276 
3277     // deserialize all from the previous step to a new top-level AS
3278     // bottom-level structure addresses should be updated when deep data is deserialized
3279     vkd.resetCommandBuffer(*cmdBuffer, 0);
3280     beginCommandBuffer(vkd, *cmdBuffer, 0);
3281     dst->createAndDeserializeFrom(vkd, device, *cmdBuffer, allocator, &deepStorage);
3282     endCommandBuffer(vkd, *cmdBuffer);
3283     submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
3284 
3285     SerialStorage shallowStorage(vkd, device, allocator, m_params->buildType, inSizes[0]);
3286 
3287     // make shallow serialization - only top-level AS without bottom-level structures
3288     vkd.resetCommandBuffer(*cmdBuffer, 0);
3289     beginCommandBuffer(vkd, *cmdBuffer, 0);
3290     dst->serialize(vkd, device, *cmdBuffer, &shallowStorage);
3291     endCommandBuffer(vkd, *cmdBuffer);
3292     submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
3293 
3294     // get data to verification
3295     const std::vector<uint64_t> outAddrs                     = dst->getSerializingAddresses(vkd, device);
3296     const SerialStorage::AccelerationStructureHeader *header = shallowStorage.getASHeader();
3297 
3298     return (areAddressesDifferent(inAddrs, outAddrs) && areAddressesTheSame(outAddrs, header)) ?
3299                tcu::TestStatus::pass("") :
3300                tcu::TestStatus::fail("");
3301 }
3302 
areAddressesTheSame(const std::vector<uint64_t> & addresses,const SerialStorage::AccelerationStructureHeader * header)3303 bool RayTracingHeaderBottomAddressTestInstance::areAddressesTheSame(
3304     const std::vector<uint64_t> &addresses, const SerialStorage::AccelerationStructureHeader *header)
3305 {
3306     const uint32_t cbottoms = uint32_t(addresses.size() - 1);
3307 
3308     // header should contain the same number of handles as serialized/deserialized top-level AS
3309     if (cbottoms != header->handleCount)
3310         return false;
3311 
3312     std::set<uint64_t> refAddrs;
3313     std::set<uint64_t> checkAddrs;
3314 
3315     // distinct, squach and sort address list
3316     for (uint32_t i = 0; i < cbottoms; ++i)
3317     {
3318         refAddrs.insert(addresses[i + 1]);
3319         checkAddrs.insert(header->handleArray[i]);
3320     }
3321 
3322     return std::equal(refAddrs.begin(), refAddrs.end(), checkAddrs.begin());
3323 }
3324 
areAddressesDifferent(const std::vector<uint64_t> & addresses1,const std::vector<uint64_t> & addresses2)3325 bool RayTracingHeaderBottomAddressTestInstance::areAddressesDifferent(const std::vector<uint64_t> &addresses1,
3326                                                                       const std::vector<uint64_t> &addresses2)
3327 {
3328     // the number of addresses must be equal
3329     if (addresses1.size() != addresses2.size())
3330         return false;
3331 
3332     // adresses of top-level AS must differ
3333     if (addresses1[0] == addresses2[0])
3334         return false;
3335 
3336     std::set<uint64_t> addrs1;
3337     std::set<uint64_t> addrs2;
3338     uint32_t matches        = 0;
3339     const uint32_t cbottoms = uint32_t(addresses1.size() - 1);
3340 
3341     for (uint32_t i = 0; i < cbottoms; ++i)
3342     {
3343         addrs1.insert(addresses1[i + 1]);
3344         addrs2.insert(addresses2[i + 1]);
3345     }
3346 
3347     // the first addresses set must not contain any address from the second addresses set
3348     for (auto &addr1 : addrs1)
3349     {
3350         if (addrs2.end() != addrs2.find(addr1))
3351             ++matches;
3352     }
3353 
3354     return (matches == 0);
3355 }
3356 
3357 template <class X, class... Y>
makeShared(Y &&...ctorArgs)3358 inline de::SharedPtr<X> makeShared(Y &&...ctorArgs)
3359 {
3360     return de::SharedPtr<X>(new X(std::forward<Y>(ctorArgs)...));
3361 }
3362 template <class X, class... Y>
makeMovePtr(Y &&...ctorArgs)3363 inline de::MovePtr<X> makeMovePtr(Y &&...ctorArgs)
3364 {
3365     return de::MovePtr<X>(new X(std::forward<Y>(ctorArgs)...));
3366 }
3367 template <class X>
makeSharedFrom(const X & x)3368 inline de::SharedPtr<X> makeSharedFrom(const X &x)
3369 {
3370     return makeShared<X>(x);
3371 }
3372 
3373 struct QueryPoolResultsParams
3374 {
3375     enum class Type
3376     {
3377         StructureSize,
3378         PointerCount
3379     } queryType;
3380     VkAccelerationStructureBuildTypeKHR buildType;
3381     uint32_t blasCount;
3382     bool inVkBuffer;
3383     bool compacted;
3384 };
3385 
3386 typedef de::SharedPtr<const QueryPoolResultsParams> QueryPoolResultsParamsPtr;
3387 
3388 struct ASInterface;
3389 typedef de::SharedPtr<ASInterface> ASInterfacePtr;
3390 
3391 class QueryPoolResultsInstance : public TestInstance
3392 {
3393 public:
3394     using TlasPtr = de::SharedPtr<TopLevelAccelerationStructure>;
3395     using BlasPtr = de::SharedPtr<BottomLevelAccelerationStructure>;
3396 
QueryPoolResultsInstance(Context & context,QueryPoolResultsParamsPtr params)3397     QueryPoolResultsInstance(Context &context, QueryPoolResultsParamsPtr params)
3398         : TestInstance(context)
3399         , m_params(params)
3400     {
3401     }
3402     auto prepareBottomAccStructures(const DeviceInterface &vk, VkDevice device, Allocator &allocator,
3403                                     VkCommandBuffer cmdBuffer) -> std::vector<BlasPtr>;
3404     TlasPtr prepareTopAccStructure(const DeviceInterface &vk, VkDevice device, Allocator &allocator,
3405                                    VkCommandBuffer cmdBuffer, const std::vector<BlasPtr> &bottoms);
3406 
3407 protected:
3408     const QueryPoolResultsParamsPtr m_params;
3409 };
3410 
3411 struct ASInterface
3412 {
~ASInterfacevkt::RayTracing::__anona25d47410111::ASInterface3413     virtual ~ASInterface()
3414     {
3415     }
3416     virtual VkAccelerationStructureKHR getPtr() const                               = 0;
3417     virtual VkAccelerationStructureBuildSizesInfoKHR getStructureBuildSizes() const = 0;
3418     virtual ASInterfacePtr clone(Context &ctx, VkAccelerationStructureBuildTypeKHR buildType, const VkCommandBuffer cmd,
3419                                  VkDeviceSize size)                                 = 0;
3420 };
3421 
3422 template <class>
3423 struct ASAllocator;
3424 template <>
3425 struct ASAllocator<QueryPoolResultsInstance::TlasPtr>
3426 {
3427     typedef QueryPoolResultsInstance::TlasPtr TlasPtr;
allocvkt::RayTracing::__anona25d47410111::ASAllocator3428     static TlasPtr alloc()
3429     {
3430         return TlasPtr(makeTopLevelAccelerationStructure().release());
3431     }
3432 };
3433 template <>
3434 struct ASAllocator<QueryPoolResultsInstance::BlasPtr>
3435 {
3436     typedef QueryPoolResultsInstance::BlasPtr BlasPtr;
allocvkt::RayTracing::__anona25d47410111::ASAllocator3437     static BlasPtr alloc()
3438     {
3439         return BlasPtr(makeBottomLevelAccelerationStructure().release());
3440     }
3441 };
3442 
3443 template <class SharedPtrType>
3444 struct ASInterfaceImpl : ASInterface
3445 {
3446     SharedPtrType m_source;
ASInterfaceImplvkt::RayTracing::__anona25d47410111::ASInterfaceImpl3447     ASInterfaceImpl(SharedPtrType src) : m_source(src)
3448     {
3449     }
getPtrvkt::RayTracing::__anona25d47410111::ASInterfaceImpl3450     virtual VkAccelerationStructureKHR getPtr() const override
3451     {
3452         return *m_source->getPtr();
3453     }
getStructureBuildSizesvkt::RayTracing::__anona25d47410111::ASInterfaceImpl3454     virtual VkAccelerationStructureBuildSizesInfoKHR getStructureBuildSizes() const override
3455     {
3456         return m_source->getStructureBuildSizes();
3457     }
clonevkt::RayTracing::__anona25d47410111::ASInterfaceImpl3458     virtual ASInterfacePtr clone(Context &ctx, VkAccelerationStructureBuildTypeKHR buildType, const VkCommandBuffer cmd,
3459                                  VkDeviceSize size) override
3460     {
3461         const DeviceInterface &vk = ctx.getDeviceInterface();
3462         const VkDevice device     = ctx.getDevice();
3463         Allocator &allocator      = ctx.getDefaultAllocator();
3464 
3465         auto ptr = ASAllocator<SharedPtrType>::alloc();
3466         ptr->setBuildType(buildType);
3467         ptr->setBuildFlags(m_source->getBuildFlags());
3468         ptr->create(vk, device, allocator, size);
3469         ptr->copyFrom(vk, device, cmd, m_source.get(), false);
3470         return de::SharedPtr<ASInterface>(new ASInterfaceImpl(ptr));
3471     }
3472 };
3473 
3474 template <class SharedPtrType>
makeASInterfacePtr(SharedPtrType asPtr)3475 ASInterfacePtr makeASInterfacePtr(SharedPtrType asPtr)
3476 {
3477     return ASInterfacePtr(new ASInterfaceImpl<SharedPtrType>(asPtr));
3478 }
3479 
3480 class QueryPoolResultsSizeInstance : public QueryPoolResultsInstance
3481 {
3482 public:
QueryPoolResultsSizeInstance(Context & context,QueryPoolResultsParamsPtr params)3483     QueryPoolResultsSizeInstance(Context &context, QueryPoolResultsParamsPtr params)
3484         : QueryPoolResultsInstance(context, params)
3485     {
3486     }
3487     TestStatus iterate(void) override;
3488     auto makeCopyOfStructures(const std::vector<ASInterfacePtr> &structs, const std::vector<VkDeviceSize> sizes)
3489         -> std::vector<ASInterfacePtr>;
3490     auto getStructureSizes(const std::vector<VkAccelerationStructureKHR> &handles) -> std::vector<VkDeviceSize>;
3491 };
3492 
3493 class QueryPoolResultsPointersInstance : public QueryPoolResultsInstance
3494 {
3495 public:
QueryPoolResultsPointersInstance(Context & context,QueryPoolResultsParamsPtr params)3496     QueryPoolResultsPointersInstance(Context &context, QueryPoolResultsParamsPtr params)
3497         : QueryPoolResultsInstance(context, params)
3498     {
3499     }
3500 
3501     TestStatus iterate(void) override;
3502 };
3503 
3504 class QueryPoolResultsCase : public TestCase
3505 {
3506 public:
QueryPoolResultsCase(TestContext & ctx,const char * name,QueryPoolResultsParamsPtr params)3507     QueryPoolResultsCase(TestContext &ctx, const char *name, QueryPoolResultsParamsPtr params)
3508         : TestCase(ctx, name)
3509         , m_params(params)
3510     {
3511     }
3512     void checkSupport(Context &context) const override;
3513     TestInstance *createInstance(Context &context) const override;
3514 
3515     template <class T, class P = T (*)[1], class R = decltype(std::begin(*std::declval<P>()))>
makeStdBeginEnd(void * p,uint32_t n)3516     static auto makeStdBeginEnd(void *p, uint32_t n) -> std::pair<R, R>
3517     {
3518         auto tmp   = std::begin(*P(p));
3519         auto begin = tmp;
3520         std::advance(tmp, n);
3521         return {begin, tmp};
3522     }
3523 
3524 private:
3525     const QueryPoolResultsParamsPtr m_params;
3526 };
3527 
createInstance(Context & context) const3528 TestInstance *QueryPoolResultsCase::createInstance(Context &context) const
3529 {
3530     switch (m_params->queryType)
3531     {
3532     case QueryPoolResultsParams::Type::StructureSize:
3533         return new QueryPoolResultsSizeInstance(context, m_params);
3534     case QueryPoolResultsParams::Type::PointerCount:
3535         return new QueryPoolResultsPointersInstance(context, m_params);
3536     }
3537     TCU_THROW(InternalError, "Unknown test type");
3538     return nullptr;
3539 }
3540 
checkSupport(Context & context) const3541 void QueryPoolResultsCase::checkSupport(Context &context) const
3542 {
3543     context.requireDeviceFunctionality(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME);
3544     context.requireDeviceFunctionality(VK_KHR_RAY_TRACING_MAINTENANCE_1_EXTENSION_NAME);
3545 
3546     const VkPhysicalDeviceAccelerationStructureFeaturesKHR &accelerationStructureFeaturesKHR =
3547         context.getAccelerationStructureFeatures();
3548     if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR &&
3549         accelerationStructureFeaturesKHR.accelerationStructureHostCommands == false)
3550         TCU_THROW(NotSupportedError,
3551                   "Requires VkPhysicalDeviceAccelerationStructureFeaturesKHR.accelerationStructureHostCommands");
3552 
3553     const VkPhysicalDeviceRayTracingMaintenance1FeaturesKHR &maintenance1FeaturesKHR =
3554         context.getRayTracingMaintenance1Features();
3555     if (maintenance1FeaturesKHR.rayTracingMaintenance1 == VK_FALSE)
3556         TCU_THROW(NotSupportedError,
3557                   "Requires VkPhysicalDeviceRayTracingMaintenance1FeaturesKHR::rayTracingMaintenance1");
3558 }
3559 
prepareBottomAccStructures(const DeviceInterface & vk,VkDevice device,Allocator & allocator,VkCommandBuffer cmdBuffer)3560 auto QueryPoolResultsInstance::prepareBottomAccStructures(const DeviceInterface &vk, VkDevice device,
3561                                                           Allocator &allocator, VkCommandBuffer cmdBuffer)
3562     -> std::vector<BlasPtr>
3563 {
3564     std::vector<Vec3> triangle = {
3565         {0.0, 0.0, 0.0},
3566         {0.5, 0.0, 0.0},
3567         {0.0, 0.5, 0.0},
3568     };
3569 
3570     const uint32_t triangleCount = ((1 + m_params->blasCount) * m_params->blasCount) / 2;
3571     const float angle            = (4.0f * std::acos(0.0f)) / float(triangleCount);
3572     auto rotateCcwZ              = [&](const Vec3 &p, const Vec3 &center) -> tcu::Vec3
3573     {
3574         const float s = std::sin(angle);
3575         const float c = std::cos(angle);
3576         const auto t  = p - center;
3577         return tcu::Vec3(c * t.x() - s * t.y(), s * t.x() + c * t.y(), t.z()) + center;
3578     };
3579     auto nextGeometry = [&]() -> void
3580     {
3581         for (auto &vertex : triangle)
3582             vertex = rotateCcwZ(vertex, Vec3(0.0f, 0.0f, 0.0f));
3583     };
3584 
3585     std::vector<BlasPtr> bottoms(m_params->blasCount);
3586 
3587     for (uint32_t b = 0; b < m_params->blasCount; ++b)
3588     {
3589         BlasPtr blas(makeBottomLevelAccelerationStructure().release());
3590 
3591         blas->setBuildType(m_params->buildType);
3592         if (m_params->compacted)
3593         {
3594             blas->setBuildFlags(VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR);
3595         }
3596         blas->addGeometry(triangle, true, VK_GEOMETRY_OPAQUE_BIT_KHR);
3597         for (uint32_t geom = b; geom < m_params->blasCount; ++geom)
3598         {
3599             nextGeometry();
3600             blas->addGeometry(triangle, true, VK_GEOMETRY_OPAQUE_BIT_KHR);
3601         }
3602 
3603         blas->createAndBuild(vk, device, cmdBuffer, allocator);
3604 
3605         bottoms[b] = blas;
3606     }
3607 
3608     return bottoms;
3609 }
3610 
prepareTopAccStructure(const DeviceInterface & vk,VkDevice device,Allocator & allocator,VkCommandBuffer cmdBuffer,const std::vector<BlasPtr> & bottoms)3611 auto QueryPoolResultsInstance::prepareTopAccStructure(const DeviceInterface &vk, VkDevice device, Allocator &allocator,
3612                                                       VkCommandBuffer cmdBuffer, const std::vector<BlasPtr> &bottoms)
3613     -> TlasPtr
3614 {
3615     const std::size_t instanceCount = bottoms.size();
3616 
3617     de::MovePtr<TopLevelAccelerationStructure> tlas = makeTopLevelAccelerationStructure();
3618     tlas->setBuildType(m_params->buildType);
3619     if (m_params->compacted)
3620     {
3621         tlas->setBuildFlags(VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR);
3622     }
3623     tlas->setInstanceCount(instanceCount);
3624 
3625     for (std::size_t i = 0; i < instanceCount; ++i)
3626     {
3627         tlas->addInstance(bottoms[i], identityMatrix3x4, 0, 0xFFu, 0u, VkGeometryInstanceFlagsKHR(0));
3628     }
3629 
3630     tlas->createAndBuild(vk, device, cmdBuffer, allocator);
3631 
3632     return TlasPtr(tlas.release());
3633 }
3634 
getStructureSizes(const std::vector<VkAccelerationStructureKHR> & handles)3635 auto QueryPoolResultsSizeInstance::getStructureSizes(const std::vector<VkAccelerationStructureKHR> &handles)
3636     -> std::vector<VkDeviceSize>
3637 {
3638     const DeviceInterface &vk  = m_context.getDeviceInterface();
3639     const VkDevice device      = m_context.getDevice();
3640     const uint32_t familyIndex = m_context.getUniversalQueueFamilyIndex();
3641     const VkQueue queue        = m_context.getUniversalQueue();
3642     Allocator &allocator       = m_context.getDefaultAllocator();
3643 
3644     const Move<VkCommandPool> cmdPool = createCommandPool(vk, device, 0, familyIndex);
3645     const Move<VkCommandBuffer> cmdBuffer =
3646         allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
3647 
3648     const uint32_t queryCount = static_cast<uint32_t>(handles.size());
3649 
3650     Move<VkQueryPool> queryPoolSize =
3651         makeQueryPool(vk, device, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR, queryCount);
3652     Move<VkQueryPool> queryPoolSerial =
3653         makeQueryPool(vk, device, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, queryCount);
3654     Move<VkQueryPool> queryPoolCompact =
3655         m_params->compacted ?
3656             makeQueryPool(vk, device, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR, queryCount) :
3657             Move<VkQueryPool>();
3658 
3659     de::MovePtr<BufferWithMemory> buffer;
3660     std::vector<VkDeviceSize> sizeSizes(queryCount, 0);
3661     std::vector<VkDeviceSize> serialSizes(queryCount, 0);
3662     std::vector<VkDeviceSize> compactSizes(queryCount, 0);
3663 
3664     if (m_params->inVkBuffer)
3665     {
3666         const auto vci = makeBufferCreateInfo((m_params->compacted ? 3 : 2) * queryCount * sizeof(VkDeviceSize),
3667                                               VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3668         buffer         = makeMovePtr<BufferWithMemory>(vk, device, allocator, vci,
3669                                                MemoryRequirement::Coherent | MemoryRequirement::HostVisible);
3670     }
3671 
3672     if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3673     {
3674         beginCommandBuffer(vk, *cmdBuffer, 0);
3675 
3676         vk.cmdResetQueryPool(*cmdBuffer, *queryPoolSize, 0, queryCount);
3677         vk.cmdResetQueryPool(*cmdBuffer, *queryPoolSerial, 0, queryCount);
3678         if (m_params->compacted)
3679         {
3680             vk.cmdResetQueryPool(*cmdBuffer, *queryPoolCompact, 0, queryCount);
3681         }
3682 
3683         vk.cmdWriteAccelerationStructuresPropertiesKHR(
3684             *cmdBuffer, queryCount, handles.data(), VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR, *queryPoolSize, 0);
3685         vk.cmdWriteAccelerationStructuresPropertiesKHR(*cmdBuffer, queryCount, handles.data(),
3686                                                        VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR,
3687                                                        *queryPoolSerial, 0);
3688 
3689         if (m_params->compacted)
3690         {
3691             vk.cmdWriteAccelerationStructuresPropertiesKHR(*cmdBuffer, queryCount, handles.data(),
3692                                                            VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR,
3693                                                            *queryPoolCompact, 0);
3694         }
3695 
3696         if (m_params->inVkBuffer)
3697         {
3698             vk.cmdCopyQueryPoolResults(*cmdBuffer, *queryPoolSize, 0, queryCount, **buffer,
3699                                        (0 * queryCount * sizeof(VkDeviceSize)), sizeof(VkDeviceSize),
3700                                        VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
3701             vk.cmdCopyQueryPoolResults(*cmdBuffer, *queryPoolSerial, 0, queryCount, **buffer,
3702                                        (1 * queryCount * sizeof(VkDeviceSize)), sizeof(VkDeviceSize),
3703                                        VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
3704             if (m_params->compacted)
3705             {
3706                 vk.cmdCopyQueryPoolResults(*cmdBuffer, *queryPoolCompact, 0, queryCount, **buffer,
3707                                            (2 * queryCount * sizeof(VkDeviceSize)), sizeof(VkDeviceSize),
3708                                            VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
3709             }
3710         }
3711         endCommandBuffer(vk, *cmdBuffer);
3712         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3713 
3714         if (m_params->inVkBuffer)
3715         {
3716             Allocation &alloc = buffer->getAllocation();
3717             invalidateMappedMemoryRange(vk, device, alloc.getMemory(), alloc.getOffset(), VK_WHOLE_SIZE);
3718 
3719             uint8_t *ptrSize   = reinterpret_cast<uint8_t *>(alloc.getHostPtr());
3720             uint8_t *ptrSerial = ptrSize + queryCount * sizeof(VkDeviceSize);
3721 
3722             auto rangeSize   = QueryPoolResultsCase::makeStdBeginEnd<VkDeviceSize>(ptrSize, queryCount);
3723             auto rangeSerial = QueryPoolResultsCase::makeStdBeginEnd<VkDeviceSize>(ptrSerial, queryCount);
3724 
3725             std::copy_n(rangeSize.first, queryCount, sizeSizes.begin());
3726             std::copy_n(rangeSerial.first, queryCount, serialSizes.begin());
3727 
3728             if (m_params->compacted)
3729             {
3730                 auto ptrCompact   = ptrSize + 2 * queryCount * sizeof(VkDeviceSize);
3731                 auto rangeCompact = QueryPoolResultsCase::makeStdBeginEnd<VkDeviceSize>(ptrCompact, queryCount);
3732                 std::copy_n(rangeCompact.first, queryCount, compactSizes.begin());
3733             }
3734         }
3735         else
3736         {
3737             VK_CHECK(vk.getQueryPoolResults(device, *queryPoolSize, 0u, queryCount, queryCount * sizeof(VkDeviceSize),
3738                                             sizeSizes.data(), sizeof(VkDeviceSize),
3739                                             VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
3740             VK_CHECK(vk.getQueryPoolResults(device, *queryPoolSerial, 0u, queryCount, queryCount * sizeof(VkDeviceSize),
3741                                             serialSizes.data(), sizeof(VkDeviceSize),
3742                                             VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
3743             if (m_params->compacted)
3744             {
3745                 VK_CHECK(vk.getQueryPoolResults(
3746                     device, *queryPoolCompact, 0u, queryCount, queryCount * sizeof(VkDeviceSize), compactSizes.data(),
3747                     sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
3748             }
3749         }
3750     }
3751     else
3752     {
3753         vk.writeAccelerationStructuresPropertiesKHR(
3754             device, queryCount, handles.data(), VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR,
3755             queryCount * sizeof(VkDeviceSize), sizeSizes.data(), sizeof(VkDeviceSize));
3756         vk.writeAccelerationStructuresPropertiesKHR(
3757             device, queryCount, handles.data(), VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR,
3758             queryCount * sizeof(VkDeviceSize), serialSizes.data(), sizeof(VkDeviceSize));
3759         if (m_params->compacted)
3760         {
3761             vk.writeAccelerationStructuresPropertiesKHR(
3762                 device, queryCount, handles.data(), VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR,
3763                 queryCount * sizeof(VkDeviceSize), compactSizes.data(), sizeof(VkDeviceSize));
3764         }
3765     }
3766 
3767     sizeSizes.insert(sizeSizes.end(), serialSizes.begin(), serialSizes.end());
3768     sizeSizes.insert(sizeSizes.end(), compactSizes.begin(), compactSizes.end());
3769 
3770     return sizeSizes;
3771 }
3772 
makeCopyOfStructures(const std::vector<ASInterfacePtr> & structs,const std::vector<VkDeviceSize> sizes)3773 auto QueryPoolResultsSizeInstance::makeCopyOfStructures(const std::vector<ASInterfacePtr> &structs,
3774                                                         const std::vector<VkDeviceSize> sizes)
3775     -> std::vector<ASInterfacePtr>
3776 {
3777     const DeviceInterface &vk = m_context.getDeviceInterface();
3778     const VkDevice device     = m_context.getDevice();
3779     const VkQueue queue       = m_context.getUniversalQueue();
3780 
3781     Move<VkCommandPool> cmdPool;
3782     Move<VkCommandBuffer> cmdBuffer;
3783 
3784     std::vector<ASInterfacePtr> copies;
3785 
3786     if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3787     {
3788         const uint32_t familyIndex = m_context.getUniversalQueueFamilyIndex();
3789         cmdPool                    = createCommandPool(vk, device, 0, familyIndex);
3790         cmdBuffer                  = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
3791         beginCommandBuffer(vk, *cmdBuffer, 0u);
3792     }
3793 
3794     for (auto begin = structs.begin(), i = begin; i != structs.end(); ++i)
3795     {
3796         copies.push_back((*i)->clone(m_context, m_params->buildType, *cmdBuffer, sizes.at(std::distance(begin, i))));
3797     }
3798 
3799     if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3800     {
3801         endCommandBuffer(vk, *cmdBuffer);
3802         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3803     }
3804 
3805     return copies;
3806 }
3807 
iterate(void)3808 TestStatus QueryPoolResultsSizeInstance::iterate(void)
3809 {
3810     const DeviceInterface &vk  = m_context.getDeviceInterface();
3811     const VkDevice device      = m_context.getDevice();
3812     const uint32_t familyIndex = m_context.getUniversalQueueFamilyIndex();
3813     const VkQueue queue        = m_context.getUniversalQueue();
3814     Allocator &allocator       = m_context.getDefaultAllocator();
3815 
3816     const Move<VkCommandPool> cmdPool =
3817         createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, familyIndex);
3818     const Move<VkCommandBuffer> cmdBuffer =
3819         allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
3820 
3821     beginCommandBuffer(vk, *cmdBuffer, 0);
3822     const std::vector<BlasPtr> bottoms = prepareBottomAccStructures(vk, device, allocator, *cmdBuffer);
3823     TlasPtr tlas                       = prepareTopAccStructure(vk, device, allocator, *cmdBuffer, bottoms);
3824     endCommandBuffer(vk, *cmdBuffer);
3825     submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3826 
3827     const uint32_t queryCount = m_params->blasCount + 1;
3828     std::vector<VkAccelerationStructureKHR> handles(queryCount);
3829     handles[0] = *tlas->getPtr();
3830     std::transform(bottoms.begin(), bottoms.end(), std::next(handles.begin()),
3831                    [](const BlasPtr &blas) { return *blas->getPtr(); });
3832 
3833     // only the first queryCount elements are results from ACCELERATION_STRUCTURE_SIZE queries.
3834     const std::vector<VkDeviceSize> sourceSizes = getStructureSizes(handles);
3835 
3836     std::vector<ASInterfacePtr> sourceStructures;
3837     sourceStructures.push_back(makeASInterfacePtr(tlas));
3838     for (BlasPtr blas : bottoms)
3839         sourceStructures.push_back(makeASInterfacePtr(blas));
3840 
3841     std::vector<ASInterfacePtr> copies = makeCopyOfStructures(sourceStructures, sourceSizes);
3842     std::transform(copies.begin(), copies.end(), handles.begin(),
3843                    [](const ASInterfacePtr &intf) { return intf->getPtr(); });
3844 
3845     const std::vector<VkDeviceSize> copySizes = getStructureSizes(handles);
3846 
3847     // verification
3848     bool pass = true;
3849     for (uint32_t i = 0; pass && i < queryCount; ++i)
3850     {
3851         pass = sourceSizes.at(i) == copySizes.at(i);
3852     }
3853 
3854     return pass ? TestStatus::pass("") : TestStatus::fail("");
3855 }
3856 
iterate(void)3857 TestStatus QueryPoolResultsPointersInstance::iterate(void)
3858 {
3859     const DeviceInterface &vk  = m_context.getDeviceInterface();
3860     const VkDevice device      = m_context.getDevice();
3861     const uint32_t familyIndex = m_context.getUniversalQueueFamilyIndex();
3862     const VkQueue queue        = m_context.getUniversalQueue();
3863     Allocator &allocator       = m_context.getDefaultAllocator();
3864 
3865     const Move<VkCommandPool> cmdPool =
3866         createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, familyIndex);
3867     const Move<VkCommandBuffer> cmdBuffer =
3868         allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
3869 
3870     beginCommandBuffer(vk, *cmdBuffer, 0);
3871     const std::vector<BlasPtr> bottoms = prepareBottomAccStructures(vk, device, allocator, *cmdBuffer);
3872     TlasPtr tlas                       = prepareTopAccStructure(vk, device, allocator, *cmdBuffer, bottoms);
3873     endCommandBuffer(vk, *cmdBuffer);
3874     submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3875 
3876     const uint32_t queryCount = m_params->blasCount + 1;
3877     std::vector<VkAccelerationStructureKHR> handles(queryCount);
3878     handles[0] = *tlas.get()->getPtr();
3879     std::transform(bottoms.begin(), bottoms.end(), std::next(handles.begin()),
3880                    [](const BlasPtr &blas) { return *blas.get()->getPtr(); });
3881 
3882     const VkQueryType queryType       = VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR;
3883     Move<VkQueryPool> queryPoolCounts = makeQueryPool(vk, device, queryType, queryCount);
3884 
3885     de::MovePtr<BufferWithMemory> buffer;
3886     std::vector<VkDeviceSize> pointerCounts(queryCount, 123u);
3887 
3888     if (m_params->inVkBuffer)
3889     {
3890         const auto vci = makeBufferCreateInfo(queryCount * sizeof(VkDeviceSize), VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3891         buffer         = makeMovePtr<BufferWithMemory>(vk, device, allocator, vci,
3892                                                MemoryRequirement::Coherent | MemoryRequirement::HostVisible);
3893     }
3894 
3895     if (m_params->buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
3896     {
3897         beginCommandBuffer(vk, *cmdBuffer, 0);
3898         vk.cmdResetQueryPool(*cmdBuffer, *queryPoolCounts, 0, queryCount);
3899         vk.cmdWriteAccelerationStructuresPropertiesKHR(*cmdBuffer, queryCount, handles.data(), queryType,
3900                                                        *queryPoolCounts, 0);
3901         if (m_params->inVkBuffer)
3902         {
3903             vk.cmdCopyQueryPoolResults(*cmdBuffer, *queryPoolCounts, 0, queryCount, **buffer, 0 /*offset*/,
3904                                        sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
3905         }
3906         endCommandBuffer(vk, *cmdBuffer);
3907         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3908 
3909         if (m_params->inVkBuffer)
3910         {
3911             Allocation &alloc = buffer->getAllocation();
3912             invalidateMappedMemoryRange(vk, device, alloc.getMemory(), alloc.getOffset(), VK_WHOLE_SIZE);
3913             auto rangeCounts = QueryPoolResultsCase::makeStdBeginEnd<VkDeviceSize>(alloc.getHostPtr(), queryCount);
3914             std::copy_n(rangeCounts.first, queryCount, pointerCounts.begin());
3915         }
3916         else
3917         {
3918             VK_CHECK(vk.getQueryPoolResults(device, *queryPoolCounts, 0u, queryCount, queryCount * sizeof(VkDeviceSize),
3919                                             pointerCounts.data(), sizeof(VkDeviceSize),
3920                                             VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
3921         }
3922     }
3923     else
3924     {
3925         vk.writeAccelerationStructuresPropertiesKHR(device, queryCount, handles.data(), queryType,
3926                                                     queryCount * sizeof(VkDeviceSize), pointerCounts.data(),
3927                                                     sizeof(VkDeviceSize));
3928     }
3929 
3930     // verification
3931     const std::vector<VkDeviceSize> inSizes = tlas->getSerializingSizes(vk, device, queue, familyIndex);
3932     SerialStorage storage(vk, device, allocator, m_params->buildType, inSizes[0]);
3933 
3934     beginCommandBuffer(vk, *cmdBuffer, 0);
3935     tlas->serialize(vk, device, *cmdBuffer, &storage);
3936     endCommandBuffer(vk, *cmdBuffer);
3937     submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3938 
3939     const SerialStorage::AccelerationStructureHeader *header = storage.getASHeader();
3940 
3941     bool pass = (header->handleCount == pointerCounts[0]); // must be the same as bottoms.size()
3942     for (uint32_t i = 1; pass && i < queryCount; ++i)
3943     {
3944         pass = (0 == pointerCounts[i]); // bottoms have no chidren
3945     }
3946 
3947     return pass ? TestStatus::pass("") : TestStatus::fail("");
3948 }
3949 
3950 struct CopyWithinPipelineParams
3951 {
3952     enum class Type
3953     {
3954         StageASCopyBit,
3955         StageAllTransferBit,
3956         AccessSBTReadBit
3957     } type;
3958     uint32_t width;
3959     uint32_t height;
3960     VkAccelerationStructureBuildTypeKHR build;
3961 };
3962 typedef de::SharedPtr<const CopyWithinPipelineParams> CopyWithinPipelineParamsPtr;
3963 
/**
 * Common base of the CopyWithinPipeline test instances.
 *
 * Caches the device interface, device and default allocator taken from the context and creates the "rgen",
 * "chit" and "miss" shader modules from the context's binary collection.
 */
class CopyWithinPipelineInstance : public TestInstance
{
public:
    using TlasPtr = de::SharedPtr<TopLevelAccelerationStructure>;
    using BlasPtr = de::SharedPtr<BottomLevelAccelerationStructure>;

    // Note: the shader modules are created with vk and device, so those two members must stay declared
    // (and therefore initialized) before the shader module members.
    CopyWithinPipelineInstance(Context &context, CopyWithinPipelineParamsPtr params)
        : TestInstance(context)
        , vk(context.getDeviceInterface())
        , device(context.getDevice())
        , allocator(context.getDefaultAllocator())
        , rgenShader(createShaderModule(vk, device, context.getBinaryCollection().get("rgen")))
        , chitShader(createShaderModule(vk, device, context.getBinaryCollection().get("chit")))
        , missShader(createShaderModule(vk, device, context.getBinaryCollection().get("miss")))
        , m_params(params)
        , m_format(VK_FORMAT_R32G32B32A32_SFLOAT)
    {
    }

protected:
    const DeviceInterface &vk;       // device interface of the test context
    const VkDevice device;           // logical device of the test context
    Allocator &allocator;            // default allocator of the test context
    Move<VkShaderModule> rgenShader; // module made from the "rgen" binary
    Move<VkShaderModule> chitShader; // module made from the "chit" binary
    Move<VkShaderModule> missShader; // module made from the "miss" binary
    CopyWithinPipelineParamsPtr m_params;
    VkFormat m_format; // always VK_FORMAT_R32G32B32A32_SFLOAT
};
3993 
3994 class CopyBlasInstance : public CopyWithinPipelineInstance
3995 {
3996 public:
CopyBlasInstance(Context & context,CopyWithinPipelineParamsPtr params)3997     CopyBlasInstance(Context &context, CopyWithinPipelineParamsPtr params) : CopyWithinPipelineInstance(context, params)
3998     {
3999     }
4000     TestStatus iterate(void) override;
4001     auto getRefImage(BlasPtr blas) const -> de::MovePtr<BufferWithMemory>;
4002 };
4003 
4004 class CopySBTInstance : public CopyWithinPipelineInstance
4005 {
4006 public:
CopySBTInstance(Context & context,CopyWithinPipelineParamsPtr params)4007     CopySBTInstance(Context &context, CopyWithinPipelineParamsPtr params) : CopyWithinPipelineInstance(context, params)
4008     {
4009     }
4010     TestStatus iterate(void) override;
4011     auto getBufferSizeForSBT(const uint32_t &groupCount, const uint32_t &shaderGroupHandleSize,
4012                              const uint32_t &shaderGroupBaseAlignment) const -> VkDeviceSize;
4013     auto getBufferForSBT(const uint32_t &groupCount, const uint32_t &shaderGroupHandleSize,
4014                          const uint32_t &shaderGroupBaseAlignment) const -> de::MovePtr<BufferWithMemory>;
4015 };
4016 
4017 class PipelineStageASCase : public TestCase
4018 {
4019 public:
PipelineStageASCase(TestContext & ctx,const char * name,CopyWithinPipelineParamsPtr params)4020     PipelineStageASCase(TestContext &ctx, const char *name, CopyWithinPipelineParamsPtr params)
4021         : TestCase(ctx, name)
4022         , m_params(params)
4023     {
4024     }
4025     void initPrograms(SourceCollections &programs) const override;
4026     void checkSupport(Context &context) const override;
4027     TestInstance *createInstance(Context &context) const override;
4028 
4029 private:
4030     CopyWithinPipelineParamsPtr m_params;
4031 };
4032 
4033 namespace u
4034 {
4035 namespace details
4036 {
4037 template <class X, class Y>
4038 struct BarrierMaker
4039 {
4040     const X &m_x;
BarrierMakervkt::RayTracing::__anona25d47410111::u::details::BarrierMaker4041     BarrierMaker(const X &x) : m_x(x)
4042     {
4043     }
countvkt::RayTracing::__anona25d47410111::u::details::BarrierMaker4044     uint32_t count() const
4045     {
4046         return 1;
4047     }
pointervkt::RayTracing::__anona25d47410111::u::details::BarrierMaker4048     const X *pointer() const
4049     {
4050         return &m_x;
4051     }
4052 };
4053 template <class Y>
4054 struct BarrierMaker<std::false_type, Y>
4055 {
BarrierMakervkt::RayTracing::__anona25d47410111::u::details::BarrierMaker4056     BarrierMaker(const std::false_type &)
4057     {
4058     }
countvkt::RayTracing::__anona25d47410111::u::details::BarrierMaker4059     uint32_t count() const
4060     {
4061         return 0;
4062     }
pointervkt::RayTracing::__anona25d47410111::u::details::BarrierMaker4063     Y *pointer() const
4064     {
4065         return nullptr;
4066     }
4067 };
4068 template <class Z, uint32_t N>
4069 struct BarrierMaker<const Z[N], Z>
4070 {
4071     const Z (&m_a)[N];
BarrierMakervkt::RayTracing::__anona25d47410111::u::details::BarrierMaker4072     BarrierMaker(const Z (&a)[N]) : m_a(a)
4073     {
4074     }
countvkt::RayTracing::__anona25d47410111::u::details::BarrierMaker4075     uint32_t count() const
4076     {
4077         return N;
4078     }
pointervkt::RayTracing::__anona25d47410111::u::details::BarrierMaker4079     const Z *pointer() const
4080     {
4081         return m_a;
4082     }
4083 };
4084 template <class Mem, class Buf, class Img, class Exp>
4085 struct Sel
4086 {
4087     typedef typename std::remove_cv<Mem>::type t_Mem;
4088     typedef typename std::remove_cv<Buf>::type t_Buf;
4089     typedef typename std::remove_cv<Img>::type t_Img;
4090     typedef std::integral_constant<uint32_t, 0> index0;
4091     typedef std::integral_constant<uint32_t, 1> index1;
4092     typedef std::integral_constant<uint32_t, 2> index2;
4093     typedef std::integral_constant<uint32_t, 3> index3;
4094     using isMem = std::is_same<t_Mem, Exp>;
4095     using isBuf = std::is_same<t_Buf, Exp>;
4096     using isImg = std::is_same<t_Img, Exp>;
4097     template <bool B, class T, class F>
4098     using choose = typename std::conditional<B, T, F>::type;
4099     typedef choose<isMem::value, BarrierMaker<Mem, Exp>,
4100                    choose<isBuf::value, BarrierMaker<Buf, Exp>,
4101                           choose<isImg::value, BarrierMaker<Img, Exp>, BarrierMaker<std::false_type, Exp>>>>
4102         type;
4103     typedef choose<isMem::value, index0, choose<isBuf::value, index1, choose<isImg::value, index2, index3>>> index;
4104 };
4105 } // namespace details
4106 constexpr std::false_type NoneBarriers{};
4107 /**
4108  * @brief    Helper function that makes and populates VkDependencyInfoKHR structure.
4109  * @param    barriers1 - any of VkMemoryBarrier2KHR, VkBufferMemoryBarrier2KHR or VkImageMemoryBarrier2KHR (mandatory param)
4110  * @param    barriers2 - any of VkMemoryBarrier2KHR, VkBufferMemoryBarrier2KHR or VkImageMemoryBarrier2KHR (optional param)
4111  * @param    barriers2 - any of VkMemoryBarrier2KHR, VkBufferMemoryBarrier2KHR or VkImageMemoryBarrier2KHR (optional param)
4112  * @note    The order of the parameters does not matter.
4113  */
4114 template <class Barriers1, class Barriers2 = std::false_type, class Barriers3 = std::false_type>
makeDependency(const Barriers1 & barriers1,const Barriers2 & barriers2=NoneBarriers,const Barriers3 & barriers3=NoneBarriers)4115 VkDependencyInfoKHR makeDependency(const Barriers1 &barriers1, const Barriers2 &barriers2 = NoneBarriers,
4116                                    const Barriers3 &barriers3 = NoneBarriers)
4117 {
4118     auto args               = std::forward_as_tuple(barriers1, barriers2, barriers3, std::false_type());
4119     const uint32_t memIndex = details::Sel<Barriers1, Barriers2, Barriers3, VkMemoryBarrier2KHR>::index::value;
4120     const uint32_t bufIndex = details::Sel<Barriers1, Barriers2, Barriers3, VkBufferMemoryBarrier2KHR>::index::value;
4121     const uint32_t imgIndex = details::Sel<Barriers1, Barriers2, Barriers3, VkImageMemoryBarrier2KHR>::index::value;
4122     typedef typename details::Sel<Barriers1, Barriers2, Barriers3, VkMemoryBarrier2KHR>::type memType;
4123     typedef typename details::Sel<Barriers1, Barriers2, Barriers3, VkBufferMemoryBarrier2KHR>::type bufType;
4124     typedef typename details::Sel<Barriers1, Barriers2, Barriers3, VkImageMemoryBarrier2KHR>::type imgType;
4125     return {
4126         VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR,       // VkStructureType sType;
4127         nullptr,                                     // const void* pNext;
4128         VK_DEPENDENCY_BY_REGION_BIT,                 // VkDependencyFlags dependencyFlags;
4129         memType(std::get<memIndex>(args)).count(),   // uint32_t memoryBarrierCount;
4130         memType(std::get<memIndex>(args)).pointer(), // const VkMemoryBarrier2KHR* pMemoryBarriers;
4131         bufType(std::get<bufIndex>(args)).count(),   // uint32_t bufferMemoryBarrierCount;
4132         bufType(std::get<bufIndex>(args)).pointer(), // const VkBufferMemoryBarrier2KHR* pBufferMemoryBarriers;
4133         imgType(std::get<imgIndex>(args)).count(),   // uint32_t imageMemoryBarrierCount;
4134         imgType(std::get<imgIndex>(args)).pointer()  // const VkImageMemoryBarrier2KHR* pImageMemoryBarriers;
4135     };
4136 }
4137 } // namespace u
4138 
createInstance(Context & context) const4139 TestInstance *PipelineStageASCase::createInstance(Context &context) const
4140 {
4141     de::MovePtr<TestInstance> instance;
4142     switch (m_params->type)
4143     {
4144     case CopyWithinPipelineParams::Type::StageASCopyBit:
4145     case CopyWithinPipelineParams::Type::StageAllTransferBit:
4146         instance = makeMovePtr<CopyBlasInstance>(context, m_params);
4147         break;
4148     case CopyWithinPipelineParams::Type::AccessSBTReadBit:
4149         instance = makeMovePtr<CopySBTInstance>(context, m_params);
4150         break;
4151     }
4152     return instance.release();
4153 }
4154 
initPrograms(SourceCollections & programs) const4155 void PipelineStageASCase::initPrograms(SourceCollections &programs) const
4156 {
4157     const vk::ShaderBuildOptions buildOptions(programs.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);
4158     const char endl = '\n';
4159 
4160     {
4161         std::stringstream str;
4162         str << "#version 460 core" << endl
4163             << "#extension GL_EXT_ray_tracing : require" << endl
4164             << "layout(location = 0) rayPayloadEXT vec4 payload;" << endl
4165             << "layout(rgba32f, set = 0, binding = 0) uniform image2D result;" << endl
4166             << "layout(set = 0, binding = 1) uniform accelerationStructureEXT topLevelAS;" << endl
4167             << "void main()" << endl
4168             << "{" << endl
4169             << "  float rx           = (float(gl_LaunchIDEXT.x) + 0.5) / float(gl_LaunchSizeEXT.x);" << endl
4170             << "  float ry           = (float(gl_LaunchIDEXT.y) + 0.5) / float(gl_LaunchSizeEXT.y);" << endl
4171             << "  payload            = vec4(0.5, 0.5, 0.5, 1.0);" << endl
4172             << "  vec3  orig         = vec3(rx, ry, 1.0);" << endl
4173             << "  vec3  dir          = vec3(0.0, 0.0, -1.0);" << endl
4174             << "  traceRayEXT(topLevelAS, gl_RayFlagsNoneEXT, 0xFFu, 0, 0, 0, orig, 0.0, dir, 2.0, 0);" << endl
4175             << "  imageStore(result, ivec2(gl_LaunchIDEXT.xy), payload);" << endl
4176             << "}";
4177         str.flush();
4178         programs.glslSources.add("rgen") << glu::RaygenSource(str.str()) << buildOptions;
4179     }
4180 
4181     {
4182         std::stringstream str;
4183         str << "#version 460 core" << endl
4184             << "#extension GL_EXT_ray_tracing : require" << endl
4185             << "layout(location = 0) rayPayloadInEXT vec4 payload;" << endl
4186             << "void main()" << endl
4187             << "{" << endl
4188             << "  payload = vec4(0.0, 1.0, 0.0, 1.0);" << endl
4189             << "}";
4190         str.flush();
4191         programs.glslSources.add("chit") << glu::ClosestHitSource(str.str()) << buildOptions;
4192     }
4193 
4194     {
4195         std::stringstream str;
4196         str << "#version 460 core" << endl
4197             << "#extension GL_EXT_ray_tracing : require" << endl
4198             << "layout(location = 0) rayPayloadInEXT vec4 payload;" << endl
4199             << "void main()" << endl
4200             << "{" << endl
4201             << "  payload = vec4(1.0, 0.0, 0.0, 1.0);" << endl
4202             << "}";
4203         str.flush();
4204         programs.glslSources.add("miss") << glu::MissSource(str.str()) << buildOptions;
4205     }
4206 }
4207 
checkSupport(Context & context) const4208 void PipelineStageASCase::checkSupport(Context &context) const
4209 {
4210     context.requireInstanceFunctionality(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME);
4211     context.requireDeviceFunctionality(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME);
4212     context.requireDeviceFunctionality(VK_KHR_RAY_TRACING_MAINTENANCE_1_EXTENSION_NAME);
4213     context.requireDeviceFunctionality(VK_KHR_RAY_TRACING_PIPELINE_EXTENSION_NAME);
4214     context.requireDeviceFunctionality(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME);
4215 
4216     const VkPhysicalDeviceAccelerationStructureFeaturesKHR &accelerationStructureFeaturesKHR =
4217         context.getAccelerationStructureFeatures();
4218     if (m_params->build == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR &&
4219         accelerationStructureFeaturesKHR.accelerationStructureHostCommands == false)
4220         TCU_THROW(NotSupportedError,
4221                   "Requires VkPhysicalDeviceAccelerationStructureFeaturesKHR::accelerationStructureHostCommands");
4222 
4223     const VkPhysicalDeviceRayTracingMaintenance1FeaturesKHR &maintenance1FeaturesKHR =
4224         context.getRayTracingMaintenance1Features();
4225     if (maintenance1FeaturesKHR.rayTracingMaintenance1 == VK_FALSE)
4226         TCU_THROW(NotSupportedError,
4227                   "Requires VkPhysicalDeviceRayTracingMaintenance1FeaturesKHR::rayTracingMaintenance1");
4228 
4229     const VkPhysicalDeviceSynchronization2FeaturesKHR &synchronization2Features = context.getSynchronization2Features();
4230     if (synchronization2Features.synchronization2 == VK_FALSE)
4231         TCU_THROW(NotSupportedError, "Requires VkPhysicalDeviceSynchronization2FeaturesKHR::synchronization2");
4232 
4233     if (m_params->type != CopyWithinPipelineParams::Type::AccessSBTReadBit)
4234     {
4235         context.requireDeviceFunctionality(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
4236         const VkPhysicalDevicePushDescriptorPropertiesKHR &pushDescriptorProperties =
4237             context.getPushDescriptorProperties();
4238         if (pushDescriptorProperties.maxPushDescriptors < 32)
4239             TCU_THROW(NotSupportedError, "Requires VK_KHR_push_descriptor extension");
4240     }
4241 }
4242 
getRefImage(BlasPtr blas) const4243 auto CopyBlasInstance::getRefImage(BlasPtr blas) const -> de::MovePtr<BufferWithMemory>
4244 {
4245     const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
4246     const VkQueue queue             = m_context.getUniversalQueue();
4247 
4248     const de::MovePtr<RayTracingProperties> rtProps =
4249         makeRayTracingProperties(m_context.getInstanceInterface(), m_context.getPhysicalDevice());
4250     const uint32_t shaderGroupHandleSize    = rtProps->getShaderGroupHandleSize();
4251     const uint32_t shaderGroupBaseAlignment = rtProps->getShaderGroupBaseAlignment();
4252 
4253     const VkImageCreateInfo imageCreateInfo = makeImageCreateInfo(m_params->width, m_params->height, m_format);
4254     const VkImageSubresourceRange imageSubresourceRange =
4255         makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0, 1u);
4256     const de::MovePtr<ImageWithMemory> image =
4257         makeMovePtr<ImageWithMemory>(vk, device, allocator, imageCreateInfo, MemoryRequirement::Any);
4258     const Move<VkImageView> view =
4259         makeImageView(vk, device, **image, VK_IMAGE_VIEW_TYPE_2D, m_format, imageSubresourceRange);
4260 
4261     const uint32_t bufferSize = (m_params->width * m_params->height * mapVkFormat(m_format).getPixelSize());
4262     const VkBufferCreateInfo bufferCreateInfo = makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
4263     de::MovePtr<BufferWithMemory> buffer =
4264         makeMovePtr<BufferWithMemory>(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible);
4265 
4266     const VkImageSubresourceLayers imageSubresourceLayers =
4267         makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
4268     const VkBufferImageCopy bufferCopyImageRegion =
4269         makeBufferImageCopy(makeExtent3D(m_params->width, m_params->height, 1u), imageSubresourceLayers);
4270 
4271     de::MovePtr<RayTracingPipeline> rtPipeline = makeMovePtr<RayTracingPipeline>();
4272     rtPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR, *rgenShader, 0);
4273     rtPipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR, *chitShader, 1);
4274     rtPipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR, *missShader, 2);
4275 
4276     const Move<VkDescriptorPool> descriptorPool =
4277         DescriptorPoolBuilder()
4278             .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 2)
4279             .addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, 2)
4280             .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
4281     const Move<VkDescriptorSetLayout> descriptorSetLayout =
4282         DescriptorSetLayoutBuilder()
4283             .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, ALL_RAY_TRACING_STAGES)
4284             .addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, ALL_RAY_TRACING_STAGES)
4285             .build(vk, device);
4286     const Move<VkDescriptorSet> descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
4287 
4288     const Move<VkPipelineLayout> pipelineLayout = makePipelineLayout(vk, device, *descriptorSetLayout);
4289     Move<VkPipeline> pipeline                   = rtPipeline->createPipeline(vk, device, *pipelineLayout);
4290 
4291     de::MovePtr<BufferWithMemory> rgenSbt = rtPipeline->createShaderBindingTable(
4292         vk, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 0, 1);
4293     VkStridedDeviceAddressRegionKHR rgenRegion = makeStridedDeviceAddressRegionKHR(
4294         getBufferDeviceAddress(vk, device, **rgenSbt, 0), shaderGroupHandleSize, shaderGroupHandleSize);
4295     de::MovePtr<BufferWithMemory> chitSbt = rtPipeline->createShaderBindingTable(
4296         vk, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 1, 1);
4297     VkStridedDeviceAddressRegionKHR chitRegion = makeStridedDeviceAddressRegionKHR(
4298         getBufferDeviceAddress(vk, device, **chitSbt, 0), shaderGroupHandleSize, shaderGroupHandleSize);
4299     de::MovePtr<BufferWithMemory> missSbt = rtPipeline->createShaderBindingTable(
4300         vk, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 2, 1);
4301     VkStridedDeviceAddressRegionKHR missRegion = makeStridedDeviceAddressRegionKHR(
4302         getBufferDeviceAddress(vk, device, **missSbt, 0), shaderGroupHandleSize, shaderGroupHandleSize);
4303     const VkStridedDeviceAddressRegionKHR callRegion = makeStridedDeviceAddressRegionKHR(VkDeviceAddress(0), 0, 0);
4304 
4305     const VkClearValue clearValue = {{{0.1f, 0.2f, 0.3f, 0.4f}}};
4306 
4307     const VkImageMemoryBarrier2KHR preClearImageImageBarrier = makeImageMemoryBarrier2(
4308         VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR, 0, VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR,
4309         VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, **image,
4310         imageSubresourceRange, queueFamilyIndex, queueFamilyIndex);
4311     const VkImageMemoryBarrier2KHR postClearImageImageBarrier =
4312         makeImageMemoryBarrier2(VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR, VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR,
4313                                 VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR, VK_ACCESS_2_SHADER_READ_BIT_KHR,
4314                                 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL, **image,
4315                                 imageSubresourceRange, queueFamilyIndex, queueFamilyIndex);
4316     const VkDependencyInfoKHR preClearImageDependency  = u::makeDependency(preClearImageImageBarrier);
4317     const VkDependencyInfoKHR postClearImageDependency = u::makeDependency(postClearImageImageBarrier);
4318 
4319     const VkImageMemoryBarrier2KHR postTraceRaysImageBarrier = makeImageMemoryBarrier2(
4320         VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR, VK_ACCESS_2_SHADER_WRITE_BIT_KHR,
4321         VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR, VK_ACCESS_2_TRANSFER_READ_BIT_KHR, VK_IMAGE_LAYOUT_GENERAL,
4322         VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, **image, imageSubresourceRange, queueFamilyIndex, queueFamilyIndex);
4323     const VkImageMemoryBarrier2KHR postCopyImageImageBarrier = makeImageMemoryBarrier2(
4324         VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR, VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR, VK_PIPELINE_STAGE_2_HOST_BIT_KHR,
4325         VK_ACCESS_2_HOST_READ_BIT_KHR, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
4326         **image, imageSubresourceRange, queueFamilyIndex, queueFamilyIndex);
4327     const VkDependencyInfoKHR postTraceRaysDependency = u::makeDependency(postTraceRaysImageBarrier);
4328     const VkDependencyInfoKHR postCopyImageDependency = u::makeDependency(postCopyImageImageBarrier);
4329 
4330     const Move<VkCommandPool> cmdPool =
4331         createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex);
4332     const Move<VkCommandBuffer> cmdBuffer =
4333         allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
4334 
4335     auto tlas = makeTopLevelAccelerationStructure();
4336     tlas->setBuildType(m_params->build);
4337     tlas->setInstanceCount(1);
4338     tlas->addInstance(blas, identityMatrix3x4, 0, (~0u), 0, VkGeometryInstanceFlagsKHR(0));
4339     beginCommandBuffer(vk, *cmdBuffer);
4340     tlas->createAndBuild(vk, device, *cmdBuffer, allocator);
4341     endCommandBuffer(vk, *cmdBuffer);
4342     submitCommandsAndWait(vk, device, queue, *cmdBuffer);
4343 
4344     const VkDescriptorImageInfo descriptorImageInfo =
4345         makeDescriptorImageInfo(VkSampler(), *view, VK_IMAGE_LAYOUT_GENERAL);
4346     const VkWriteDescriptorSetAccelerationStructureKHR writeDescriptorTlas{
4347         VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, //  VkStructureType sType;
4348         nullptr,                                                           //  const void* pNext;
4349         1,                                                                 //  uint32_t accelerationStructureCount;
4350         tlas->getPtr() //  const VkAccelerationStructureKHR* pAccelerationStructures;
4351     };
4352 
4353     DescriptorSetUpdateBuilder()
4354         .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
4355                      VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descriptorImageInfo)
4356         .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
4357                      VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &writeDescriptorTlas)
4358         .update(vk, device);
4359 
4360     beginCommandBuffer(vk, *cmdBuffer);
4361     vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipeline);
4362     vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0, 1,
4363                              &descriptorSet.get(), 0, nullptr);
4364     vk.cmdPipelineBarrier2(*cmdBuffer, &preClearImageDependency);
4365     vk.cmdClearColorImage(*cmdBuffer, **image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clearValue.color, 1,
4366                           &imageSubresourceRange);
4367     vk.cmdPipelineBarrier2(*cmdBuffer, &postClearImageDependency);
4368     cmdTraceRays(vk, *cmdBuffer,
4369                  &rgenRegion, // rgen
4370                  &missRegion, // miss
4371                  &chitRegion, // hit
4372                  &callRegion, // call
4373                  m_params->width, m_params->height, 1);
4374     vk.cmdPipelineBarrier2(*cmdBuffer, &postTraceRaysDependency);
4375     vk.cmdCopyImageToBuffer(*cmdBuffer, **image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, **buffer, 1u,
4376                             &bufferCopyImageRegion);
4377     vk.cmdPipelineBarrier2(*cmdBuffer, &postCopyImageDependency);
4378     endCommandBuffer(vk, *cmdBuffer);
4379     submitCommandsAndWait(vk, device, queue, *cmdBuffer);
4380 
4381     invalidateMappedMemoryRange(vk, device, buffer->getAllocation().getMemory(), buffer->getAllocation().getOffset(),
4382                                 bufferSize);
4383 
4384     return buffer;
4385 }
4386 
iterate(void)4387 TestStatus CopyBlasInstance::iterate(void)
4388 {
4389     const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
4390     const VkQueue queue             = m_context.getUniversalQueue();
4391 
4392     const de::MovePtr<RayTracingProperties> rtProps =
4393         makeRayTracingProperties(m_context.getInstanceInterface(), m_context.getPhysicalDevice());
4394     const uint32_t shaderGroupHandleSize    = rtProps->getShaderGroupHandleSize();
4395     const uint32_t shaderGroupBaseAlignment = rtProps->getShaderGroupBaseAlignment();
4396 
4397     const VkImageCreateInfo imageCreateInfo = makeImageCreateInfo(m_params->width, m_params->height, m_format);
4398     const VkImageSubresourceRange imageSubresourceRange =
4399         makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0, 1u);
4400     const de::MovePtr<ImageWithMemory> image =
4401         makeMovePtr<ImageWithMemory>(vk, device, allocator, imageCreateInfo, MemoryRequirement::Any);
4402     const Move<VkImageView> view =
4403         makeImageView(vk, device, **image, VK_IMAGE_VIEW_TYPE_2D, m_format, imageSubresourceRange);
4404 
4405     const uint32_t bufferSize = (m_params->width * m_params->height * mapVkFormat(m_format).getPixelSize());
4406     const VkBufferCreateInfo bufferCreateInfo = makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
4407     de::MovePtr<BufferWithMemory> resultImageBuffer =
4408         makeMovePtr<BufferWithMemory>(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible);
4409 
4410     const VkImageSubresourceLayers imageSubresourceLayers =
4411         makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
4412     const VkBufferImageCopy bufferCopyImageRegion =
4413         makeBufferImageCopy(makeExtent3D(m_params->width, m_params->height, 1u), imageSubresourceLayers);
4414 
4415     de::MovePtr<RayTracingPipeline> rtPipeline = makeMovePtr<RayTracingPipeline>();
4416     rtPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR, *rgenShader, 0);
4417     rtPipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR, *chitShader, 1);
4418     rtPipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR, *missShader, 2);
4419 
4420     const Move<VkDescriptorSetLayout> descriptorSetLayout =
4421         DescriptorSetLayoutBuilder()
4422             .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, ALL_RAY_TRACING_STAGES)
4423             .addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, ALL_RAY_TRACING_STAGES)
4424             .build(vk, device, VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR);
4425 
4426     const Move<VkPipelineLayout> pipelineLayout = makePipelineLayout(vk, device, *descriptorSetLayout);
4427     Move<VkPipeline> pipeline                   = rtPipeline->createPipeline(vk, device, *pipelineLayout);
4428 
4429     de::MovePtr<BufferWithMemory> rgenSbt = rtPipeline->createShaderBindingTable(
4430         vk, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 0, 1);
4431     VkStridedDeviceAddressRegionKHR rgenRegion = makeStridedDeviceAddressRegionKHR(
4432         getBufferDeviceAddress(vk, device, **rgenSbt, 0), shaderGroupHandleSize, shaderGroupHandleSize);
4433     de::MovePtr<BufferWithMemory> chitSbt = rtPipeline->createShaderBindingTable(
4434         vk, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 1, 1);
4435     VkStridedDeviceAddressRegionKHR chitRegion = makeStridedDeviceAddressRegionKHR(
4436         getBufferDeviceAddress(vk, device, **chitSbt, 0), shaderGroupHandleSize, shaderGroupHandleSize);
4437     de::MovePtr<BufferWithMemory> missSbt = rtPipeline->createShaderBindingTable(
4438         vk, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 2, 1);
4439     VkStridedDeviceAddressRegionKHR missRegion = makeStridedDeviceAddressRegionKHR(
4440         getBufferDeviceAddress(vk, device, **missSbt, 0), shaderGroupHandleSize, shaderGroupHandleSize);
4441     const VkStridedDeviceAddressRegionKHR callRegion = makeStridedDeviceAddressRegionKHR(VkDeviceAddress(0), 0, 0);
4442 
4443     const VkClearValue clearValue = {{{0.1f, 0.2f, 0.3f, 0.4f}}};
4444 
4445     const VkImageMemoryBarrier2KHR preClearImageImageBarrier = makeImageMemoryBarrier2(
4446         VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR, 0, VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR,
4447         VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, **image,
4448         imageSubresourceRange, queueFamilyIndex, queueFamilyIndex);
4449     const VkImageMemoryBarrier2KHR postClearImageImageBarrier =
4450         makeImageMemoryBarrier2(VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR, VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR,
4451                                 VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR, VK_ACCESS_2_SHADER_READ_BIT_KHR,
4452                                 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL, **image,
4453                                 imageSubresourceRange, queueFamilyIndex, queueFamilyIndex);
4454     const VkDependencyInfoKHR preClearImageDependency  = u::makeDependency(preClearImageImageBarrier);
4455     const VkDependencyInfoKHR postClearImageDependency = u::makeDependency(postClearImageImageBarrier);
4456 
4457     const VkImageMemoryBarrier2KHR postTraceRaysImageBarrier = makeImageMemoryBarrier2(
4458         VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR, VK_ACCESS_2_SHADER_WRITE_BIT_KHR,
4459         VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR, VK_ACCESS_2_TRANSFER_READ_BIT_KHR, VK_IMAGE_LAYOUT_GENERAL,
4460         VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, **image, imageSubresourceRange, queueFamilyIndex, queueFamilyIndex);
4461     const VkImageMemoryBarrier2KHR postCopyImageImageBarrier = makeImageMemoryBarrier2(
4462         VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR, VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR, VK_PIPELINE_STAGE_2_HOST_BIT_KHR,
4463         VK_ACCESS_2_HOST_READ_BIT_KHR, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
4464         **image, imageSubresourceRange, queueFamilyIndex, queueFamilyIndex);
4465     const VkDependencyInfoKHR postTraceRaysDependency = u::makeDependency(postTraceRaysImageBarrier);
4466     const VkDependencyInfoKHR postCopyImageDependency = u::makeDependency(postCopyImageImageBarrier);
4467     const VkPipelineStageFlags2KHR srcStageMask     = m_params->type == CopyWithinPipelineParams::Type::StageASCopyBit ?
4468                                                           VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_COPY_BIT_KHR :
4469                                                           VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT_KHR;
4470     const VkMemoryBarrier2KHR copyBlasMemoryBarrier = makeMemoryBarrier2(
4471         srcStageMask, VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR, VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
4472         VK_ACCESS_2_ACCELERATION_STRUCTURE_READ_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR);
4473     const VkDependencyInfoKHR copyBlasDependency = u::makeDependency(copyBlasMemoryBarrier);
4474 
4475     const Move<VkCommandPool> cmdPool =
4476         createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex);
4477     const Move<VkCommandBuffer> cmdBuffer =
4478         allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
4479 
4480     std::vector<VkDeviceSize> blasSize(1);
4481     BlasPtr blas1(makeBottomLevelAccelerationStructure().release());
4482 
4483     // After this block the blas1 stays on device or host respectively to its build type.
4484     // Once it is created it is asked for the serialization size that will be used for a
4485     // creation of an empty blas2. Probably this size will be bigger than it is needed but
4486     // one thing that is important is it must not be less.
4487     {
4488         const VkQueryType query         = VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR;
4489         Move<VkQueryPool> queryPoolSize = makeQueryPool(vk, device, query, 1);
4490         beginCommandBuffer(vk, *cmdBuffer);
4491         blas1->setBuildType(m_params->build);
4492         blas1->setGeometryData({{0.0, 0.0, 0.0}, {1.0, 0.0, 0.0}, {0.0, 1.0, 0.0}}, true, VK_GEOMETRY_OPAQUE_BIT_KHR);
4493         blas1->createAndBuild(vk, device, *cmdBuffer, allocator);
4494         queryAccelerationStructureSize(vk, device, *cmdBuffer, {*blas1->getPtr()}, m_params->build, *queryPoolSize,
4495                                        query, 0u, blasSize);
4496         endCommandBuffer(vk, *cmdBuffer);
4497         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
4498         if (m_params->build == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
4499         {
4500             VK_CHECK(vk.getQueryPoolResults(device, *queryPoolSize, 0u, 1, sizeof(VkDeviceSize), blasSize.data(),
4501                                             sizeof(VkDeviceSize), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
4502         }
4503     }
4504 
4505     de::MovePtr<BufferWithMemory> referenceImageBuffer = getRefImage(blas1);
4506 
4507     // Create blas2 as empty struct
4508     BlasPtr blas2(makeBottomLevelAccelerationStructure().release());
4509     blas2->create(vk, device, allocator, blasSize[0]);
4510 
4511     auto tlas = makeTopLevelAccelerationStructure();
4512     tlas->setBuildType(m_params->build);
4513     tlas->setInstanceCount(1);
4514     tlas->addInstance(blas2, identityMatrix3x4, 0, (~0u), 0, VkGeometryInstanceFlagsKHR(0));
4515 
4516     const VkCopyAccelerationStructureInfoKHR copyBlasInfo{
4517         VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_INFO_KHR, // VkStructureType sType;
4518         nullptr,                                                // const void* pNext;
4519         *blas1->getPtr(),                                       // VkAccelerationStructureKHR src;
4520         *blas2->getPtr(),                                       // VkAccelerationStructureKHR dst;
4521         VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_KHR           // VkCopyAccelerationStructureModeKHR mode;
4522     };
4523 
4524     beginCommandBuffer(vk, *cmdBuffer);
4525     vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipeline);
4526 
4527     if (m_params->build == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR)
4528     {
4529         vk.cmdCopyAccelerationStructureKHR(*cmdBuffer, &copyBlasInfo);
4530         vk.cmdPipelineBarrier2(*cmdBuffer, &copyBlasDependency);
4531     }
4532     else
4533         VK_CHECK(vk.copyAccelerationStructureKHR(device, VkDeferredOperationKHR(0), &copyBlasInfo));
4534 
4535     tlas->createAndBuild(vk, device, *cmdBuffer, allocator);
4536 
4537     const VkDescriptorImageInfo descriptorImageInfo =
4538         makeDescriptorImageInfo(VkSampler(), *view, VK_IMAGE_LAYOUT_GENERAL);
4539     const VkWriteDescriptorSetAccelerationStructureKHR writeDescriptorTlas{
4540         VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, //  VkStructureType sType;
4541         nullptr,                                                           //  const void* pNext;
4542         1,                                                                 //  uint32_t accelerationStructureCount;
4543         tlas->getPtr() //  const VkAccelerationStructureKHR* pAccelerationStructures;
4544     };
4545 
4546     DescriptorSetUpdateBuilder()
4547         .writeSingle(VkDescriptorSet(), DescriptorSetUpdateBuilder::Location::binding(0u),
4548                      VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descriptorImageInfo)
4549         .writeSingle(VkDescriptorSet(), DescriptorSetUpdateBuilder::Location::binding(1u),
4550                      VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &writeDescriptorTlas)
4551         .updateWithPush(vk, *cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0, 0, 2);
4552 
4553     vk.cmdPipelineBarrier2(*cmdBuffer, &preClearImageDependency);
4554     vk.cmdClearColorImage(*cmdBuffer, **image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clearValue.color, 1,
4555                           &imageSubresourceRange);
4556     vk.cmdPipelineBarrier2(*cmdBuffer, &postClearImageDependency);
4557 
4558     cmdTraceRays(vk, *cmdBuffer,
4559                  &rgenRegion, // rgen
4560                  &missRegion, // miss
4561                  &chitRegion, // hit
4562                  &callRegion, // call
4563                  m_params->width, m_params->height, 1);
4564 
4565     vk.cmdPipelineBarrier2(*cmdBuffer, &postTraceRaysDependency);
4566     vk.cmdCopyImageToBuffer(*cmdBuffer, **image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, **resultImageBuffer, 1u,
4567                             &bufferCopyImageRegion);
4568     vk.cmdPipelineBarrier2(*cmdBuffer, &postCopyImageDependency);
4569 
4570     endCommandBuffer(vk, *cmdBuffer);
4571     submitCommandsAndWait(vk, device, queue, *cmdBuffer);
4572 
4573     invalidateMappedMemoryRange(vk, device, resultImageBuffer->getAllocation().getMemory(),
4574                                 resultImageBuffer->getAllocation().getOffset(), bufferSize);
4575 
4576     const void *referenceImageData = referenceImageBuffer->getAllocation().getHostPtr();
4577     const void *resultImageData    = resultImageBuffer->getAllocation().getHostPtr();
4578 
4579     return (deMemCmp(referenceImageData, resultImageData, bufferSize) == 0) ?
4580                TestStatus::pass("") :
4581                TestStatus::fail("Reference and result images differ");
4582 }
4583 
getBufferSizeForSBT(const uint32_t & groupCount,const uint32_t & shaderGroupHandleSize,const uint32_t & shaderGroupBaseAlignment) const4584 VkDeviceSize CopySBTInstance::getBufferSizeForSBT(const uint32_t &groupCount, const uint32_t &shaderGroupHandleSize,
4585                                                   const uint32_t &shaderGroupBaseAlignment) const
4586 {
4587     DE_UNREF(shaderGroupBaseAlignment);
4588     return (groupCount * deAlign32(shaderGroupHandleSize, shaderGroupHandleSize));
4589 }
4590 
getBufferForSBT(const uint32_t & groupCount,const uint32_t & shaderGroupHandleSize,const uint32_t & shaderGroupBaseAlignment) const4591 de::MovePtr<BufferWithMemory> CopySBTInstance::getBufferForSBT(const uint32_t &groupCount,
4592                                                                const uint32_t &shaderGroupHandleSize,
4593                                                                const uint32_t &shaderGroupBaseAlignment) const
4594 {
4595     const VkDeviceSize sbtSize = getBufferSizeForSBT(groupCount, shaderGroupHandleSize, shaderGroupBaseAlignment);
4596     const VkBufferUsageFlags sbtFlags = VK_BUFFER_USAGE_TRANSFER_DST_BIT |
4597                                         VK_BUFFER_USAGE_SHADER_BINDING_TABLE_BIT_KHR |
4598                                         VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
4599     const VkBufferCreateInfo sbtCreateInfo = makeBufferCreateInfo(sbtSize, sbtFlags);
4600     const MemoryRequirement sbtMemRequirements =
4601         MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress;
4602 
4603     return makeMovePtr<BufferWithMemory>(vk, device, allocator, sbtCreateInfo, sbtMemRequirements);
4604 }
4605 
iterate(void)4606 TestStatus CopySBTInstance::iterate(void)
4607 {
4608     const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
4609     const VkQueue queue             = m_context.getUniversalQueue();
4610 
4611     const de::MovePtr<RayTracingProperties> rtProps =
4612         makeRayTracingProperties(m_context.getInstanceInterface(), m_context.getPhysicalDevice());
4613     const uint32_t shaderGroupHandleSize    = rtProps->getShaderGroupHandleSize();
4614     const uint32_t shaderGroupBaseAlignment = rtProps->getShaderGroupBaseAlignment();
4615 
4616     const VkImageCreateInfo imageCreateInfo = makeImageCreateInfo(m_params->width, m_params->height, m_format);
4617     const VkImageSubresourceRange imageSubresourceRange =
4618         makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0, 1u);
4619     const de::MovePtr<ImageWithMemory> image =
4620         makeMovePtr<ImageWithMemory>(vk, device, allocator, imageCreateInfo, MemoryRequirement::Any);
4621     const Move<VkImageView> view =
4622         makeImageView(vk, device, **image, VK_IMAGE_VIEW_TYPE_2D, m_format, imageSubresourceRange);
4623 
4624     const uint32_t bufferSize = (m_params->width * m_params->height * mapVkFormat(m_format).getPixelSize());
4625     const VkBufferCreateInfo bufferCreateInfo = makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
4626     de::MovePtr<BufferWithMemory> referenceImageBuffer =
4627         makeMovePtr<BufferWithMemory>(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible);
4628     de::MovePtr<BufferWithMemory> resultImageBuffer =
4629         makeMovePtr<BufferWithMemory>(vk, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible);
4630 
4631     const VkImageSubresourceLayers imageSubresourceLayers =
4632         makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
4633     const VkBufferImageCopy bufferCopyImageRegion =
4634         makeBufferImageCopy(makeExtent3D(m_params->width, m_params->height, 1u), imageSubresourceLayers);
4635 
4636     de::MovePtr<RayTracingPipeline> rtPipeline = makeMovePtr<RayTracingPipeline>();
4637     rtPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR, *rgenShader, 0);
4638     rtPipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR, *chitShader, 1);
4639     rtPipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR, *missShader, 2);
4640 
4641     const Move<VkDescriptorPool> descriptorPool =
4642         DescriptorPoolBuilder()
4643             .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
4644             .addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR)
4645             .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
4646     const Move<VkDescriptorSetLayout> descriptorSetLayout =
4647         DescriptorSetLayoutBuilder()
4648             .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, ALL_RAY_TRACING_STAGES)
4649             .addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, ALL_RAY_TRACING_STAGES)
4650             .build(vk, device);
4651     const Move<VkDescriptorSet> descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
4652 
4653     const Move<VkPipelineLayout> pipelineLayout = makePipelineLayout(vk, device, *descriptorSetLayout);
4654     Move<VkPipeline> pipeline                   = rtPipeline->createPipeline(vk, device, *pipelineLayout);
4655 
4656     de::MovePtr<BufferWithMemory> sourceRgenSbt = rtPipeline->createShaderBindingTable(
4657         vk, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 0, 1, VkBufferCreateFlags(0),
4658         VK_BUFFER_USAGE_TRANSFER_SRC_BIT);
4659     VkStridedDeviceAddressRegionKHR sourceRgenRegion = makeStridedDeviceAddressRegionKHR(
4660         getBufferDeviceAddress(vk, device, **sourceRgenSbt, 0), shaderGroupHandleSize, shaderGroupHandleSize);
4661     de::MovePtr<BufferWithMemory> copyRgenSbt = getBufferForSBT(1, shaderGroupHandleSize, shaderGroupBaseAlignment);
4662     VkStridedDeviceAddressRegionKHR copyRgenRegion = makeStridedDeviceAddressRegionKHR(
4663         getBufferDeviceAddress(vk, device, **copyRgenSbt, 0), shaderGroupHandleSize, shaderGroupHandleSize);
4664     de::MovePtr<BufferWithMemory> chitSbt = rtPipeline->createShaderBindingTable(
4665         vk, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 1, 1);
4666     VkStridedDeviceAddressRegionKHR chitRegion = makeStridedDeviceAddressRegionKHR(
4667         getBufferDeviceAddress(vk, device, **chitSbt, 0), shaderGroupHandleSize, shaderGroupHandleSize);
4668     de::MovePtr<BufferWithMemory> missSbt = rtPipeline->createShaderBindingTable(
4669         vk, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 2, 1);
4670     VkStridedDeviceAddressRegionKHR missRegion = makeStridedDeviceAddressRegionKHR(
4671         getBufferDeviceAddress(vk, device, **missSbt, 0), shaderGroupHandleSize, shaderGroupHandleSize);
4672     const VkStridedDeviceAddressRegionKHR callRegion = makeStridedDeviceAddressRegionKHR(VkDeviceAddress(0), 0, 0);
4673 
4674     const VkClearValue clearValue = {{{0.1f, 0.2f, 0.3f, 0.4f}}};
4675 
4676     const VkImageMemoryBarrier2KHR preClearImageImageBarrier = makeImageMemoryBarrier2(
4677         VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR, 0, VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR,
4678         VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, **image,
4679         imageSubresourceRange, queueFamilyIndex, queueFamilyIndex);
4680     const VkImageMemoryBarrier2KHR postClearImageImageBarrier =
4681         makeImageMemoryBarrier2(VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR, VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR,
4682                                 VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR, VK_ACCESS_2_SHADER_READ_BIT_KHR,
4683                                 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL, **image,
4684                                 imageSubresourceRange, queueFamilyIndex, queueFamilyIndex);
4685     const VkDependencyInfoKHR preClearImageDependency  = u::makeDependency(preClearImageImageBarrier);
4686     const VkDependencyInfoKHR postClearImageDependency = u::makeDependency(postClearImageImageBarrier);
4687 
4688     const VkImageMemoryBarrier2KHR postTraceRaysImageBarrier = makeImageMemoryBarrier2(
4689         VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR, VK_ACCESS_2_SHADER_WRITE_BIT_KHR,
4690         VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR, VK_ACCESS_2_TRANSFER_READ_BIT_KHR, VK_IMAGE_LAYOUT_GENERAL,
4691         VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, **image, imageSubresourceRange, queueFamilyIndex, queueFamilyIndex);
4692     const VkImageMemoryBarrier2KHR postCopyImageImageBarrier = makeImageMemoryBarrier2(
4693         VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR, VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR, VK_PIPELINE_STAGE_2_HOST_BIT_KHR,
4694         VK_ACCESS_2_HOST_READ_BIT_KHR, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
4695         **image, imageSubresourceRange, queueFamilyIndex, queueFamilyIndex);
4696     const VkDependencyInfoKHR postTraceRaysDependency = u::makeDependency(postTraceRaysImageBarrier);
4697     const VkDependencyInfoKHR postCopyImageDependency = u::makeDependency(postCopyImageImageBarrier);
4698 
4699     const Move<VkCommandPool> cmdPool =
4700         createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex);
4701     const Move<VkCommandBuffer> cmdBuffer =
4702         allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
4703 
4704     auto tlas = makeTopLevelAccelerationStructure();
4705     BlasPtr blas(makeBottomLevelAccelerationStructure().release());
4706     blas->setBuildType(m_params->build);
4707     blas->setGeometryData({{0.0, 0.0, 0.0}, {1.0, 0.0, 0.0}, {0.0, 1.0, 0.0}}, true, VK_GEOMETRY_OPAQUE_BIT_KHR);
4708     tlas->setBuildType(m_params->build);
4709     tlas->setInstanceCount(1);
4710     tlas->addInstance(blas, identityMatrix3x4, 0, (~0u), 0, VkGeometryInstanceFlagsKHR(0));
4711     beginCommandBuffer(vk, *cmdBuffer);
4712     blas->createAndBuild(vk, device, *cmdBuffer, allocator);
4713     tlas->createAndBuild(vk, device, *cmdBuffer, allocator);
4714     endCommandBuffer(vk, *cmdBuffer);
4715     submitCommandsAndWait(vk, device, queue, *cmdBuffer);
4716 
4717     const VkDescriptorImageInfo descriptorImageInfo =
4718         makeDescriptorImageInfo(VkSampler(), *view, VK_IMAGE_LAYOUT_GENERAL);
4719     const VkWriteDescriptorSetAccelerationStructureKHR writeDescriptorTlas{
4720         VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, //  VkStructureType sType;
4721         nullptr,                                                           //  const void* pNext;
4722         1,                                                                 //  uint32_t accelerationStructureCount;
4723         tlas->getPtr() //  const VkAccelerationStructureKHR* pAccelerationStructures;
4724     };
4725 
4726     DescriptorSetUpdateBuilder()
4727         .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
4728                      VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descriptorImageInfo)
4729         .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
4730                      VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &writeDescriptorTlas)
4731         .update(vk, device);
4732 
4733     beginCommandBuffer(vk, *cmdBuffer);
4734     vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipeline);
4735     vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0, 1,
4736                              &descriptorSet.get(), 0, nullptr);
4737     vk.cmdPipelineBarrier2(*cmdBuffer, &preClearImageDependency);
4738     vk.cmdClearColorImage(*cmdBuffer, **image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clearValue.color, 1,
4739                           &imageSubresourceRange);
4740     vk.cmdPipelineBarrier2(*cmdBuffer, &postClearImageDependency);
4741     cmdTraceRays(vk, *cmdBuffer,
4742                  &sourceRgenRegion, // rgen
4743                  &missRegion,       // miss
4744                  &chitRegion,       // hit
4745                  &callRegion,       // call
4746                  m_params->width, m_params->height, 1);
4747     vk.cmdPipelineBarrier2(*cmdBuffer, &postTraceRaysDependency);
4748     vk.cmdCopyImageToBuffer(*cmdBuffer, **image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, **referenceImageBuffer, 1u,
4749                             &bufferCopyImageRegion);
4750     vk.cmdPipelineBarrier2(*cmdBuffer, &postCopyImageDependency);
4751     endCommandBuffer(vk, *cmdBuffer);
4752     submitCommandsAndWait(vk, device, queue, *cmdBuffer);
4753 
4754     const VkBufferCopy bufferCopy{0, // VkDeviceSize srcOffset;
4755                                   0, // VkDeviceSize srcOffset;
4756                                   getBufferSizeForSBT(1, shaderGroupHandleSize, shaderGroupBaseAlignment)};
4757     const VkMemoryBarrier2KHR postCopySBTMemoryBarrier = makeMemoryBarrier2(
4758         VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR, VkAccessFlags2KHR(0), VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR,
4759         VK_ACCESS_2_SHADER_BINDING_TABLE_READ_BIT_KHR);
4760     const VkDependencyInfoKHR postClearImgCopySBTDependency =
4761         u::makeDependency(postCopySBTMemoryBarrier, postClearImageImageBarrier);
4762 
4763     beginCommandBuffer(vk, *cmdBuffer);
4764     vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipeline);
4765     vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0, 1,
4766                              &descriptorSet.get(), 0, nullptr);
4767     vk.cmdClearColorImage(*cmdBuffer, **image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clearValue.color, 1,
4768                           &imageSubresourceRange);
4769     vk.cmdCopyBuffer(*cmdBuffer, **sourceRgenSbt, **copyRgenSbt, 1, &bufferCopy);
4770     vk.cmdPipelineBarrier2(*cmdBuffer, &postClearImgCopySBTDependency);
4771     cmdTraceRays(vk, *cmdBuffer,
4772                  &copyRgenRegion, // rgen
4773                  &missRegion,     // miss
4774                  &chitRegion,     // hit
4775                  &callRegion,     // call
4776                  m_params->width, m_params->height, 1);
4777     vk.cmdPipelineBarrier2(*cmdBuffer, &postTraceRaysDependency);
4778     vk.cmdCopyImageToBuffer(*cmdBuffer, **image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, **resultImageBuffer, 1u,
4779                             &bufferCopyImageRegion);
4780     vk.cmdPipelineBarrier2(*cmdBuffer, &postCopyImageDependency);
4781     endCommandBuffer(vk, *cmdBuffer);
4782     submitCommandsAndWait(vk, device, queue, *cmdBuffer);
4783 
4784     invalidateMappedMemoryRange(vk, device, referenceImageBuffer->getAllocation().getMemory(),
4785                                 referenceImageBuffer->getAllocation().getOffset(), bufferSize);
4786     invalidateMappedMemoryRange(vk, device, resultImageBuffer->getAllocation().getMemory(),
4787                                 resultImageBuffer->getAllocation().getOffset(), bufferSize);
4788 
4789     const void *referenceImageDataPtr = referenceImageBuffer->getAllocation().getHostPtr();
4790     const void *resultImageDataPtr    = resultImageBuffer->getAllocation().getHostPtr();
4791 
4792     return (deMemCmp(referenceImageDataPtr, resultImageDataPtr, bufferSize) == 0) ? TestStatus::pass("") :
4793                                                                                     TestStatus::fail("");
4794 }
4795 
4796 class ASUpdateCase : public RayTracingASBasicTestCase
4797 {
4798 public:
4799     ASUpdateCase(tcu::TestContext &context, const char *name, const TestParams &data);
4800     ~ASUpdateCase(void);
4801 
4802     TestInstance *createInstance(Context &context) const override;
4803 };
4804 
4805 class ASUpdateInstance : public RayTracingASBasicTestInstance
4806 {
4807 public:
4808     ASUpdateInstance(Context &context, const TestParams &data);
4809     ~ASUpdateInstance(void) = default;
4810     tcu::TestStatus iterate(void) override;
4811 
4812 private:
4813     TestParams m_data;
4814 };
4815 
ASUpdateCase(tcu::TestContext & context,const char * name,const TestParams & data)4816 ASUpdateCase::ASUpdateCase(tcu::TestContext &context, const char *name, const TestParams &data)
4817     : RayTracingASBasicTestCase(context, name, data)
4818 {
4819 }
4820 
~ASUpdateCase(void)4821 ASUpdateCase::~ASUpdateCase(void)
4822 {
4823 }
4824 
createInstance(Context & context) const4825 TestInstance *ASUpdateCase::createInstance(Context &context) const
4826 {
4827     return new ASUpdateInstance(context, m_data);
4828 }
4829 
ASUpdateInstance(Context & context,const TestParams & data)4830 ASUpdateInstance::ASUpdateInstance(Context &context, const TestParams &data)
4831     : RayTracingASBasicTestInstance(context, data)
4832     , m_data(data)
4833 {
4834 }
4835 
iterate(void)4836 TestStatus ASUpdateInstance::iterate(void)
4837 {
4838     const InstanceInterface &vki            = m_context.getInstanceInterface();
4839     const DeviceInterface &vkd              = m_context.getDeviceInterface();
4840     const VkDevice device                   = m_context.getDevice();
4841     const VkPhysicalDevice physicalDevice   = m_context.getPhysicalDevice();
4842     const uint32_t queueFamilyIndex         = m_context.getUniversalQueueFamilyIndex();
4843     const VkQueue queue                     = m_context.getUniversalQueue();
4844     Allocator &allocator                    = m_context.getDefaultAllocator();
4845     const uint32_t pixelCount               = m_data.width * m_data.height;
4846     const uint32_t shaderGroupHandleSize    = getShaderGroupSize(vki, physicalDevice);
4847     const uint32_t shaderGroupBaseAlignment = getShaderGroupBaseAlignment(vki, physicalDevice);
4848 
4849     const Move<VkDescriptorSetLayout> descriptorSetLayout =
4850         DescriptorSetLayoutBuilder()
4851             .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, ALL_RAY_TRACING_STAGES)
4852             .addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, ALL_RAY_TRACING_STAGES)
4853             .build(vkd, device);
4854     const Move<VkDescriptorPool> descriptorPool =
4855         DescriptorPoolBuilder()
4856             .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
4857             .addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR)
4858             .build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
4859     const Move<VkDescriptorSet> descriptorSet   = makeDescriptorSet(vkd, device, *descriptorPool, *descriptorSetLayout);
4860     const Move<VkPipelineLayout> pipelineLayout = makePipelineLayout(vkd, device, descriptorSetLayout.get());
4861 
4862     de::MovePtr<RayTracingPipeline> rayTracingPipeline = de::newMovePtr<RayTracingPipeline>();
4863     m_data.testConfiguration->initRayTracingShaders(rayTracingPipeline, m_context, m_data);
4864     Move<VkPipeline> pipeline = rayTracingPipeline->createPipeline(vkd, device, *pipelineLayout);
4865 
4866     de::MovePtr<BufferWithMemory> raygenShaderBindingTable;
4867     de::MovePtr<BufferWithMemory> hitShaderBindingTable;
4868     de::MovePtr<BufferWithMemory> missShaderBindingTable;
4869     m_data.testConfiguration->initShaderBindingTables(
4870         rayTracingPipeline, m_context, m_data, *pipeline, shaderGroupHandleSize, shaderGroupBaseAlignment,
4871         raygenShaderBindingTable, hitShaderBindingTable, missShaderBindingTable);
4872 
4873     const VkStridedDeviceAddressRegionKHR raygenShaderBindingTableRegion =
4874         makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, raygenShaderBindingTable->get(), 0),
4875                                           shaderGroupHandleSize, shaderGroupHandleSize);
4876     const VkStridedDeviceAddressRegionKHR missShaderBindingTableRegion =
4877         makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, missShaderBindingTable->get(), 0),
4878                                           shaderGroupHandleSize, shaderGroupHandleSize);
4879     const VkStridedDeviceAddressRegionKHR hitShaderBindingTableRegion =
4880         makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, hitShaderBindingTable->get(), 0),
4881                                           shaderGroupHandleSize, shaderGroupHandleSize);
4882     const VkStridedDeviceAddressRegionKHR callableShaderBindingTableRegion =
4883         makeStridedDeviceAddressRegionKHR(DE_NULL, 0, 0);
4884 
4885     const VkFormat imageFormat              = m_data.testConfiguration->getResultImageFormat();
4886     const VkImageCreateInfo imageCreateInfo = makeImageCreateInfo(m_data.width, m_data.height, imageFormat);
4887     const VkImageSubresourceRange imageSubresourceRange =
4888         makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0, 1u);
4889     const de::MovePtr<ImageWithMemory> image = de::MovePtr<ImageWithMemory>(
4890         new ImageWithMemory(vkd, device, allocator, imageCreateInfo, MemoryRequirement::Any));
4891     const Move<VkImageView> imageView =
4892         makeImageView(vkd, device, **image, VK_IMAGE_VIEW_TYPE_2D, imageFormat, imageSubresourceRange);
4893 
4894     const VkBufferCreateInfo resultBufferCreateInfo = makeBufferCreateInfo(
4895         pixelCount * m_data.testConfiguration->getResultImageFormatSize(), VK_BUFFER_USAGE_TRANSFER_DST_BIT);
4896     const VkImageSubresourceLayers resultBufferImageSubresourceLayers =
4897         makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
4898     const VkBufferImageCopy resultBufferImageRegion =
4899         makeBufferImageCopy(makeExtent3D(m_data.width, m_data.height, 1u), resultBufferImageSubresourceLayers);
4900     de::MovePtr<BufferWithMemory> resultBuffer = de::MovePtr<BufferWithMemory>(
4901         new BufferWithMemory(vkd, device, allocator, resultBufferCreateInfo, MemoryRequirement::HostVisible));
4902 
4903     const VkDescriptorImageInfo descriptorImageInfo =
4904         makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
4905 
4906     const Move<VkCommandPool> cmdPool = createCommandPool(vkd, device, 0, queueFamilyIndex);
4907     const Move<VkCommandBuffer> cmdBuffer =
4908         allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
4909 
4910     std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> bottomLevelAccelerationStructures;
4911     de::MovePtr<TopLevelAccelerationStructure> topLevelAccelerationStructure;
4912     std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> bottomLevelAccelerationStructureCopies;
4913     de::MovePtr<TopLevelAccelerationStructure> topLevelAccelerationStructureCopy;
4914     std::vector<de::SharedPtr<SerialStorage>> bottomSerialized;
4915     std::vector<de::SharedPtr<SerialStorage>> topSerialized;
4916     std::vector<VkDeviceSize> accelerationCompactedSizes;
4917     std::vector<VkDeviceSize> accelerationSerialSizes;
4918     Move<VkQueryPool> m_queryPoolCompact;
4919     Move<VkQueryPool> m_queryPoolSerial;
4920 
4921     beginCommandBuffer(vkd, *cmdBuffer, 0u);
4922     {
4923         const VkImageMemoryBarrier preImageBarrier =
4924             makeImageMemoryBarrier(0u, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
4925                                    VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, **image, imageSubresourceRange);
4926         cmdPipelineImageMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
4927                                       VK_PIPELINE_STAGE_TRANSFER_BIT, &preImageBarrier);
4928         const VkClearValue clearValue = m_data.testConfiguration->getClearValue();
4929         vkd.cmdClearColorImage(*cmdBuffer, **image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clearValue.color, 1,
4930                                &imageSubresourceRange);
4931         const VkImageMemoryBarrier postImageBarrier = makeImageMemoryBarrier(
4932             VK_ACCESS_TRANSFER_WRITE_BIT,
4933             VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR,
4934             VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL, **image, imageSubresourceRange);
4935         cmdPipelineImageMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT,
4936                                       VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, &postImageBarrier);
4937 
4938         // build bottom level acceleration structures and their copies ( only when we are testing copying bottom level acceleration structures )
4939         bool bottomCompact = m_data.operationType == OP_COMPACT && m_data.operationTarget == OT_BOTTOM_ACCELERATION;
4940         const bool buildWithoutGeom   = (m_data.emptyASCase == EmptyAccelerationStructureCase::NO_GEOMETRIES_BOTTOM);
4941         const bool bottomNoPrimitives = (m_data.emptyASCase == EmptyAccelerationStructureCase::NO_PRIMITIVES_BOTTOM);
4942         const bool topNoPrimitives    = (m_data.emptyASCase == EmptyAccelerationStructureCase::NO_PRIMITIVES_TOP);
4943         const bool inactiveInstances  = (m_data.emptyASCase == EmptyAccelerationStructureCase::INACTIVE_INSTANCES);
4944         bottomLevelAccelerationStructures =
4945             m_data.testConfiguration->initBottomAccelerationStructures(m_context, m_data);
4946         VkBuildAccelerationStructureFlagsKHR allowCompactionFlag =
4947             VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR;
4948         VkBuildAccelerationStructureFlagsKHR emptyCompactionFlag = VkBuildAccelerationStructureFlagsKHR(0);
4949         VkBuildAccelerationStructureFlagsKHR bottomCompactFlags =
4950             (bottomCompact ? allowCompactionFlag : emptyCompactionFlag);
4951         VkBuildAccelerationStructureFlagsKHR bottomBuildFlags = m_data.buildFlags | bottomCompactFlags;
4952         std::vector<VkAccelerationStructureKHR> accelerationStructureHandles;
4953         std::vector<VkDeviceSize> bottomBlasCompactSize;
4954         std::vector<VkDeviceSize> bottomBlasSerialSize;
4955 
4956         for (auto &blas : bottomLevelAccelerationStructures)
4957         {
4958             blas->setBuildType(m_data.buildType);
4959             blas->setBuildFlags(bottomBuildFlags);
4960             blas->setUseArrayOfPointers(m_data.bottomUsesAOP);
4961             blas->setCreateGeneric(m_data.bottomGeneric);
4962             blas->setCreationBufferUnbounded(m_data.bottomUnboundedCreation);
4963             blas->setBuildWithoutGeometries(buildWithoutGeom);
4964             blas->setBuildWithoutPrimitives(bottomNoPrimitives);
4965             blas->createAndBuild(vkd, device, *cmdBuffer, allocator);
4966             accelerationStructureHandles.push_back(*(blas->getPtr()));
4967         }
4968 
4969         auto bottomLevelAccelerationStructuresPtr = &bottomLevelAccelerationStructures;
4970         // build top level acceleration structures and their copies ( only when we are testing copying top level acceleration structures )
4971         bool topCompact = m_data.operationType == OP_COMPACT && m_data.operationTarget == OT_TOP_ACCELERATION;
4972         VkBuildAccelerationStructureFlagsKHR topCompactFlags = (topCompact ? allowCompactionFlag : emptyCompactionFlag);
4973         VkBuildAccelerationStructureFlagsKHR topBuildFlags   = m_data.buildFlags | topCompactFlags;
4974         std::vector<VkAccelerationStructureKHR> topLevelStructureHandles;
4975         std::vector<VkDeviceSize> topBlasCompactSize;
4976         std::vector<VkDeviceSize> topBlasSerialSize;
4977 
4978         topLevelAccelerationStructure = m_data.testConfiguration->initTopAccelerationStructure(
4979             m_context, m_data, *bottomLevelAccelerationStructuresPtr);
4980         topLevelAccelerationStructure->setBuildType(m_data.buildType);
4981         topLevelAccelerationStructure->setBuildFlags(topBuildFlags);
4982         topLevelAccelerationStructure->setBuildWithoutPrimitives(topNoPrimitives);
4983         topLevelAccelerationStructure->setUseArrayOfPointers(m_data.topUsesAOP);
4984         topLevelAccelerationStructure->setCreateGeneric(m_data.topGeneric);
4985         topLevelAccelerationStructure->setCreationBufferUnbounded(m_data.topUnboundedCreation);
4986         topLevelAccelerationStructure->setInactiveInstances(inactiveInstances);
4987         topLevelAccelerationStructure->createAndBuild(vkd, device, *cmdBuffer, allocator);
4988         topLevelStructureHandles.push_back(*(topLevelAccelerationStructure->getPtr()));
4989 
4990         const VkMemoryBarrier postBuildBarrier = makeMemoryBarrier(VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR,
4991                                                                    VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR);
4992         cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
4993                                  VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, &postBuildBarrier);
4994 
4995         if (m_data.updateCase == UpdateCase::VERTICES)
4996         {
4997             for (auto &blas : bottomLevelAccelerationStructures)
4998             {
4999                 const std::vector<tcu::Vec3> vertices = {
5000                     tcu::Vec3(0.0f, 0.0f, -0.5f),
5001                     tcu::Vec3(0.5f, 0.0f, -0.5f),
5002                     tcu::Vec3(0.0f, 0.5f, -0.5f),
5003                 };
5004                 const std::vector<uint32_t> indices = {0, 1, 2};
5005                 de::SharedPtr<RaytracedGeometryBase> geometry;
5006                 geometry = makeRaytracedGeometry(VK_GEOMETRY_TYPE_TRIANGLES_KHR, m_data.vertexFormat, m_data.indexType);
5007 
5008                 for (auto it = begin(vertices), eit = end(vertices); it != eit; ++it)
5009                     geometry->addVertex(*it);
5010 
5011                 if (m_data.indexType != VK_INDEX_TYPE_NONE_KHR)
5012                 {
5013                     for (auto it = begin(indices), eit = end(indices); it != eit; ++it)
5014                         geometry->addIndex(*it);
5015                 }
5016                 blas->updateGeometry(0, geometry);
5017                 blas->build(vkd, device, *cmdBuffer, blas.get());
5018             }
5019         }
5020         else if (m_data.updateCase == UpdateCase::INDICES)
5021         {
5022             for (auto &blas : bottomLevelAccelerationStructures)
5023             {
5024                 const std::vector<tcu::Vec3> vertices = {
5025                     tcu::Vec3(0.0f, 0.0f, 0.0f),  tcu::Vec3(0.5f, 0.0f, 0.0f),  tcu::Vec3(0.0f, 0.5f, 0.0f),
5026                     tcu::Vec3(0.0f, 0.0f, -0.5f), tcu::Vec3(0.5f, 0.0f, -0.5f), tcu::Vec3(0.0f, 0.5f, -0.5f),
5027                 };
5028 
5029                 const std::vector<uint32_t> indices = {3, 4, 5};
5030                 de::SharedPtr<RaytracedGeometryBase> geometry;
5031                 geometry = makeRaytracedGeometry(VK_GEOMETRY_TYPE_TRIANGLES_KHR, m_data.vertexFormat, m_data.indexType);
5032 
5033                 for (auto it = begin(vertices), eit = end(vertices); it != eit; ++it)
5034                     geometry->addVertex(*it);
5035 
5036                 if (m_data.indexType != VK_INDEX_TYPE_NONE_KHR)
5037                 {
5038                     for (auto it = begin(indices), eit = end(indices); it != eit; ++it)
5039                         geometry->addIndex(*it);
5040                 }
5041                 blas->updateGeometry(0, geometry);
5042                 blas->build(vkd, device, *cmdBuffer, blas.get());
5043             }
5044         }
5045         else if (m_data.updateCase == UpdateCase::TRANSFORM)
5046         {
5047             const VkTransformMatrixKHR translatedMatrix = {
5048                 {{1.0f, 0.0f, 0.0f, 0.0f}, {0.0f, 1.0f, 0.0f, 0.0f}, {0.0f, 0.0f, 1.0f, -0.5f}}};
5049             topLevelAccelerationStructure->updateInstanceMatrix(vkd, device, 0, translatedMatrix);
5050             topLevelAccelerationStructure->build(vkd, device, *cmdBuffer, topLevelAccelerationStructure.get());
5051         }
5052 
5053         const TopLevelAccelerationStructure *topLevelRayTracedPtr = topLevelAccelerationStructure.get();
5054         const VkMemoryBarrier preTraceMemoryBarrier =
5055             makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR,
5056                               VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT);
5057         cmdPipelineMemoryBarrier(
5058             vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
5059             VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, &preTraceMemoryBarrier);
5060 
5061         VkWriteDescriptorSetAccelerationStructureKHR accelerationStructureWriteDescriptorSet = {
5062             VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, //  VkStructureType sType;
5063             DE_NULL,                                                           //  const void* pNext;
5064             1u,                                                                //  uint32_t accelerationStructureCount;
5065             topLevelRayTracedPtr->getPtr(), //  const VkAccelerationStructureKHR* pAccelerationStructures;
5066         };
5067 
5068         DescriptorSetUpdateBuilder()
5069             .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
5070                          VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descriptorImageInfo)
5071             .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
5072                          VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &accelerationStructureWriteDescriptorSet)
5073             .update(vkd, device);
5074 
5075         vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0, 1,
5076                                   &descriptorSet.get(), 0, DE_NULL);
5077 
5078         vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipeline);
5079 
5080         cmdTraceRays(vkd, *cmdBuffer, &raygenShaderBindingTableRegion, &missShaderBindingTableRegion,
5081                      &hitShaderBindingTableRegion, &callableShaderBindingTableRegion, m_data.width, m_data.height, 1);
5082 
5083         const VkMemoryBarrier postTraceMemoryBarrier =
5084             makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
5085         const VkMemoryBarrier postCopyMemoryBarrier =
5086             makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
5087         cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR,
5088                                  VK_PIPELINE_STAGE_TRANSFER_BIT, &postTraceMemoryBarrier);
5089 
5090         vkd.cmdCopyImageToBuffer(*cmdBuffer, **image, VK_IMAGE_LAYOUT_GENERAL, **resultBuffer, 1u,
5091                                  &resultBufferImageRegion);
5092 
5093         cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
5094                                  &postCopyMemoryBarrier);
5095     }
5096     endCommandBuffer(vkd, *cmdBuffer);
5097 
5098     submitCommandsAndWait(vkd, device, queue, cmdBuffer.get());
5099 
5100     invalidateMappedMemoryRange(vkd, device, resultBuffer->getAllocation().getMemory(),
5101                                 resultBuffer->getAllocation().getOffset(), pixelCount * sizeof(uint32_t));
5102 
5103     bool result = m_data.testConfiguration->verifyImage(resultBuffer.get(), m_context, m_data);
5104 
5105     if (result)
5106         return tcu::TestStatus::pass("Pass");
5107     else
5108         return tcu::TestStatus::fail("Fail");
5109 }
5110 
5111 } // namespace
5112 
addBasicBuildingTests(tcu::TestCaseGroup * group)5113 void addBasicBuildingTests(tcu::TestCaseGroup *group)
5114 {
5115     struct
5116     {
5117         vk::VkAccelerationStructureBuildTypeKHR buildType;
5118         const char *name;
5119     } buildTypes[] = {
5120         {VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR, "cpu_built"},
5121         {VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, "gpu_built"},
5122     };
5123 
5124     struct
5125     {
5126         BottomTestType testType;
5127         bool usesAOP;
5128         const char *name;
5129     } bottomTestTypes[] = {
5130         {BottomTestType::TRIANGLES, false, "triangles"},
5131         {BottomTestType::TRIANGLES, true, "triangles_aop"},
5132         {BottomTestType::AABBS, false, "aabbs"},
5133         {BottomTestType::AABBS, true, "aabbs_aop"},
5134     };
5135 
5136     struct
5137     {
5138         TopTestType testType;
5139         bool usesAOP;
5140         const char *name;
5141     } topTestTypes[] = {
5142         {TopTestType::IDENTICAL_INSTANCES, false, "identical_instances"},
5143         {TopTestType::IDENTICAL_INSTANCES, true, "identical_instances_aop"},
5144         {TopTestType::DIFFERENT_INSTANCES, false, "different_instances"},
5145         {TopTestType::DIFFERENT_INSTANCES, true, "different_instances_aop"},
5146     };
5147 
5148     struct BuildFlagsData
5149     {
5150         VkBuildAccelerationStructureFlagsKHR flags;
5151         const char *name;
5152     };
5153 
5154     BuildFlagsData optimizationTypes[] = {
5155         {VkBuildAccelerationStructureFlagsKHR(0u), "0"},
5156         {VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR, "fasttrace"},
5157         {VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_BUILD_BIT_KHR, "fastbuild"},
5158     };
5159 
5160     BuildFlagsData updateTypes[] = {
5161         {VkBuildAccelerationStructureFlagsKHR(0u), "0"},
5162         {VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR, "update"},
5163     };
5164 
5165     BuildFlagsData compactionTypes[] = {
5166         {VkBuildAccelerationStructureFlagsKHR(0u), "0"},
5167         {VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR, "compaction"},
5168     };
5169 
5170     BuildFlagsData lowMemoryTypes[] = {
5171         {VkBuildAccelerationStructureFlagsKHR(0u), "0"},
5172         {VK_BUILD_ACCELERATION_STRUCTURE_LOW_MEMORY_BIT_KHR, "lowmemory"},
5173     };
5174 
5175     struct
5176     {
5177         bool padVertices;
5178         const char *name;
5179     } paddingType[] = {
5180         {false, "nopadding"},
5181         {true, "padded"},
5182     };
5183 
5184     struct
5185     {
5186         bool topGeneric;
5187         bool bottomGeneric;
5188         const char *suffix;
5189     } createGenericParams[] = {
5190         {false, false, ""},
5191         {false, true, "_bottomgeneric"},
5192         {true, false, "_topgeneric"},
5193         {true, true, "_bothgeneric"},
5194     };
5195 
5196     // In order not to create thousands of new test variants for unbound buffer memory on acceleration structure creation, we will
5197     // set these options on some of the tests.
5198     de::ModCounter32 unboundedCreationBottomCounter(3u);
5199     de::ModCounter32 unboundedCreationTopCounter(7u);
5200 
5201     for (size_t buildTypeNdx = 0; buildTypeNdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeNdx)
5202     {
5203         de::MovePtr<tcu::TestCaseGroup> buildGroup(
5204             new tcu::TestCaseGroup(group->getTestContext(), buildTypes[buildTypeNdx].name));
5205 
5206         for (size_t bottomNdx = 0; bottomNdx < DE_LENGTH_OF_ARRAY(bottomTestTypes); ++bottomNdx)
5207         {
5208             de::MovePtr<tcu::TestCaseGroup> bottomGroup(
5209                 new tcu::TestCaseGroup(group->getTestContext(), bottomTestTypes[bottomNdx].name));
5210 
5211             for (size_t topNdx = 0; topNdx < DE_LENGTH_OF_ARRAY(topTestTypes); ++topNdx)
5212             {
5213                 de::MovePtr<tcu::TestCaseGroup> topGroup(
5214                     new tcu::TestCaseGroup(group->getTestContext(), topTestTypes[topNdx].name));
5215 
5216                 for (int paddingTypeIdx = 0; paddingTypeIdx < DE_LENGTH_OF_ARRAY(paddingType); ++paddingTypeIdx)
5217                 {
5218                     de::MovePtr<tcu::TestCaseGroup> paddingGroup(
5219                         new tcu::TestCaseGroup(group->getTestContext(), paddingType[paddingTypeIdx].name));
5220 
5221                     for (size_t optimizationNdx = 0; optimizationNdx < DE_LENGTH_OF_ARRAY(optimizationTypes);
5222                          ++optimizationNdx)
5223                     {
5224                         for (size_t updateNdx = 0; updateNdx < DE_LENGTH_OF_ARRAY(updateTypes); ++updateNdx)
5225                         {
5226                             for (size_t compactionNdx = 0; compactionNdx < DE_LENGTH_OF_ARRAY(compactionTypes);
5227                                  ++compactionNdx)
5228                             {
5229                                 for (size_t lowMemoryNdx = 0; lowMemoryNdx < DE_LENGTH_OF_ARRAY(lowMemoryTypes);
5230                                      ++lowMemoryNdx)
5231                                 {
5232                                     for (int createGenericIdx = 0;
5233                                          createGenericIdx < DE_LENGTH_OF_ARRAY(createGenericParams); ++createGenericIdx)
5234                                     {
5235                                         std::string testName =
5236                                             std::string(optimizationTypes[optimizationNdx].name) + "_" +
5237                                             std::string(updateTypes[updateNdx].name) + "_" +
5238                                             std::string(compactionTypes[compactionNdx].name) + "_" +
5239                                             std::string(lowMemoryTypes[lowMemoryNdx].name) +
5240                                             std::string(createGenericParams[createGenericIdx].suffix);
5241 
5242                                         const bool unboundedCreationBottom =
5243                                             (static_cast<uint32_t>(unboundedCreationBottomCounter++) == 0u);
5244                                         const bool unboundedCreationTop =
5245                                             (static_cast<uint32_t>(unboundedCreationTopCounter++) == 0u);
5246 
5247                                         TestParams testParams{
5248                                             buildTypes[buildTypeNdx].buildType,
5249                                             VK_FORMAT_R32G32B32_SFLOAT,
5250                                             paddingType[paddingTypeIdx].padVertices,
5251                                             VK_INDEX_TYPE_NONE_KHR,
5252                                             bottomTestTypes[bottomNdx].testType,
5253                                             InstanceCullFlags::NONE,
5254                                             bottomTestTypes[bottomNdx].usesAOP,
5255                                             createGenericParams[createGenericIdx].bottomGeneric,
5256                                             unboundedCreationBottom,
5257                                             topTestTypes[topNdx].testType,
5258                                             topTestTypes[topNdx].usesAOP,
5259                                             createGenericParams[createGenericIdx].topGeneric,
5260                                             unboundedCreationTop,
5261                                             optimizationTypes[optimizationNdx].flags | updateTypes[updateNdx].flags |
5262                                                 compactionTypes[compactionNdx].flags |
5263                                                 lowMemoryTypes[lowMemoryNdx].flags,
5264                                             OT_NONE,
5265                                             OP_NONE,
5266                                             RTAS_DEFAULT_SIZE,
5267                                             RTAS_DEFAULT_SIZE,
5268                                             de::SharedPtr<TestConfiguration>(new CheckerboardConfiguration()),
5269                                             0u,
5270                                             EmptyAccelerationStructureCase::NOT_EMPTY,
5271                                             InstanceCustomIndexCase::NONE,
5272                                             false,
5273                                             0xFFu,
5274                                             UpdateCase::NONE,
5275                                         };
5276                                         paddingGroup->addChild(new RayTracingASBasicTestCase(
5277                                             group->getTestContext(), testName.c_str(), testParams));
5278                                     }
5279                                 }
5280                             }
5281                         }
5282                     }
5283                     topGroup->addChild(paddingGroup.release());
5284                 }
5285                 bottomGroup->addChild(topGroup.release());
5286             }
5287             buildGroup->addChild(bottomGroup.release());
5288         }
5289         group->addChild(buildGroup.release());
5290     }
5291 }
5292 
addVertexIndexFormatsTests(tcu::TestCaseGroup * group)5293 void addVertexIndexFormatsTests(tcu::TestCaseGroup *group)
5294 {
5295     struct
5296     {
5297         vk::VkAccelerationStructureBuildTypeKHR buildType;
5298         const char *name;
5299     } buildTypes[] = {
5300         {VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR, "cpu_built"},
5301         {VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, "gpu_built"},
5302     };
5303 
5304     const VkFormat vertexFormats[] = {
5305         // Mandatory formats.
5306         VK_FORMAT_R32G32_SFLOAT,
5307         VK_FORMAT_R32G32B32_SFLOAT,
5308         VK_FORMAT_R16G16_SFLOAT,
5309         VK_FORMAT_R16G16B16A16_SFLOAT,
5310         VK_FORMAT_R16G16_SNORM,
5311         VK_FORMAT_R16G16B16A16_SNORM,
5312 
5313         // Additional formats.
5314         VK_FORMAT_R8G8_SNORM,
5315         VK_FORMAT_R8G8B8_SNORM,
5316         VK_FORMAT_R8G8B8A8_SNORM,
5317         VK_FORMAT_R16G16B16_SNORM,
5318         VK_FORMAT_R16G16B16_SFLOAT,
5319         VK_FORMAT_R32G32B32A32_SFLOAT,
5320         VK_FORMAT_R64G64_SFLOAT,
5321         VK_FORMAT_R64G64B64_SFLOAT,
5322         VK_FORMAT_R64G64B64A64_SFLOAT,
5323     };
5324 
5325     struct
5326     {
5327         VkIndexType indexType;
5328         const char *name;
5329     } indexFormats[] = {
5330         {VK_INDEX_TYPE_NONE_KHR, "index_none"},
5331         {VK_INDEX_TYPE_UINT16, "index_uint16"},
5332         {VK_INDEX_TYPE_UINT32, "index_uint32"},
5333     };
5334 
5335     struct
5336     {
5337         bool padVertices;
5338         const char *name;
5339     } paddingType[] = {
5340         {false, "nopadding"},
5341         {true, "padded"},
5342     };
5343 
5344     for (size_t buildTypeNdx = 0; buildTypeNdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeNdx)
5345     {
5346         de::MovePtr<tcu::TestCaseGroup> buildGroup(
5347             new tcu::TestCaseGroup(group->getTestContext(), buildTypes[buildTypeNdx].name));
5348 
5349         for (size_t vertexFormatNdx = 0; vertexFormatNdx < DE_LENGTH_OF_ARRAY(vertexFormats); ++vertexFormatNdx)
5350         {
5351             const auto format     = vertexFormats[vertexFormatNdx];
5352             const auto formatName = getFormatSimpleName(format);
5353 
5354             de::MovePtr<tcu::TestCaseGroup> vertexFormatGroup(
5355                 new tcu::TestCaseGroup(group->getTestContext(), formatName.c_str()));
5356 
5357             for (int paddingIdx = 0; paddingIdx < DE_LENGTH_OF_ARRAY(paddingType); ++paddingIdx)
5358             {
5359                 de::MovePtr<tcu::TestCaseGroup> paddingGroup(
5360                     new tcu::TestCaseGroup(group->getTestContext(), paddingType[paddingIdx].name));
5361 
5362                 for (size_t indexFormatNdx = 0; indexFormatNdx < DE_LENGTH_OF_ARRAY(indexFormats); ++indexFormatNdx)
5363                 {
5364                     TestParams testParams{
5365                         buildTypes[buildTypeNdx].buildType,
5366                         format,
5367                         paddingType[paddingIdx].padVertices,
5368                         indexFormats[indexFormatNdx].indexType,
5369                         BottomTestType::TRIANGLES,
5370                         InstanceCullFlags::NONE,
5371                         false,
5372                         false,
5373                         false,
5374                         TopTestType::IDENTICAL_INSTANCES,
5375                         false,
5376                         false,
5377                         false,
5378                         VkBuildAccelerationStructureFlagsKHR(0u),
5379                         OT_NONE,
5380                         OP_NONE,
5381                         RTAS_DEFAULT_SIZE,
5382                         RTAS_DEFAULT_SIZE,
5383                         de::SharedPtr<TestConfiguration>(new SingleTriangleConfiguration()),
5384                         0u,
5385                         EmptyAccelerationStructureCase::NOT_EMPTY,
5386                         InstanceCustomIndexCase::NONE,
5387                         false,
5388                         0xFFu,
5389                         UpdateCase::NONE,
5390                     };
5391                     paddingGroup->addChild(new RayTracingASBasicTestCase(
5392                         group->getTestContext(), indexFormats[indexFormatNdx].name, testParams));
5393                 }
5394                 vertexFormatGroup->addChild(paddingGroup.release());
5395             }
5396             buildGroup->addChild(vertexFormatGroup.release());
5397         }
5398         group->addChild(buildGroup.release());
5399     }
5400 }
5401 
addOperationTestsImpl(tcu::TestCaseGroup * group,const uint32_t workerThreads)5402 void addOperationTestsImpl(tcu::TestCaseGroup *group, const uint32_t workerThreads)
5403 {
5404     struct
5405     {
5406         OperationType operationType;
5407         const char *name;
5408     } operationTypes[] = {
5409         {OP_COPY, "copy"},
5410         {OP_COMPACT, "compaction"},
5411         {OP_SERIALIZE, "serialization"},
5412     };
5413 
5414     struct
5415     {
5416         vk::VkAccelerationStructureBuildTypeKHR buildType;
5417         const char *name;
5418     } buildTypes[] = {
5419         {VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR, "cpu_built"},
5420         {VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, "gpu_built"},
5421     };
5422 
5423     struct
5424     {
5425         OperationTarget operationTarget;
5426         const char *name;
5427     } operationTargets[] = {
5428         {OT_TOP_ACCELERATION, "top_acceleration_structure"},
5429         {OT_BOTTOM_ACCELERATION, "bottom_acceleration_structure"},
5430     };
5431 
5432     struct
5433     {
5434         BottomTestType testType;
5435         const char *name;
5436     } bottomTestTypes[] = {
5437         {BottomTestType::TRIANGLES, "triangles"},
5438         {BottomTestType::AABBS, "aabbs"},
5439     };
5440 
5441     for (size_t operationTypeNdx = 0; operationTypeNdx < DE_LENGTH_OF_ARRAY(operationTypes); ++operationTypeNdx)
5442     {
5443         if (workerThreads > 0)
5444             if (operationTypes[operationTypeNdx].operationType != OP_COPY &&
5445                 operationTypes[operationTypeNdx].operationType != OP_SERIALIZE)
5446                 continue;
5447 
5448         de::MovePtr<tcu::TestCaseGroup> operationTypeGroup(
5449             new tcu::TestCaseGroup(group->getTestContext(), operationTypes[operationTypeNdx].name));
5450 
5451         for (size_t buildTypeNdx = 0; buildTypeNdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeNdx)
5452         {
5453             if (workerThreads > 0 &&
5454                 buildTypes[buildTypeNdx].buildType != VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR)
5455                 continue;
5456 
5457             de::MovePtr<tcu::TestCaseGroup> buildGroup(
5458                 new tcu::TestCaseGroup(group->getTestContext(), buildTypes[buildTypeNdx].name));
5459 
5460             for (size_t operationTargetNdx = 0; operationTargetNdx < DE_LENGTH_OF_ARRAY(operationTargets);
5461                  ++operationTargetNdx)
5462             {
5463                 de::MovePtr<tcu::TestCaseGroup> operationTargetGroup(
5464                     new tcu::TestCaseGroup(group->getTestContext(), operationTargets[operationTargetNdx].name));
5465 
5466                 for (size_t testTypeNdx = 0; testTypeNdx < DE_LENGTH_OF_ARRAY(bottomTestTypes); ++testTypeNdx)
5467                 {
5468                     TopTestType topTest =
5469                         (operationTargets[operationTargetNdx].operationTarget == OT_TOP_ACCELERATION) ?
5470                             TopTestType::DIFFERENT_INSTANCES :
5471                             TopTestType::IDENTICAL_INSTANCES;
5472 
5473                     TestParams testParams{
5474                         buildTypes[buildTypeNdx].buildType,
5475                         VK_FORMAT_R32G32B32_SFLOAT,
5476                         false,
5477                         VK_INDEX_TYPE_NONE_KHR,
5478                         bottomTestTypes[testTypeNdx].testType,
5479                         InstanceCullFlags::NONE,
5480                         false,
5481                         false,
5482                         false,
5483                         topTest,
5484                         false,
5485                         false,
5486                         false,
5487                         VkBuildAccelerationStructureFlagsKHR(0u),
5488                         operationTargets[operationTargetNdx].operationTarget,
5489                         operationTypes[operationTypeNdx].operationType,
5490                         RTAS_DEFAULT_SIZE,
5491                         RTAS_DEFAULT_SIZE,
5492                         de::SharedPtr<TestConfiguration>(new CheckerboardConfiguration()),
5493                         workerThreads,
5494                         EmptyAccelerationStructureCase::NOT_EMPTY,
5495                         InstanceCustomIndexCase::NONE,
5496                         false,
5497                         0xFFu,
5498                         UpdateCase::NONE,
5499                     };
5500                     operationTargetGroup->addChild(new RayTracingASBasicTestCase(
5501                         group->getTestContext(), bottomTestTypes[testTypeNdx].name, testParams));
5502                 }
5503                 buildGroup->addChild(operationTargetGroup.release());
5504             }
5505             operationTypeGroup->addChild(buildGroup.release());
5506         }
5507         group->addChild(operationTypeGroup.release());
5508     }
5509 }
5510 
addOperationTests(tcu::TestCaseGroup * group)5511 void addOperationTests(tcu::TestCaseGroup *group)
5512 {
5513     addOperationTestsImpl(group, 0);
5514 }
5515 
addHostThreadingOperationTests(tcu::TestCaseGroup * group)5516 void addHostThreadingOperationTests(tcu::TestCaseGroup *group)
5517 {
5518     const uint32_t threads[] = {1, 2, 3, 4, 8, std::numeric_limits<uint32_t>::max()};
5519 
5520     for (size_t threadsNdx = 0; threadsNdx < DE_LENGTH_OF_ARRAY(threads); ++threadsNdx)
5521     {
5522         const std::string groupName =
5523             threads[threadsNdx] != std::numeric_limits<uint32_t>::max() ? de::toString(threads[threadsNdx]) : "max";
5524 
5525         de::MovePtr<tcu::TestCaseGroup> threadGroup(new tcu::TestCaseGroup(group->getTestContext(), groupName.c_str()));
5526 
5527         addOperationTestsImpl(threadGroup.get(), threads[threadsNdx]);
5528 
5529         group->addChild(threadGroup.release());
5530     }
5531 }
5532 
addFuncArgTests(tcu::TestCaseGroup * group)5533 void addFuncArgTests(tcu::TestCaseGroup *group)
5534 {
5535     const struct
5536     {
5537         vk::VkAccelerationStructureBuildTypeKHR buildType;
5538         const char *name;
5539     } buildTypes[] = {
5540         {VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR, "cpu_built"},
5541         {VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, "gpu_built"},
5542     };
5543 
5544     auto &ctx = group->getTestContext();
5545 
5546     for (int buildTypeNdx = 0; buildTypeNdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeNdx)
5547     {
5548         TestParams testParams{
5549             buildTypes[buildTypeNdx].buildType,
5550             VK_FORMAT_R32G32B32_SFLOAT,
5551             false,
5552             VK_INDEX_TYPE_NONE_KHR,
5553             BottomTestType::TRIANGLES,
5554             InstanceCullFlags::NONE,
5555             false,
5556             false,
5557             false,
5558             TopTestType::IDENTICAL_INSTANCES,
5559             false,
5560             false,
5561             false,
5562             VkBuildAccelerationStructureFlagsKHR(0u),
5563             OT_NONE,
5564             OP_NONE,
5565             RTAS_DEFAULT_SIZE,
5566             RTAS_DEFAULT_SIZE,
5567             de::SharedPtr<TestConfiguration>(new SingleTriangleConfiguration()),
5568             0u,
5569             EmptyAccelerationStructureCase::NOT_EMPTY,
5570             InstanceCustomIndexCase::NONE,
5571             false,
5572             0xFFu,
5573             UpdateCase::NONE,
5574         };
5575 
5576         group->addChild(new RayTracingASFuncArgTestCase(ctx, buildTypes[buildTypeNdx].name, testParams));
5577     }
5578 }
5579 
addInstanceTriangleCullingTests(tcu::TestCaseGroup * group)5580 void addInstanceTriangleCullingTests(tcu::TestCaseGroup *group)
5581 {
5582     const struct
5583     {
5584         InstanceCullFlags cullFlags;
5585         std::string name;
5586     } cullFlags[] = {
5587         {InstanceCullFlags::NONE, "noflags"},
5588         {InstanceCullFlags::COUNTERCLOCKWISE, "ccw"},
5589         {InstanceCullFlags::CULL_DISABLE, "nocull"},
5590         {InstanceCullFlags::ALL, "ccw_nocull"},
5591     };
5592 
5593     const struct
5594     {
5595         TopTestType topType;
5596         std::string name;
5597     } topType[] = {
5598         {TopTestType::DIFFERENT_INSTANCES, "transformed"}, // Each instance has its own transformation matrix.
5599         {TopTestType::IDENTICAL_INSTANCES, "notransform"}, // "Identical" instances, different geometries.
5600     };
5601 
5602     const struct
5603     {
5604         vk::VkAccelerationStructureBuildTypeKHR buildType;
5605         std::string name;
5606     } buildTypes[] = {
5607         {VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR, "cpu_built"},
5608         {VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, "gpu_built"},
5609     };
5610 
5611     const struct
5612     {
5613         VkIndexType indexType;
5614         std::string name;
5615     } indexFormats[] = {
5616         {VK_INDEX_TYPE_NONE_KHR, "index_none"},
5617         {VK_INDEX_TYPE_UINT16, "index_uint16"},
5618         {VK_INDEX_TYPE_UINT32, "index_uint32"},
5619     };
5620 
5621     auto &ctx = group->getTestContext();
5622 
5623     for (int buildTypeIdx = 0; buildTypeIdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeIdx)
5624     {
5625         de::MovePtr<tcu::TestCaseGroup> buildTypeGroup(
5626             new tcu::TestCaseGroup(ctx, buildTypes[buildTypeIdx].name.c_str()));
5627 
5628         for (int indexFormatIdx = 0; indexFormatIdx < DE_LENGTH_OF_ARRAY(indexFormats); ++indexFormatIdx)
5629         {
5630             de::MovePtr<tcu::TestCaseGroup> indexTypeGroup(
5631                 new tcu::TestCaseGroup(ctx, indexFormats[indexFormatIdx].name.c_str()));
5632 
5633             for (int topTypeIdx = 0; topTypeIdx < DE_LENGTH_OF_ARRAY(topType); ++topTypeIdx)
5634             {
5635                 for (int cullFlagsIdx = 0; cullFlagsIdx < DE_LENGTH_OF_ARRAY(cullFlags); ++cullFlagsIdx)
5636                 {
5637                     const std::string testName = topType[topTypeIdx].name + "_" + cullFlags[cullFlagsIdx].name;
5638 
5639                     TestParams testParams{
5640                         buildTypes[buildTypeIdx].buildType,
5641                         VK_FORMAT_R32G32B32_SFLOAT,
5642                         false,
5643                         indexFormats[indexFormatIdx].indexType,
5644                         BottomTestType::TRIANGLES,
5645                         cullFlags[cullFlagsIdx].cullFlags,
5646                         false,
5647                         false,
5648                         false,
5649                         topType[topTypeIdx].topType,
5650                         false,
5651                         false,
5652                         false,
5653                         VkBuildAccelerationStructureFlagsKHR(0u),
5654                         OT_NONE,
5655                         OP_NONE,
5656                         RTAS_DEFAULT_SIZE,
5657                         RTAS_DEFAULT_SIZE,
5658                         de::SharedPtr<TestConfiguration>(new CheckerboardConfiguration()),
5659                         0u,
5660                         EmptyAccelerationStructureCase::NOT_EMPTY,
5661                         InstanceCustomIndexCase::NONE,
5662                         false,
5663                         0xFFu,
5664                         UpdateCase::NONE,
5665                     };
5666                     indexTypeGroup->addChild(new RayTracingASBasicTestCase(ctx, testName.c_str(), testParams));
5667                 }
5668             }
5669             buildTypeGroup->addChild(indexTypeGroup.release());
5670         }
5671         group->addChild(buildTypeGroup.release());
5672     }
5673 }
5674 
addDynamicIndexingTests(tcu::TestCaseGroup * group)5675 void addDynamicIndexingTests(tcu::TestCaseGroup *group)
5676 {
5677     auto &ctx = group->getTestContext();
5678     group->addChild(new RayTracingASDynamicIndexingTestCase(ctx, "dynamic_indexing"));
5679 }
5680 
addEmptyAccelerationStructureTests(tcu::TestCaseGroup * group)5681 void addEmptyAccelerationStructureTests(tcu::TestCaseGroup *group)
5682 {
5683     const struct
5684     {
5685         vk::VkAccelerationStructureBuildTypeKHR buildType;
5686         std::string name;
5687     } buildTypes[] = {
5688         {VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR, "cpu_built"},
5689         {VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, "gpu_built"},
5690     };
5691 
5692     const struct
5693     {
5694         VkIndexType indexType;
5695         std::string name;
5696     } indexFormats[] = {
5697         {VK_INDEX_TYPE_NONE_KHR, "index_none"},
5698         {VK_INDEX_TYPE_UINT16, "index_uint16"},
5699         {VK_INDEX_TYPE_UINT32, "index_uint32"},
5700     };
5701 
5702     const struct
5703     {
5704         EmptyAccelerationStructureCase emptyASCase;
5705         std::string name;
5706     } emptyCases[] = {
5707         {EmptyAccelerationStructureCase::INACTIVE_TRIANGLES, "inactive_triangles"},
5708         {EmptyAccelerationStructureCase::INACTIVE_INSTANCES, "inactive_instances"},
5709         {EmptyAccelerationStructureCase::NO_GEOMETRIES_BOTTOM, "no_geometries_bottom"},
5710         {EmptyAccelerationStructureCase::NO_PRIMITIVES_TOP, "no_primitives_top"},
5711         {EmptyAccelerationStructureCase::NO_PRIMITIVES_BOTTOM, "no_primitives_bottom"},
5712     };
5713 
5714     auto &ctx = group->getTestContext();
5715 
5716     for (int buildTypeIdx = 0; buildTypeIdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeIdx)
5717     {
5718         de::MovePtr<tcu::TestCaseGroup> buildTypeGroup(
5719             new tcu::TestCaseGroup(ctx, buildTypes[buildTypeIdx].name.c_str()));
5720 
5721         for (int indexFormatIdx = 0; indexFormatIdx < DE_LENGTH_OF_ARRAY(indexFormats); ++indexFormatIdx)
5722         {
5723             de::MovePtr<tcu::TestCaseGroup> indexTypeGroup(
5724                 new tcu::TestCaseGroup(ctx, indexFormats[indexFormatIdx].name.c_str()));
5725 
5726             for (int emptyCaseIdx = 0; emptyCaseIdx < DE_LENGTH_OF_ARRAY(emptyCases); ++emptyCaseIdx)
5727             {
5728 
5729                 TestParams testParams{
5730                     buildTypes[buildTypeIdx].buildType,
5731                     VK_FORMAT_R32G32B32_SFLOAT,
5732                     false,
5733                     indexFormats[indexFormatIdx].indexType,
5734                     BottomTestType::TRIANGLES,
5735                     InstanceCullFlags::NONE,
5736                     false,
5737                     false,
5738                     false,
5739                     TopTestType::IDENTICAL_INSTANCES,
5740                     false,
5741                     false,
5742                     false,
5743                     VkBuildAccelerationStructureFlagsKHR(0u),
5744                     OT_NONE,
5745                     OP_NONE,
5746                     RTAS_DEFAULT_SIZE,
5747                     RTAS_DEFAULT_SIZE,
5748                     de::SharedPtr<TestConfiguration>(new SingleTriangleConfiguration()),
5749                     0u,
5750                     emptyCases[emptyCaseIdx].emptyASCase,
5751                     InstanceCustomIndexCase::NONE,
5752                     false,
5753                     0xFFu,
5754                     UpdateCase::NONE,
5755                 };
5756                 indexTypeGroup->addChild(
5757                     new RayTracingASBasicTestCase(ctx, emptyCases[emptyCaseIdx].name.c_str(), testParams));
5758             }
5759             buildTypeGroup->addChild(indexTypeGroup.release());
5760         }
5761         group->addChild(buildTypeGroup.release());
5762     }
5763 }
5764 
addInstanceIndexTests(tcu::TestCaseGroup * group)5765 void addInstanceIndexTests(tcu::TestCaseGroup *group)
5766 {
5767     const struct
5768     {
5769         vk::VkAccelerationStructureBuildTypeKHR buildType;
5770         std::string name;
5771     } buildTypes[] = {
5772         {VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR, "cpu_built"},
5773         {VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, "gpu_built"},
5774     };
5775 
5776     const struct
5777     {
5778         InstanceCustomIndexCase customIndexCase;
5779         std::string name;
5780     } customIndexCases[] = {
5781         {InstanceCustomIndexCase::NONE, "no_instance_index"},
5782         {InstanceCustomIndexCase::ANY_HIT, "ahit"},
5783         {InstanceCustomIndexCase::CLOSEST_HIT, "chit"},
5784         {InstanceCustomIndexCase::INTERSECTION, "isec"},
5785     };
5786 
5787     auto &ctx = group->getTestContext();
5788 
5789     for (int buildTypeIdx = 0; buildTypeIdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeIdx)
5790     {
5791         de::MovePtr<tcu::TestCaseGroup> buildTypeGroup(
5792             new tcu::TestCaseGroup(ctx, buildTypes[buildTypeIdx].name.c_str()));
5793 
5794         for (int customIndexCaseIdx = 0; customIndexCaseIdx < DE_LENGTH_OF_ARRAY(customIndexCases);
5795              ++customIndexCaseIdx)
5796         {
5797             const auto &idxCase = customIndexCases[customIndexCaseIdx].customIndexCase;
5798             const auto bottomGeometryType =
5799                 ((idxCase == InstanceCustomIndexCase::INTERSECTION) ? BottomTestType::AABBS :
5800                                                                       BottomTestType::TRIANGLES);
5801 
5802             TestParams testParams{
5803                 buildTypes[buildTypeIdx].buildType,
5804                 VK_FORMAT_R32G32B32_SFLOAT,
5805                 false,
5806                 VK_INDEX_TYPE_NONE_KHR,
5807                 bottomGeometryType,
5808                 InstanceCullFlags::NONE,
5809                 false,
5810                 false,
5811                 false,
5812                 TopTestType::IDENTICAL_INSTANCES,
5813                 false,
5814                 false,
5815                 false,
5816                 VkBuildAccelerationStructureFlagsKHR(0u),
5817                 OT_NONE,
5818                 OP_NONE,
5819                 RTAS_DEFAULT_SIZE,
5820                 RTAS_DEFAULT_SIZE,
5821                 de::SharedPtr<TestConfiguration>(new CheckerboardConfiguration()),
5822                 0u,
5823                 EmptyAccelerationStructureCase::NOT_EMPTY,
5824                 customIndexCases[customIndexCaseIdx].customIndexCase,
5825                 false,
5826                 0xFFu,
5827                 UpdateCase::NONE,
5828             };
5829             buildTypeGroup->addChild(
5830                 new RayTracingASBasicTestCase(ctx, customIndexCases[customIndexCaseIdx].name.c_str(), testParams));
5831         }
5832         group->addChild(buildTypeGroup.release());
5833     }
5834 }
5835 
addInstanceUpdateTests(tcu::TestCaseGroup * group)5836 void addInstanceUpdateTests(tcu::TestCaseGroup *group)
5837 {
5838     const struct
5839     {
5840         vk::VkAccelerationStructureBuildTypeKHR buildType;
5841         std::string name;
5842     } buildTypes[] = {
5843         {VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR, "cpu_built"},
5844         {VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, "gpu_built"},
5845     };
5846 
5847     struct
5848     {
5849         OperationType operationType;
5850         const char *name;
5851     } operationTypes[] = {
5852         {OP_UPDATE, "update"},
5853         {OP_UPDATE_IN_PLACE, "update_in_place"},
5854     };
5855 
5856     auto &ctx = group->getTestContext();
5857 
5858     for (int buildTypeIdx = 0; buildTypeIdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeIdx)
5859     {
5860         de::MovePtr<tcu::TestCaseGroup> buildTypeGroup(
5861             new tcu::TestCaseGroup(ctx, buildTypes[buildTypeIdx].name.c_str()));
5862 
5863         for (int operationTypesIdx = 0; operationTypesIdx < DE_LENGTH_OF_ARRAY(operationTypes); ++operationTypesIdx)
5864         {
5865             TestParams testParams{
5866                 buildTypes[buildTypeIdx].buildType,
5867                 VK_FORMAT_R32G32B32_SFLOAT,
5868                 false,
5869                 VK_INDEX_TYPE_NONE_KHR,
5870                 BottomTestType::TRIANGLES,
5871                 InstanceCullFlags::NONE,
5872                 false,
5873                 false,
5874                 false,
5875                 TopTestType::IDENTICAL_INSTANCES,
5876                 false,
5877                 false,
5878                 false,
5879                 VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR,
5880                 OT_TOP_ACCELERATION,
5881                 operationTypes[operationTypesIdx].operationType,
5882                 RTAS_DEFAULT_SIZE,
5883                 RTAS_DEFAULT_SIZE,
5884                 de::SharedPtr<TestConfiguration>(new SingleTriangleConfiguration()),
5885                 0u,
5886                 EmptyAccelerationStructureCase::NOT_EMPTY,
5887                 InstanceCustomIndexCase::NONE,
5888                 false,
5889                 0xFFu,
5890                 UpdateCase::NONE,
5891             };
5892             buildTypeGroup->addChild(
5893                 new RayTracingASBasicTestCase(ctx, operationTypes[operationTypesIdx].name, testParams));
5894         }
5895         group->addChild(buildTypeGroup.release());
5896     }
5897 }
5898 
addInstanceRayCullMaskTests(tcu::TestCaseGroup * group)5899 void addInstanceRayCullMaskTests(tcu::TestCaseGroup *group)
5900 {
5901     const struct
5902     {
5903         vk::VkAccelerationStructureBuildTypeKHR buildType;
5904         std::string name;
5905     } buildTypes[] = {
5906         {VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR, "cpu_built"},
5907         {VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, "gpu_built"},
5908     };
5909 
5910     const struct
5911     {
5912         InstanceCustomIndexCase customIndexCase;
5913         std::string name;
5914     } customIndexCases[] = {
5915         {InstanceCustomIndexCase::ANY_HIT, "ahit"},
5916         {InstanceCustomIndexCase::CLOSEST_HIT, "chit"},
5917         {InstanceCustomIndexCase::INTERSECTION, "isec"},
5918     };
5919 
5920     const struct
5921     {
5922         uint32_t cullMask;
5923         std::string name;
5924     } cullMask[] = {
5925         {0x000000AAu, "4_bits"},
5926         {0x00000055u, "4_bits_reverse"},
5927         {0xAAAAAAAAu, "16_bits"},
5928         {0x55555555u, "16_bits_reverse"},
5929     };
5930 
5931     auto &ctx = group->getTestContext();
5932 
5933     for (int buildTypeIdx = 0; buildTypeIdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeIdx)
5934     {
5935         de::MovePtr<tcu::TestCaseGroup> buildTypeGroup(
5936             new tcu::TestCaseGroup(ctx, buildTypes[buildTypeIdx].name.c_str()));
5937 
5938         for (int customIndexCaseIdx = 0; customIndexCaseIdx < DE_LENGTH_OF_ARRAY(customIndexCases);
5939              ++customIndexCaseIdx)
5940         {
5941             de::MovePtr<tcu::TestCaseGroup> customIndexCaseGroup(
5942                 new tcu::TestCaseGroup(ctx, customIndexCases[customIndexCaseIdx].name.c_str()));
5943 
5944             for (int cullMaskIdx = 0; cullMaskIdx < DE_LENGTH_OF_ARRAY(cullMask); ++cullMaskIdx)
5945             {
5946                 const auto &idxCase = customIndexCases[customIndexCaseIdx].customIndexCase;
5947                 const auto bottomGeometryType =
5948                     ((idxCase == InstanceCustomIndexCase::INTERSECTION) ? BottomTestType::AABBS :
5949                                                                           BottomTestType::TRIANGLES);
5950 
5951                 TestParams testParams{
5952                     buildTypes[buildTypeIdx].buildType,
5953                     VK_FORMAT_R32G32B32_SFLOAT,
5954                     false,
5955                     VK_INDEX_TYPE_NONE_KHR,
5956                     bottomGeometryType,
5957                     InstanceCullFlags::NONE,
5958                     false,
5959                     false,
5960                     false,
5961                     TopTestType::IDENTICAL_INSTANCES,
5962                     false,
5963                     false,
5964                     false,
5965                     VkBuildAccelerationStructureFlagsKHR(0u),
5966                     OT_NONE,
5967                     OP_NONE,
5968                     RTAS_DEFAULT_SIZE,
5969                     RTAS_DEFAULT_SIZE,
5970                     de::SharedPtr<TestConfiguration>(new CheckerboardConfiguration()),
5971                     0u,
5972                     EmptyAccelerationStructureCase::NOT_EMPTY,
5973                     customIndexCases[customIndexCaseIdx].customIndexCase,
5974                     true,
5975                     cullMask[cullMaskIdx].cullMask,
5976                     UpdateCase::NONE,
5977                 };
5978                 customIndexCaseGroup->addChild(
5979                     new RayTracingASBasicTestCase(ctx, cullMask[cullMaskIdx].name.c_str(), testParams));
5980             }
5981             buildTypeGroup->addChild(customIndexCaseGroup.release());
5982         }
5983         group->addChild(buildTypeGroup.release());
5984     }
5985 }
5986 
addGetDeviceAccelerationStructureCompabilityTests(tcu::TestCaseGroup * group)5987 void addGetDeviceAccelerationStructureCompabilityTests(tcu::TestCaseGroup *group)
5988 {
5989     struct
5990     {
5991         vk::VkAccelerationStructureBuildTypeKHR buildType;
5992         std::string name;
5993     } const buildTypes[] = {
5994         {VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR, "cpu_built"},
5995         {VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, "gpu_built"},
5996     };
5997 
5998     struct
5999     {
6000         OperationTarget target;
6001         std::string name;
6002     } const targets[] = {
6003         {OT_TOP_ACCELERATION, "top"},
6004         {OT_BOTTOM_ACCELERATION, "bottom"},
6005     };
6006 
6007     auto &ctx = group->getTestContext();
6008 
6009     for (int buildTypeIdx = 0; buildTypeIdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeIdx)
6010     {
6011         de::MovePtr<tcu::TestCaseGroup> buildTypeGroup(
6012             new tcu::TestCaseGroup(ctx, buildTypes[buildTypeIdx].name.c_str()));
6013 
6014         for (int targetIdx = 0; targetIdx < DE_LENGTH_OF_ARRAY(targets); ++targetIdx)
6015         {
6016             TestParams testParams{
6017                 buildTypes[buildTypeIdx].buildType, // buildType        - are we making AS on CPU or GPU
6018                 VK_FORMAT_R32G32B32_SFLOAT,         // vertexFormat
6019                 false,                              // padVertices
6020                 VK_INDEX_TYPE_NONE_KHR,             // indexType
6021                 BottomTestType::TRIANGLES,          // bottomTestType    - what kind of geometry is stored in bottom AS
6022                 InstanceCullFlags::NONE,            // cullFlags        - Flags for instances, if needed.
6023                 false, // bottomUsesAOP    - does bottom AS use arrays, or arrays of pointers
6024                 false, // bottomGeneric    - Bottom created as generic AS type.
6025                 false, // bottomUnboundedCreation - Create BLAS using buffers with unbounded memory.
6026                 TopTestType::
6027                     IDENTICAL_INSTANCES, // topTestType        - If instances are identical then bottom geometries must have different vertices/aabbs
6028                 false, // topUsesAOP        - does top AS use arrays, or arrays of pointers
6029                 false, // topGeneric        - Top created as generic AS type.
6030                 false, // topUnboundedCreation - Create TLAS using buffers with unbounded memory.
6031                 VkBuildAccelerationStructureFlagsKHR(0u),                          // buildFlags
6032                 targets[targetIdx].target,                                         // operationTarget
6033                 OP_NONE,                                                           // operationType
6034                 RTAS_DEFAULT_SIZE,                                                 // width
6035                 RTAS_DEFAULT_SIZE,                                                 // height
6036                 de::SharedPtr<TestConfiguration>(new CheckerboardConfiguration()), // testConfiguration
6037                 0u,                                                                // workerThreadsCount
6038                 EmptyAccelerationStructureCase::NOT_EMPTY,                         // emptyASCase
6039                 InstanceCustomIndexCase::NONE,                                     // instanceCustomIndexCase
6040                 false,                                                             // useCullMask
6041                 0xFFu,                                                             // cullMask
6042                 UpdateCase::NONE,                                                  // updateCase
6043             };
6044             buildTypeGroup->addChild(new RayTracingDeviceASCompabilityKHRTestCase(
6045                 ctx, targets[targetIdx].name.c_str(), de::SharedPtr<TestParams>(new TestParams(testParams))));
6046         }
6047         group->addChild(buildTypeGroup.release());
6048     }
6049 }
6050 
addUpdateHeaderBottomAddressTests(tcu::TestCaseGroup * group)6051 void addUpdateHeaderBottomAddressTests(tcu::TestCaseGroup *group)
6052 {
6053     struct
6054     {
6055         vk::VkAccelerationStructureBuildTypeKHR buildType;
6056         std::string name;
6057     } const buildTypes[] = {
6058         {VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR, "cpu_built"},
6059         {VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, "gpu_built"},
6060     };
6061 
6062     struct
6063     {
6064         TopTestType type;
6065         std::string name;
6066     } const instTypes[] = {
6067         {TopTestType::IDENTICAL_INSTANCES, "the_same_instances"},
6068         {TopTestType::DIFFERENT_INSTANCES, "different_instances"},
6069         {TopTestType::MIX_INSTANCES, "mix_same_diff_instances"},
6070     };
6071 
6072     auto &ctx = group->getTestContext();
6073 
6074     for (int buildTypeIdx = 0; buildTypeIdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeIdx)
6075     {
6076         de::MovePtr<tcu::TestCaseGroup> buildTypeGroup(
6077             new tcu::TestCaseGroup(ctx, buildTypes[buildTypeIdx].name.c_str()));
6078 
6079         for (int instTypeIdx = 0; instTypeIdx < DE_LENGTH_OF_ARRAY(instTypes); ++instTypeIdx)
6080         {
6081             TestParams testParams{
6082                 buildTypes[buildTypeIdx].buildType,        // buildType
6083                 VK_FORMAT_R32G32B32_SFLOAT,                // vertexFormat
6084                 false,                                     // padVertices
6085                 VK_INDEX_TYPE_NONE_KHR,                    // indexType
6086                 BottomTestType::TRIANGLES,                 // bottomTestType
6087                 InstanceCullFlags::NONE,                   // cullFlags
6088                 false,                                     // bottomUsesAOP
6089                 false,                                     // bottomGeneric
6090                 false,                                     // bottomUnboundedCreation
6091                 instTypes[instTypeIdx].type,               // topTestType
6092                 false,                                     // topUsesAOP
6093                 false,                                     // topGeneric
6094                 false,                                     // topUnboundedCreation
6095                 VkBuildAccelerationStructureFlagsKHR(0u),  // buildFlags
6096                 OT_TOP_ACCELERATION,                       // operationTarget
6097                 OP_NONE,                                   // operationType
6098                 RTAS_DEFAULT_SIZE,                         // width
6099                 RTAS_DEFAULT_SIZE,                         // height
6100                 de::SharedPtr<TestConfiguration>(DE_NULL), // testConfiguration
6101                 0u,                                        // workerThreadsCount
6102                 EmptyAccelerationStructureCase::NOT_EMPTY, // emptyASCase
6103                 InstanceCustomIndexCase::NONE,             // instanceCustomIndexCase
6104                 false,                                     // useCullMask
6105                 0xFFu,                                     // cullMask
6106                 UpdateCase::NONE,                          // updateCase
6107             };
6108             buildTypeGroup->addChild(new RayTracingHeaderBottomAddressTestCase(
6109                 ctx, instTypes[instTypeIdx].name.c_str(), de::SharedPtr<TestParams>(new TestParams(testParams))));
6110         }
6111         group->addChild(buildTypeGroup.release());
6112     }
6113 }
6114 
addQueryPoolResultsTests(TestCaseGroup * group)6115 void addQueryPoolResultsTests(TestCaseGroup *group)
6116 {
6117     std::pair<VkAccelerationStructureBuildTypeKHR, const char *> const buildTypes[]{
6118         {VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR, "cpu"},
6119         {VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, "gpu"},
6120     };
6121 
6122     std::pair<bool, const char *> const storeTypes[]{{false, "memory"}, {true, "buffer"}};
6123 
6124     std::pair<QueryPoolResultsParams::Type, const char *> const queryTypes[]{
6125         {QueryPoolResultsParams::Type::StructureSize, "structure_size"},
6126         {QueryPoolResultsParams::Type::PointerCount, "pointer_count"}};
6127 
6128     std::pair<bool, const char *> const buildWithCompacted[]{{false, "no_compacted"}, {true, "enable_compacted"}};
6129 
6130     auto &testContext = group->getTestContext();
6131     for (const auto &buildType : buildTypes)
6132     {
6133         auto buildTypeGroup = makeMovePtr<TestCaseGroup>(testContext, buildType.second);
6134         for (const auto &compacted : buildWithCompacted)
6135         {
6136             auto buildCompactedGroup = makeMovePtr<TestCaseGroup>(testContext, compacted.second);
6137             for (const auto &storeType : storeTypes)
6138             {
6139                 auto storeTypeGroup = makeMovePtr<TestCaseGroup>(testContext, storeType.second);
6140                 for (const auto &queryType : queryTypes)
6141                 {
6142                     QueryPoolResultsParams p;
6143                     p.buildType  = buildType.first;
6144                     p.inVkBuffer = storeType.first;
6145                     p.queryType  = queryType.first;
6146                     p.blasCount  = 5;
6147                     p.compacted  = compacted.first;
6148 
6149                     storeTypeGroup->addChild(
6150                         new QueryPoolResultsCase(testContext, queryType.second, makeSharedFrom(p)));
6151                 }
6152                 buildCompactedGroup->addChild(storeTypeGroup.release());
6153             }
6154             buildTypeGroup->addChild(buildCompactedGroup.release());
6155         }
6156         group->addChild(buildTypeGroup.release());
6157     }
6158 }
6159 
addCopyWithinPipelineTests(TestCaseGroup * group)6160 void addCopyWithinPipelineTests(TestCaseGroup *group)
6161 {
6162     std::pair<VkAccelerationStructureBuildTypeKHR, const char *> const buildTypes[]{
6163         {VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR, "cpu"},
6164         {VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, "gpu"},
6165     };
6166     std::pair<CopyWithinPipelineParams::Type, const char *> const testTypes[]{
6167         {CopyWithinPipelineParams::Type::StageASCopyBit, "stage_as_copy_bit"},
6168         {CopyWithinPipelineParams::Type::StageAllTransferBit, "stage_all_transfer"},
6169         {CopyWithinPipelineParams::Type::AccessSBTReadBit, "access_sbt_read"}};
6170 
6171     auto &testContext = group->getTestContext();
6172     for (const auto &buildType : buildTypes)
6173     {
6174         auto buildTypeGroup = makeMovePtr<TestCaseGroup>(testContext, buildType.second);
6175         for (const auto &testType : testTypes)
6176         {
6177             CopyWithinPipelineParams p;
6178             p.width  = 16;
6179             p.height = 16;
6180             p.build  = buildType.first;
6181             p.type   = testType.first;
6182 
6183             buildTypeGroup->addChild(new PipelineStageASCase(testContext, testType.second, makeSharedFrom(p)));
6184         }
6185         group->addChild(buildTypeGroup.release());
6186     }
6187 }
6188 
addUpdateTests(TestCaseGroup * group)6189 void addUpdateTests(TestCaseGroup *group)
6190 {
6191     const struct
6192     {
6193         vk::VkAccelerationStructureBuildTypeKHR buildType;
6194         std::string name;
6195     } buildTypes[] = {
6196         {VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR, "cpu"},
6197         {VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, "gpu"},
6198     };
6199 
6200     struct
6201     {
6202         UpdateCase updateType;
6203         const char *name;
6204     } updateTypes[] = {
6205         {UpdateCase::VERTICES, "vertices"},
6206         {UpdateCase::INDICES, "indices"},
6207         {UpdateCase::TRANSFORM, "transform"},
6208     };
6209 
6210     auto &ctx = group->getTestContext();
6211 
6212     for (int buildTypeIdx = 0; buildTypeIdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildTypeIdx)
6213     {
6214         de::MovePtr<tcu::TestCaseGroup> buildTypeGroup(
6215             new tcu::TestCaseGroup(ctx, buildTypes[buildTypeIdx].name.c_str()));
6216 
6217         for (int updateTypesIdx = 0; updateTypesIdx < DE_LENGTH_OF_ARRAY(updateTypes); ++updateTypesIdx)
6218         {
6219             TestParams testParams{
6220                 buildTypes[buildTypeIdx].buildType,
6221                 VK_FORMAT_R32G32B32_SFLOAT,
6222                 false,
6223                 VK_INDEX_TYPE_UINT16,
6224                 BottomTestType::TRIANGLES,
6225                 InstanceCullFlags::NONE,
6226                 false,
6227                 false,
6228                 false,
6229                 TopTestType::IDENTICAL_INSTANCES,
6230                 false,
6231                 false,
6232                 false,
6233                 VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR,
6234                 OT_TOP_ACCELERATION,
6235                 OP_NONE,
6236                 RTAS_DEFAULT_SIZE,
6237                 RTAS_DEFAULT_SIZE,
6238                 de::SharedPtr<TestConfiguration>(new UpdateableASConfiguration()),
6239                 0u,
6240                 EmptyAccelerationStructureCase::NOT_EMPTY,
6241                 InstanceCustomIndexCase::NONE,
6242                 false,
6243                 0xFFu,
6244                 updateTypes[updateTypesIdx].updateType,
6245             };
6246             buildTypeGroup->addChild(new ASUpdateCase(ctx, updateTypes[updateTypesIdx].name, testParams));
6247         }
6248         group->addChild(buildTypeGroup.release());
6249     }
6250 }
6251 
createAccelerationStructuresTests(tcu::TestContext & testCtx)6252 tcu::TestCaseGroup *createAccelerationStructuresTests(tcu::TestContext &testCtx)
6253 {
6254     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "acceleration_structures"));
6255 
6256     // Test building AS with different build types, build flags and geometries/instances using arrays or arrays of pointers
6257     addTestGroup(group.get(), "flags", addBasicBuildingTests);
6258     // Test building AS with different vertex and index formats
6259     addTestGroup(group.get(), "format", addVertexIndexFormatsTests);
6260     // Test copying, compaction and serialization of AS
6261     addTestGroup(group.get(), "operations", addOperationTests);
6262     // Test host threading operations
6263     addTestGroup(group.get(), "host_threading", addHostThreadingOperationTests);
6264     // Test using AS as function argument using both pointers and bare values
6265     addTestGroup(group.get(), "function_argument", addFuncArgTests);
6266     // Test building AS with counterclockwise triangles and/or disabling face culling
6267     addTestGroup(group.get(), "instance_triangle_culling", addInstanceTriangleCullingTests);
6268     // Test for CullMaskKHR builtin as a part of VK_KHR_ray_tracing_maintenance1
6269     addTestGroup(group.get(), "ray_cull_mask", addInstanceRayCullMaskTests);
6270     // Exercise dynamic indexing of acceleration structures
6271     addTestGroup(group.get(), "dynamic_indexing", addDynamicIndexingTests);
6272     // Test building empty acceleration structures using different methods
6273     addTestGroup(group.get(), "empty", addEmptyAccelerationStructureTests);
6274     // Test using different values for the instance index and checking them in shaders
6275     addTestGroup(group.get(), "instance_index", addInstanceIndexTests);
6276     // Test updating instance index using both in-place and separate src/dst acceleration structures
6277     addTestGroup(group.get(), "instance_update", addInstanceUpdateTests);
6278     addTestGroup(group.get(), "device_compability_khr", addGetDeviceAccelerationStructureCompabilityTests);
6279     addTestGroup(group.get(), "header_bottom_address", addUpdateHeaderBottomAddressTests);
6280     // Test for a new VkQueryPool queries for VK_KHR_ray_tracing_maintenance1
6281     addTestGroup(group.get(), "query_pool_results", addQueryPoolResultsTests);
6282     // Tests ACCELLERATION_STRUCTURE_COPY and ACCESS_2_SBT_READ with VK_KHR_ray_tracing_maintenance1
6283     addTestGroup(group.get(), "copy_within_pipeline", addCopyWithinPipelineTests);
6284     // Tests updating AS via replacing vertex/index/transform buffers
6285     addTestGroup(group.get(), "update", addUpdateTests);
6286 
6287     return group.release();
6288 }
6289 
6290 } // namespace RayTracing
6291 
6292 } // namespace vkt
6293