xref: /aosp_15_r20/external/deqp/external/vulkancts/modules/vulkan/ray_tracing/vktRayTracingBuildTests.cpp (revision 35238bce31c2a825756842865a792f8cf7f89930)
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2019 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Ray Tracing Build tests
22  *//*--------------------------------------------------------------------*/
23 
24 #include "vktRayTracingBuildTests.hpp"
25 
26 #include "vkDefs.hpp"
27 
28 #include "vktTestCase.hpp"
29 #include "vkCmdUtil.hpp"
30 #include "vkObjUtil.hpp"
31 #include "vkBuilderUtil.hpp"
32 #include "vkBarrierUtil.hpp"
33 #include "vkBufferWithMemory.hpp"
34 #include "vkImageWithMemory.hpp"
35 #include "vkImageUtil.hpp"
36 #include "vkTypeUtil.hpp"
37 
38 #include "tcuTextureUtil.hpp"
39 
40 #include "vkRayTracingUtil.hpp"
41 
42 #include "deClock.h"
43 
44 #include <cmath>
45 #include <limits>
46 #include <iostream>
47 
48 namespace vkt
49 {
50 namespace RayTracing
51 {
52 namespace
53 {
54 using namespace vk;
55 using namespace std;
56 
57 static const VkFlags ALL_RAY_TRACING_STAGES = VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
58                                               VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | VK_SHADER_STAGE_MISS_BIT_KHR |
59                                               VK_SHADER_STAGE_INTERSECTION_BIT_KHR | VK_SHADER_STAGE_CALLABLE_BIT_KHR;
60 
// Kind of geometry placed in the bottom-level acceleration structures.
enum TestType
{
    TEST_TYPE_TRIANGLES = 0, // every bottom-level structure holds triangle geometry
    TEST_TYPE_AABBS     = 1, // every bottom-level structure holds AABB geometry
    TEST_TYPE_MIXED     = 2, // even-indexed structures hold triangles, odd-indexed ones hold AABBs
};
67 
68 struct CaseDef
69 {
70     TestType testType;
71     uint32_t width;
72     uint32_t height;
73     uint32_t squaresGroupCount;
74     uint32_t geometriesGroupCount;
75     uint32_t instancesGroupCount;
76     bool deferredOperation;
77     uint32_t workerThreadsCount;
78     bool deviceBuild;
79 };
80 
getShaderGroupSize(const InstanceInterface & vki,const VkPhysicalDevice physicalDevice)81 uint32_t getShaderGroupSize(const InstanceInterface &vki, const VkPhysicalDevice physicalDevice)
82 {
83     de::MovePtr<RayTracingProperties> rayTracingPropertiesKHR;
84 
85     rayTracingPropertiesKHR = makeRayTracingProperties(vki, physicalDevice);
86     return rayTracingPropertiesKHR->getShaderGroupHandleSize();
87 }
88 
getShaderGroupBaseAlignment(const InstanceInterface & vki,const VkPhysicalDevice physicalDevice)89 uint32_t getShaderGroupBaseAlignment(const InstanceInterface &vki, const VkPhysicalDevice physicalDevice)
90 {
91     de::MovePtr<RayTracingProperties> rayTracingPropertiesKHR;
92 
93     rayTracingPropertiesKHR = makeRayTracingProperties(vki, physicalDevice);
94     return rayTracingPropertiesKHR->getShaderGroupBaseAlignment();
95 }
96 
makeImageCreateInfo(uint32_t width,uint32_t height,VkFormat format)97 VkImageCreateInfo makeImageCreateInfo(uint32_t width, uint32_t height, VkFormat format)
98 {
99     const VkImageUsageFlags usage =
100         VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
101     const VkImageCreateInfo imageCreateInfo = {
102         VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
103         DE_NULL,                             // const void* pNext;
104         (VkImageCreateFlags)0u,              // VkImageCreateFlags flags;
105         VK_IMAGE_TYPE_2D,                    // VkImageType imageType;
106         format,                              // VkFormat format;
107         makeExtent3D(width, height, 1u),     // VkExtent3D extent;
108         1u,                                  // uint32_t mipLevels;
109         1u,                                  // uint32_t arrayLayers;
110         VK_SAMPLE_COUNT_1_BIT,               // VkSampleCountFlagBits samples;
111         VK_IMAGE_TILING_OPTIMAL,             // VkImageTiling tiling;
112         usage,                               // VkImageUsageFlags usage;
113         VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
114         0u,                                  // uint32_t queueFamilyIndexCount;
115         DE_NULL,                             // const uint32_t* pQueueFamilyIndices;
116         VK_IMAGE_LAYOUT_UNDEFINED            // VkImageLayout initialLayout;
117     };
118 
119     return imageCreateInfo;
120 }
121 
122 class RayTracingBuildTestInstance : public TestInstance
123 {
124 public:
125     typedef de::SharedPtr<BottomLevelAccelerationStructure> BlasPtr;
126     typedef de::SharedPtr<TopLevelAccelerationStructure> TlasPtr;
127     typedef BottomLevelAccelerationStructurePool BlasPool;
128 
129     RayTracingBuildTestInstance(Context &context, const CaseDef &data);
130     ~RayTracingBuildTestInstance(void);
131     tcu::TestStatus iterate(void);
132 
133 protected:
134     bool verifyAllocationCount() const;
135     void checkSupportInInstance(void) const;
136     uint32_t validateBuffer(de::MovePtr<BufferWithMemory> buffer);
137     de::MovePtr<BufferWithMemory> runTest(bool useGpuBuild, uint32_t workerThreadsCount);
138     TlasPtr initTopAccelerationStructure(bool useGpuBuild, uint32_t workerThreadsCount, const BlasPool &pool);
139     void createTopAccelerationStructure(VkCommandBuffer cmdBuffer, TopLevelAccelerationStructure *tlas);
140     void initBottomAccelerationStructures(BlasPool &pool, bool useGpuBuild, uint32_t workerThreadsCount) const;
141     void initBottomAccelerationStructure(BlasPtr blas, bool useGpuBuild, uint32_t workerThreadsCount,
142                                          tcu::UVec2 &startPos, bool triangles) const;
143 
144 private:
145     CaseDef m_data;
146     const VkFormat m_format;
147 };
148 
RayTracingBuildTestInstance(Context & context,const CaseDef & data)149 RayTracingBuildTestInstance::RayTracingBuildTestInstance(Context &context, const CaseDef &data)
150     : vkt::TestInstance(context)
151     , m_data(data)
152     , m_format(VK_FORMAT_R32_UINT)
153 {
154 }
155 
~RayTracingBuildTestInstance(void)156 RayTracingBuildTestInstance::~RayTracingBuildTestInstance(void)
157 {
158 }
159 
160 class RayTracingTestCase : public TestCase
161 {
162 public:
163     RayTracingTestCase(tcu::TestContext &context, const char *name, const CaseDef data);
164     ~RayTracingTestCase(void);
165 
166     virtual void initPrograms(SourceCollections &programCollection) const;
167     virtual TestInstance *createInstance(Context &context) const;
168     virtual void checkSupport(Context &context) const;
169 
170 private:
171     CaseDef m_data;
172 };
173 
RayTracingTestCase(tcu::TestContext & context,const char * name,const CaseDef data)174 RayTracingTestCase::RayTracingTestCase(tcu::TestContext &context, const char *name, const CaseDef data)
175     : vkt::TestCase(context, name)
176     , m_data(data)
177 {
178     DE_ASSERT((m_data.width * m_data.height) ==
179               (m_data.squaresGroupCount * m_data.geometriesGroupCount * m_data.instancesGroupCount));
180 }
181 
~RayTracingTestCase(void)182 RayTracingTestCase::~RayTracingTestCase(void)
183 {
184 }
185 
checkSupport(Context & context) const186 void RayTracingTestCase::checkSupport(Context &context) const
187 {
188     context.requireDeviceFunctionality("VK_KHR_acceleration_structure");
189     context.requireDeviceFunctionality("VK_KHR_ray_tracing_pipeline");
190 
191     const VkPhysicalDeviceRayTracingPipelineFeaturesKHR &rayTracingPipelineFeaturesKHR =
192         context.getRayTracingPipelineFeatures();
193     if (rayTracingPipelineFeaturesKHR.rayTracingPipeline == false)
194         TCU_THROW(NotSupportedError, "Requires VkPhysicalDeviceRayTracingPipelineFeaturesKHR.rayTracingPipeline");
195 
196     const VkPhysicalDeviceAccelerationStructureFeaturesKHR &accelerationStructureFeaturesKHR =
197         context.getAccelerationStructureFeatures();
198     if (accelerationStructureFeaturesKHR.accelerationStructure == false)
199         TCU_THROW(TestError, "VK_KHR_ray_tracing_pipeline requires "
200                              "VkPhysicalDeviceAccelerationStructureFeaturesKHR.accelerationStructure");
201 
202     if (!m_data.deviceBuild)
203     {
204         context.requireDeviceFunctionality("VK_KHR_deferred_host_operations");
205         if (accelerationStructureFeaturesKHR.accelerationStructureHostCommands == false)
206             TCU_THROW(NotSupportedError,
207                       "Requires VkPhysicalDeviceAccelerationStructureFeaturesKHR.accelerationStructureHostCommands");
208     }
209 }
210 
initPrograms(SourceCollections & programCollection) const211 void RayTracingTestCase::initPrograms(SourceCollections &programCollection) const
212 {
213     const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);
214     {
215         std::stringstream css;
216         css << "#version 460 core\n"
217                "#extension GL_EXT_ray_tracing : require\n"
218                "layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
219                "hitAttributeEXT vec3 attribs;\n"
220                "layout(r32ui, set = 0, binding = 0) uniform uimage2D result;\n"
221                "void main()\n"
222                "{\n"
223                "  uvec4 color = uvec4(1,0,0,1);\n"
224                "  imageStore(result, ivec2(gl_LaunchIDEXT.xy), color);\n"
225                "}\n";
226 
227         programCollection.glslSources.add("ahit") << glu::AnyHitSource(updateRayTracingGLSL(css.str())) << buildOptions;
228     }
229 
230     {
231         std::stringstream css;
232         css << "#version 460 core\n"
233                "#extension GL_EXT_ray_tracing : require\n"
234                "layout(location = 0) rayPayloadInEXT vec3 unusedPayload;\n"
235                "layout(r32ui, set = 0, binding = 0) uniform uimage2D result;\n"
236                "void main()\n"
237                "{\n"
238                "  uvec4 color = uvec4(2,0,0,1);\n"
239                "  imageStore(result, ivec2(gl_LaunchIDEXT.xy), color);\n"
240                "}\n";
241 
242         programCollection.glslSources.add("miss") << glu::MissSource(updateRayTracingGLSL(css.str())) << buildOptions;
243     }
244 
245     {
246         std::stringstream css;
247         css << "#version 460 core\n"
248                "#extension GL_EXT_ray_tracing : require\n"
249                "hitAttributeEXT vec3 hitAttribute;\n"
250                "void main()\n"
251                "{\n"
252                "  reportIntersectionEXT(1.0f, 0);\n"
253                "}\n";
254 
255         programCollection.glslSources.add("sect")
256             << glu::IntersectionSource(updateRayTracingGLSL(css.str())) << buildOptions;
257     }
258 
259     programCollection.glslSources.add("rgen")
260         << glu::RaygenSource(updateRayTracingGLSL(getCommonRayGenerationShader())) << buildOptions;
261 }
262 
createInstance(Context & context) const263 TestInstance *RayTracingTestCase::createInstance(Context &context) const
264 {
265     return new RayTracingBuildTestInstance(context, m_data);
266 }
267 
initTopAccelerationStructure(bool useGpuBuild,uint32_t workerThreadsCount,const BlasPool & pool)268 auto RayTracingBuildTestInstance::initTopAccelerationStructure(bool useGpuBuild, uint32_t workerThreadsCount,
269                                                                const BlasPool &pool) -> TlasPtr
270 {
271     de::MovePtr<TopLevelAccelerationStructure> result = makeTopLevelAccelerationStructure();
272     const std::vector<BlasPtr> &blases                = pool.structures();
273 
274     result->setInstanceCount(blases.size());
275     result->setBuildType(useGpuBuild ? VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR :
276                                        VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR);
277     result->setDeferredOperation(m_data.deferredOperation, workerThreadsCount);
278 
279     for (size_t instanceNdx = 0; instanceNdx < blases.size(); ++instanceNdx)
280     {
281         const bool triangles =
282             (m_data.testType == TEST_TYPE_TRIANGLES) || (m_data.testType == TEST_TYPE_MIXED && (instanceNdx & 1) == 0);
283         uint32_t instanceShaderBindingTableRecordOffset = triangles ? 0 : 1;
284 
285         result->addInstance(blases[instanceNdx], vk::identityMatrix3x4, 0, 0xFF,
286                             instanceShaderBindingTableRecordOffset);
287     }
288 
289     return TlasPtr(result.release());
290 }
291 
createTopAccelerationStructure(VkCommandBuffer cmdBuffer,TopLevelAccelerationStructure * tlas)292 void RayTracingBuildTestInstance::createTopAccelerationStructure(VkCommandBuffer cmdBuffer,
293                                                                  TopLevelAccelerationStructure *tlas)
294 {
295     const DeviceInterface &vkd = m_context.getDeviceInterface();
296     const VkDevice device      = m_context.getDevice();
297     Allocator &allocator       = m_context.getDefaultAllocator();
298 
299     tlas->createAndBuild(vkd, device, cmdBuffer, allocator);
300 }
301 
initBottomAccelerationStructure(BlasPtr blas,bool useGpuBuild,uint32_t workerThreadsCount,tcu::UVec2 & startPos,bool triangles) const302 void RayTracingBuildTestInstance::initBottomAccelerationStructure(BlasPtr blas, bool useGpuBuild,
303                                                                   uint32_t workerThreadsCount, tcu::UVec2 &startPos,
304                                                                   bool triangles) const
305 {
306     blas->setBuildType(useGpuBuild ? VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR :
307                                      VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR);
308     blas->setDeferredOperation(m_data.deferredOperation, workerThreadsCount);
309     blas->setGeometryCount(m_data.geometriesGroupCount);
310 
311     for (size_t geometryNdx = 0; geometryNdx < m_data.geometriesGroupCount; ++geometryNdx)
312     {
313         std::vector<tcu::Vec3> geometryData;
314 
315         geometryData.reserve(m_data.squaresGroupCount * (triangles ? 3u : 2u));
316 
317         for (size_t squareNdx = 0; squareNdx < m_data.squaresGroupCount; ++squareNdx)
318         {
319             const uint32_t n = m_data.width * startPos.y() + startPos.x();
320             const float x0   = float(startPos.x() + 0) / float(m_data.width);
321             const float y0   = float(startPos.y() + 0) / float(m_data.height);
322             const float x1   = float(startPos.x() + 1) / float(m_data.width);
323             const float y1   = float(startPos.y() + 1) / float(m_data.height);
324             const float z    = (n % 7 == 0) ? +1.0f : -1.0f;
325             const uint32_t m = (n + 13) % (m_data.width * m_data.height);
326 
327             if (triangles)
328             {
329                 const float xm = (x0 + x1) / 2.0f;
330                 const float ym = (y0 + y1) / 2.0f;
331 
332                 geometryData.push_back(tcu::Vec3(x0, y0, z));
333                 geometryData.push_back(tcu::Vec3(x1, ym, z));
334                 geometryData.push_back(tcu::Vec3(xm, y1, z));
335             }
336             else
337             {
338                 geometryData.push_back(tcu::Vec3(x0, y0, z));
339                 geometryData.push_back(tcu::Vec3(x1, y1, z));
340             }
341 
342             startPos.y() = m / m_data.width;
343             startPos.x() = m % m_data.width;
344         }
345 
346         blas->addGeometry(geometryData, triangles);
347     }
348 }
349 
initBottomAccelerationStructures(BlasPool & pool,bool useGpuBuild,uint32_t workerThreadsCount) const350 void RayTracingBuildTestInstance::initBottomAccelerationStructures(BlasPool &pool, bool useGpuBuild,
351                                                                    uint32_t workerThreadsCount) const
352 {
353     tcu::UVec2 startPos{};
354     const DeviceInterface &vkd     = m_context.getDeviceInterface();
355     const VkDevice device          = m_context.getDevice();
356     Allocator &allocator           = m_context.getDefaultAllocator();
357     const VkDeviceSize maxBuffSize = 3 * (VkDeviceSize(1) << 30); // 3GB
358 
359     for (size_t instanceNdx = 0; instanceNdx < m_data.instancesGroupCount; ++instanceNdx)
360         pool.add();
361 
362     const std::vector<BlasPtr> &blases = pool.structures();
363 
364     for (size_t instanceNdx = 0; instanceNdx < m_data.instancesGroupCount; ++instanceNdx)
365     {
366         const bool triangles =
367             (m_data.testType == TEST_TYPE_TRIANGLES) || (m_data.testType == TEST_TYPE_MIXED && (instanceNdx & 1) == 0);
368         initBottomAccelerationStructure(blases[instanceNdx], useGpuBuild, workerThreadsCount, startPos, triangles);
369     }
370 
371     pool.batchCreateAdjust(vkd, device, allocator, maxBuffSize);
372 }
373 
verifyAllocationCount() const374 bool RayTracingBuildTestInstance::verifyAllocationCount() const
375 {
376     BlasPool pool{};
377     tcu::UVec2 startPos{};
378     const DeviceInterface &vkd        = m_context.getDeviceInterface();
379     const VkDevice device             = m_context.getDevice();
380     auto &log                         = m_context.getTestContext().getLog();
381     const size_t avvailableAllocCount = m_context.getDeviceProperties().limits.maxMemoryAllocationCount;
382     const VkDeviceSize maxBufferSize  = 3 * (VkDeviceSize(1) << 30); // 3GB
383 
384     for (size_t instanceNdx = 0; instanceNdx < m_data.instancesGroupCount; ++instanceNdx)
385         pool.add();
386 
387     const std::vector<BlasPtr> &blases = pool.structures();
388 
389     for (size_t instanceNdx = 0; instanceNdx < m_data.instancesGroupCount; ++instanceNdx)
390     {
391         const bool triangles =
392             (m_data.testType == TEST_TYPE_TRIANGLES) || (m_data.testType == TEST_TYPE_MIXED && (instanceNdx & 1) == 0);
393         initBottomAccelerationStructure(blases[instanceNdx], true, 0, startPos, triangles);
394     }
395 
396     const size_t poolAllocationCount     = pool.getAllocationCount(vkd, device, maxBufferSize);
397     const size_t requiredAllocationCount = poolAllocationCount + 120;
398 
399     log << tcu::TestLog::Message << "The test consumes " << poolAllocationCount << " allocations out of "
400         << avvailableAllocCount << " available" << tcu::TestLog::EndMessage;
401 
402     return (requiredAllocationCount < avvailableAllocCount);
403 }
404 
runTest(bool useGpuBuild,uint32_t workerThreadsCount)405 de::MovePtr<BufferWithMemory> RayTracingBuildTestInstance::runTest(bool useGpuBuild, uint32_t workerThreadsCount)
406 {
407     const InstanceInterface &vki            = m_context.getInstanceInterface();
408     const DeviceInterface &vkd              = m_context.getDeviceInterface();
409     const VkDevice device                   = m_context.getDevice();
410     const VkPhysicalDevice physicalDevice   = m_context.getPhysicalDevice();
411     const uint32_t queueFamilyIndex         = m_context.getUniversalQueueFamilyIndex();
412     const VkQueue queue                     = m_context.getUniversalQueue();
413     Allocator &allocator                    = m_context.getDefaultAllocator();
414     const uint32_t pixelCount               = m_data.width * m_data.height;
415     const uint32_t shaderGroupHandleSize    = getShaderGroupSize(vki, physicalDevice);
416     const uint32_t shaderGroupBaseAlignment = getShaderGroupBaseAlignment(vki, physicalDevice);
417 
418     const Move<VkDescriptorSetLayout> descriptorSetLayout =
419         DescriptorSetLayoutBuilder()
420             .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, ALL_RAY_TRACING_STAGES)
421             .addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, ALL_RAY_TRACING_STAGES)
422             .build(vkd, device);
423     const Move<VkDescriptorPool> descriptorPool =
424         DescriptorPoolBuilder()
425             .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
426             .addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR)
427             .build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
428     const Move<VkDescriptorSet> descriptorSet   = makeDescriptorSet(vkd, device, *descriptorPool, *descriptorSetLayout);
429     const Move<VkPipelineLayout> pipelineLayout = makePipelineLayout(vkd, device, descriptorSetLayout.get());
430     const Move<VkCommandPool> cmdPool           = createCommandPool(vkd, device, 0, queueFamilyIndex);
431     const Move<VkCommandBuffer> cmdBuffer =
432         allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
433 
434     de::MovePtr<RayTracingPipeline> rayTracingPipeline = de::newMovePtr<RayTracingPipeline>();
435     Move<VkShaderModule> raygenShader = createShaderModule(vkd, device, m_context.getBinaryCollection().get("rgen"), 0);
436     Move<VkShaderModule> hitShader    = createShaderModule(vkd, device, m_context.getBinaryCollection().get("ahit"), 0);
437     Move<VkShaderModule> missShader   = createShaderModule(vkd, device, m_context.getBinaryCollection().get("miss"), 0);
438     Move<VkShaderModule> intersectionShader =
439         createShaderModule(vkd, device, m_context.getBinaryCollection().get("sect"), 0);
440     rayTracingPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR, *raygenShader, 0u);
441     rayTracingPipeline->addShader(VK_SHADER_STAGE_ANY_HIT_BIT_KHR, *hitShader, 1u);
442     rayTracingPipeline->addShader(VK_SHADER_STAGE_ANY_HIT_BIT_KHR, *hitShader, 2u);
443     rayTracingPipeline->addShader(VK_SHADER_STAGE_INTERSECTION_BIT_KHR, *intersectionShader, 2u);
444     rayTracingPipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR, *missShader, 3u);
445     Move<VkPipeline> pipeline = rayTracingPipeline->createPipeline(vkd, device, *pipelineLayout);
446     const de::MovePtr<BufferWithMemory> raygenShaderBindingTable = rayTracingPipeline->createShaderBindingTable(
447         vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 0u, 1u);
448     const de::MovePtr<BufferWithMemory> hitShaderBindingTable = rayTracingPipeline->createShaderBindingTable(
449         vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 1u, 2u);
450     const de::MovePtr<BufferWithMemory> missShaderBindingTable = rayTracingPipeline->createShaderBindingTable(
451         vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 3u, 1u);
452     const VkStridedDeviceAddressRegionKHR raygenShaderBindingTableRegion =
453         makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, raygenShaderBindingTable->get(), 0),
454                                           shaderGroupHandleSize, shaderGroupHandleSize);
455     const VkStridedDeviceAddressRegionKHR hitShaderBindingTableRegion =
456         makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, hitShaderBindingTable->get(), 0),
457                                           shaderGroupHandleSize, 2u * shaderGroupHandleSize);
458     const VkStridedDeviceAddressRegionKHR missShaderBindingTableRegion =
459         makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, missShaderBindingTable->get(), 0),
460                                           shaderGroupHandleSize, shaderGroupHandleSize);
461     const VkStridedDeviceAddressRegionKHR callableShaderBindingTableRegion =
462         makeStridedDeviceAddressRegionKHR(DE_NULL, 0, 0);
463 
464     const VkImageCreateInfo imageCreateInfo = makeImageCreateInfo(m_data.width, m_data.height, m_format);
465     const VkImageSubresourceRange imageSubresourceRange =
466         makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0, 1u);
467     const de::MovePtr<ImageWithMemory> image = de::MovePtr<ImageWithMemory>(
468         new ImageWithMemory(vkd, device, allocator, imageCreateInfo, MemoryRequirement::Any));
469     const Move<VkImageView> imageView =
470         makeImageView(vkd, device, **image, VK_IMAGE_VIEW_TYPE_2D, m_format, imageSubresourceRange);
471 
472     const VkBufferCreateInfo bufferCreateInfo =
473         makeBufferCreateInfo(pixelCount * sizeof(uint32_t), VK_BUFFER_USAGE_TRANSFER_DST_BIT);
474     const VkImageSubresourceLayers bufferImageSubresourceLayers =
475         makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
476     const VkBufferImageCopy bufferImageRegion =
477         makeBufferImageCopy(makeExtent3D(m_data.width, m_data.height, 1u), bufferImageSubresourceLayers);
478     de::MovePtr<BufferWithMemory> buffer = de::MovePtr<BufferWithMemory>(
479         new BufferWithMemory(vkd, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible));
480 
481     const VkDescriptorImageInfo descriptorImageInfo =
482         makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
483 
484     const VkImageMemoryBarrier preImageBarrier =
485         makeImageMemoryBarrier(0u, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
486                                VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, **image, imageSubresourceRange);
487     const VkImageMemoryBarrier postImageBarrier = makeImageMemoryBarrier(
488         VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
489         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL, **image, imageSubresourceRange);
490     const VkMemoryBarrier postTraceMemoryBarrier =
491         makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
492     const VkMemoryBarrier postCopyMemoryBarrier =
493         makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
494     const VkClearValue clearValue = makeClearValueColorU32(5u, 5u, 5u, 255u);
495 
496     qpWatchDog *watchDog = m_context.getTestContext().getWatchDog();
497     TlasPtr topLevelAccelerationStructure;
498     BottomLevelAccelerationStructurePool blasPool;
499 
500     initBottomAccelerationStructures(blasPool, useGpuBuild, workerThreadsCount);
501     blasPool.batchBuild(vkd, device, *cmdPool, queue, watchDog);
502 
503     beginCommandBuffer(vkd, *cmdBuffer, 0u);
504     {
505         cmdPipelineImageMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
506                                       VK_PIPELINE_STAGE_TRANSFER_BIT, &preImageBarrier);
507         vkd.cmdClearColorImage(*cmdBuffer, **image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clearValue.color, 1,
508                                &imageSubresourceRange);
509         cmdPipelineImageMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT,
510                                       VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, &postImageBarrier);
511 
512         topLevelAccelerationStructure = initTopAccelerationStructure(useGpuBuild, workerThreadsCount, blasPool);
513         createTopAccelerationStructure(*cmdBuffer, topLevelAccelerationStructure.get());
514 
515         VkWriteDescriptorSetAccelerationStructureKHR accelerationStructureWriteDescriptorSet = {
516             VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, //  VkStructureType sType;
517             DE_NULL,                                                           //  const void* pNext;
518             1u,                                                                //  uint32_t accelerationStructureCount;
519             topLevelAccelerationStructure->getPtr(), //  const VkAccelerationStructureKHR* pAccelerationStructures;
520         };
521 
522         DescriptorSetUpdateBuilder()
523             .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
524                          VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descriptorImageInfo)
525             .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
526                          VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &accelerationStructureWriteDescriptorSet)
527             .update(vkd, device);
528 
529         vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0, 1,
530                                   &descriptorSet.get(), 0, DE_NULL);
531 
532         vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipeline);
533 
534         cmdTraceRays(vkd, *cmdBuffer, &raygenShaderBindingTableRegion, &missShaderBindingTableRegion,
535                      &hitShaderBindingTableRegion, &callableShaderBindingTableRegion, m_data.width, m_data.height, 1);
536 
537         cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR,
538                                  VK_PIPELINE_STAGE_TRANSFER_BIT, &postTraceMemoryBarrier);
539 
540         vkd.cmdCopyImageToBuffer(*cmdBuffer, **image, VK_IMAGE_LAYOUT_GENERAL, **buffer, 1u, &bufferImageRegion);
541 
542         cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
543                                  &postCopyMemoryBarrier);
544     }
545     endCommandBuffer(vkd, *cmdBuffer);
546 
547     submitCommandsAndWait(vkd, device, queue, cmdBuffer.get());
548 
549     invalidateMappedMemoryRange(vkd, device, buffer->getAllocation().getMemory(), buffer->getAllocation().getOffset(),
550                                 pixelCount * sizeof(uint32_t));
551 
552     return buffer;
553 }
554 
checkSupportInInstance(void) const555 void RayTracingBuildTestInstance::checkSupportInInstance(void) const
556 {
557     const InstanceInterface &vki                           = m_context.getInstanceInterface();
558     const VkPhysicalDevice physicalDevice                  = m_context.getPhysicalDevice();
559     de::MovePtr<RayTracingProperties> rayTracingProperties = makeRayTracingProperties(vki, physicalDevice);
560 
561     if (rayTracingProperties->getMaxPrimitiveCount() < m_data.squaresGroupCount)
562         TCU_THROW(NotSupportedError, "Triangles required more than supported");
563 
564     if (rayTracingProperties->getMaxGeometryCount() < m_data.geometriesGroupCount)
565         TCU_THROW(NotSupportedError, "Geometries required more than supported");
566 
567     if (rayTracingProperties->getMaxInstanceCount() < m_data.instancesGroupCount)
568         TCU_THROW(NotSupportedError, "Instances required more than supported");
569 
570     if (!verifyAllocationCount())
571         TCU_THROW(NotSupportedError, "Memory allocations required more than supported");
572 }
573 
validateBuffer(de::MovePtr<BufferWithMemory> buffer)574 uint32_t RayTracingBuildTestInstance::validateBuffer(de::MovePtr<BufferWithMemory> buffer)
575 {
576     const uint32_t *bufferPtr = (uint32_t *)buffer->getAllocation().getHostPtr();
577     uint32_t failures         = 0;
578     uint32_t pos              = 0;
579 
580     for (uint32_t y = 0; y < m_data.height; ++y)
581         for (uint32_t x = 0; x < m_data.width; ++x)
582         {
583             const uint32_t anyHitValue = 1;
584             const uint32_t missValue   = 2;
585 
586             const uint32_t n             = m_data.width * y + x;
587             const uint32_t expectedValue = (n % 7 == 0) ? missValue : anyHitValue;
588 
589             if (bufferPtr[pos] != expectedValue)
590             {
591                 if (m_data.testType == TEST_TYPE_AABBS || m_data.testType == TEST_TYPE_MIXED)
592                 {
593                     // In the case of AABB geometries, implementations may increase their size in
594                     // an acceleration structure in order to mitigate precision issues. This may
595                     // result in false positives being reported to the application."
596 
597                     if (bufferPtr[pos] != anyHitValue)
598                     {
599                         failures++;
600                     }
601                 }
602                 else
603                 {
604                     failures++;
605                 }
606             }
607 
608             ++pos;
609         }
610 
611     return failures;
612 }
613 
iterate(void)614 tcu::TestStatus RayTracingBuildTestInstance::iterate(void)
615 {
616     checkSupportInInstance();
617 
618     const uint32_t failures = validateBuffer(runTest(m_data.deviceBuild, m_data.workerThreadsCount));
619 
620     return (failures == 0) ? tcu::TestStatus::pass("Pass") :
621                              tcu::TestStatus::fail("failures=" + de::toString(failures));
622 }
623 
624 } // namespace
625 
buildTest(tcu::TestCaseGroup * testParentGroup,uint32_t threadsCount,bool deviceBuild)626 static void buildTest(tcu::TestCaseGroup *testParentGroup, uint32_t threadsCount, bool deviceBuild)
627 {
628     const char *tests[]          = {"level_primitives", "level_geometries", "level_instances"};
629     const uint32_t sizes[]       = {4, 16, 64, 256, 1024};
630     const uint32_t factors[]     = {1, 4};
631     const bool deferredOperation = threadsCount != 0;
632     tcu::TestContext &testCtx    = testParentGroup->getTestContext();
633 
634     for (size_t testsNdx = 0; testsNdx < DE_LENGTH_OF_ARRAY(tests); ++testsNdx)
635     {
636         de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, tests[testsNdx]));
637 
638         for (size_t factorNdx = 0; factorNdx < DE_LENGTH_OF_ARRAY(factors); ++factorNdx)
639             for (size_t sizesNdx = 0; sizesNdx < DE_LENGTH_OF_ARRAY(sizes); ++sizesNdx)
640             {
641                 if (deviceBuild && sizes[sizesNdx] > 256)
642                     continue;
643                 const uint32_t factor               = factors[factorNdx];
644                 const uint32_t largestGroup         = sizes[sizesNdx] * sizes[sizesNdx] / factor / factor;
645                 const uint32_t squaresGroupCount    = testsNdx == 0 ? largestGroup : factor;
646                 const uint32_t geometriesGroupCount = testsNdx == 1 ? largestGroup : factor;
647                 const uint32_t instancesGroupCount  = testsNdx == 2 ? largestGroup : factor;
648                 const CaseDef caseDef               = {
649                     TEST_TYPE_TRIANGLES,  //  TestType testType;
650                     sizes[sizesNdx],      //  uint32_t width;
651                     sizes[sizesNdx],      //  uint32_t height;
652                     squaresGroupCount,    //  uint32_t squaresGroupCount;
653                     geometriesGroupCount, //  uint32_t geometriesGroupCount;
654                     instancesGroupCount,  //  uint32_t instancesGroupCount;
655                     deferredOperation,    //  bool deferredOperation;
656                     threadsCount,         //  uint32_t workerThreadsCount;
657                     deviceBuild           //  bool deviceBuild;
658                 };
659                 const std::string suffix = de::toString(caseDef.instancesGroupCount) + '_' +
660                                            de::toString(caseDef.geometriesGroupCount) + '_' +
661                                            de::toString(caseDef.squaresGroupCount);
662                 const std::string testName = "triangles_" + suffix;
663 
664                 if (squaresGroupCount == 0 || geometriesGroupCount == 0 || instancesGroupCount == 0)
665                     continue;
666 
667                 group->addChild(new RayTracingTestCase(testCtx, testName.c_str(), caseDef));
668             }
669 
670         for (size_t factorNdx = 0; factorNdx < DE_LENGTH_OF_ARRAY(factors); ++factorNdx)
671             for (size_t sizesNdx = 0; sizesNdx < DE_LENGTH_OF_ARRAY(sizes); ++sizesNdx)
672             {
673                 if (deviceBuild && sizes[sizesNdx] > 256)
674                     continue;
675                 const uint32_t factor               = factors[factorNdx];
676                 const uint32_t largestGroup         = sizes[sizesNdx] * sizes[sizesNdx] / factor / factor;
677                 const uint32_t squaresGroupCount    = testsNdx == 0 ? largestGroup : factor;
678                 const uint32_t geometriesGroupCount = testsNdx == 1 ? largestGroup : factor;
679                 const uint32_t instancesGroupCount  = testsNdx == 2 ? largestGroup : factor;
680                 const CaseDef caseDef               = {
681                     TEST_TYPE_AABBS,      //  TestType testType;
682                     sizes[sizesNdx],      //  uint32_t width;
683                     sizes[sizesNdx],      //  uint32_t height;
684                     squaresGroupCount,    //  uint32_t squaresGroupCount;
685                     geometriesGroupCount, //  uint32_t geometriesGroupCount;
686                     instancesGroupCount,  //  uint32_t instancesGroupCount;
687                     deferredOperation,    //  bool deferredOperation;
688                     threadsCount,         //  uint32_t workerThreadsCount;
689                     deviceBuild           //  bool deviceBuild;
690                 };
691                 const std::string suffix = de::toString(caseDef.instancesGroupCount) + '_' +
692                                            de::toString(caseDef.geometriesGroupCount) + '_' +
693                                            de::toString(caseDef.squaresGroupCount);
694                 const std::string testName = "aabbs_" + suffix;
695 
696                 if (squaresGroupCount == 0 || geometriesGroupCount == 0 || instancesGroupCount == 0)
697                     continue;
698 
699                 group->addChild(new RayTracingTestCase(testCtx, testName.c_str(), caseDef));
700             }
701 
702         for (size_t factorNdx = 0; factorNdx < DE_LENGTH_OF_ARRAY(factors); ++factorNdx)
703             for (size_t sizesNdx = 0; sizesNdx < DE_LENGTH_OF_ARRAY(sizes); ++sizesNdx)
704             {
705                 if (deviceBuild && sizes[sizesNdx] > 256)
706                     continue;
707                 const uint32_t factor               = factors[factorNdx];
708                 const uint32_t largestGroup         = sizes[sizesNdx] * sizes[sizesNdx] / factor / factor;
709                 const uint32_t squaresGroupCount    = testsNdx == 0 ? largestGroup : factor;
710                 const uint32_t geometriesGroupCount = testsNdx == 1 ? largestGroup : factor;
711                 const uint32_t instancesGroupCount  = testsNdx == 2 ? largestGroup : factor;
712                 const CaseDef caseDef               = {
713                     TEST_TYPE_MIXED,      //  TestType testType;
714                     sizes[sizesNdx],      //  uint32_t width;
715                     sizes[sizesNdx],      //  uint32_t height;
716                     squaresGroupCount,    //  uint32_t squaresGroupCount;
717                     geometriesGroupCount, //  uint32_t geometriesGroupCount;
718                     instancesGroupCount,  //  uint32_t instancesGroupCount;
719                     deferredOperation,    //  bool deferredOperation;
720                     threadsCount,         //  uint32_t workerThreadsCount;
721                     deviceBuild           //  bool deviceBuild;
722                 };
723                 const std::string suffix = de::toString(caseDef.instancesGroupCount) + '_' +
724                                            de::toString(caseDef.geometriesGroupCount) + '_' +
725                                            de::toString(caseDef.squaresGroupCount);
726                 const std::string testName = "mixed_" + suffix;
727 
728                 if (squaresGroupCount < 2 || geometriesGroupCount < 2 || instancesGroupCount < 2)
729                     continue;
730 
731                 group->addChild(new RayTracingTestCase(testCtx, testName.c_str(), caseDef));
732             }
733 
734         testParentGroup->addChild(group.release());
735     }
736 }
737 
createBuildTests(tcu::TestContext & testCtx)738 tcu::TestCaseGroup *createBuildTests(tcu::TestContext &testCtx)
739 {
740     // Ray tracing build tests
741     de::MovePtr<tcu::TestCaseGroup> buildGroup(new tcu::TestCaseGroup(testCtx, "build"));
742 
743     const uint32_t threads[] = {0, 1, 2, 3, 4, 8, std::numeric_limits<uint32_t>::max()};
744 
745     for (const auto threadCount : threads)
746     {
747         auto buildTargeGroup = [&](bool deviceBuild) -> void
748         {
749             DE_ASSERT(!(threadCount != 0 && deviceBuild));
750 
751             string groupName, groupDesc;
752             if (deviceBuild)
753             {
754                 groupName = "gpu";
755                 groupDesc = "Compare results of run with acceleration structures build on GPU";
756             }
757             else
758             {
759                 groupName = "cpu";
760                 groupDesc = "Compare results of run with acceleration structures build on CPU";
761             }
762 
763             if (threadCount != 0)
764             {
765                 groupName +=
766                     threadCount == std::numeric_limits<uint32_t>::max() ? "ht_max" : "ht_" + de::toString(threadCount);
767                 groupDesc = "Compare results of run with acceleration structures build on CPU and using host threading";
768             }
769 
770             de::MovePtr<tcu::TestCaseGroup> groupGpuCpuHt(new tcu::TestCaseGroup(testCtx, groupName.c_str()));
771             buildTest(groupGpuCpuHt.get(), threadCount, deviceBuild);
772             buildGroup->addChild(groupGpuCpuHt.release());
773         };
774 
775         if (threadCount == 0)
776         {
777             buildTargeGroup(true);
778         }
779         buildTargeGroup(false);
780     }
781 
782     return buildGroup.release();
783 }
784 
785 } // namespace RayTracing
786 } // namespace vkt
787