/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2019 The Khronos Group Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Ray Tracing Build Large Shader Set tests
 *//*--------------------------------------------------------------------*/

#include "vktRayTracingBuildLargeTests.hpp"

#include "vkDefs.hpp"

#include "vktTestCase.hpp"
#include "vkCmdUtil.hpp"
#include "vkObjUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkBarrierUtil.hpp"
#include "vkBufferWithMemory.hpp"
#include "vkImageWithMemory.hpp"
#include "vkTypeUtil.hpp"

#include "vkRayTracingUtil.hpp"

#include "deClock.h"

#include <limits>

namespace vkt
{
namespace RayTracing
{
namespace
{
using namespace vk;
using namespace std;

static const VkFlags ALL_RAY_TRACING_STAGES = VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
                                              VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | VK_SHADER_STAGE_MISS_BIT_KHR |
                                              VK_SHADER_STAGE_INTERSECTION_BIT_KHR | VK_SHADER_STAGE_CALLABLE_BIT_KHR;

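// Parameters for one large-shader-set case: the launch size (width x height), how the
// triangles are split into squares/geometries/instances, and how the acceleration
// structures are built (device or host, optionally as a deferred operation with
// workerThreadsCount worker threads).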
struct CaseDef
{
    uint32_t width;
    uint32_t height;
    uint32_t squaresGroupCount;
    uint32_t geometriesGroupCount;
    uint32_t instancesGroupCount;
    bool deferredOperation;
    VkAccelerationStructureBuildTypeKHR buildType;
    uint32_t workerThreadsCount;
};

uint32_t getShaderGroupSize(const InstanceInterface &vki, const VkPhysicalDevice physicalDevice)
{
    de::MovePtr<RayTracingProperties> rayTracingPropertiesKHR;

    rayTracingPropertiesKHR = makeRayTracingProperties(vki, physicalDevice);

    return rayTracingPropertiesKHR->getShaderGroupHandleSize();
}

uint32_t getShaderGroupBaseAlignment(const InstanceInterface &vki, const VkPhysicalDevice physicalDevice)
{
    de::MovePtr<RayTracingProperties> rayTracingPropertiesKHR;

    rayTracingPropertiesKHR = makeRayTracingProperties(vki, physicalDevice);

    return rayTracingPropertiesKHR->getShaderGroupBaseAlignment();
}

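// Creates a ray tracing pipeline with a single raygen shader in group 0 and one callable
// shader per group in groups 1..groupCount; pipeline creation may run as a deferred
// operation on the requested number of threads.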
Move<VkPipeline> makePipeline(const DeviceInterface &vkd, const VkDevice device, vk::BinaryCollection &collection,
                              de::MovePtr<RayTracingPipeline> &rayTracingPipeline, VkPipelineLayout pipelineLayout,
                              const uint32_t groupCount, const bool deferredOperation, const uint32_t threadCount)
{
    Move<VkShaderModule> raygenShader = createShaderModule(vkd, device, collection.get("rgen"), 0);

    rayTracingPipeline->setDeferredOperation(deferredOperation, threadCount);
    rayTracingPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR, raygenShader, 0);

    for (uint32_t groupNdx = 0; groupNdx < groupCount; ++groupNdx)
    {
        const std::string shaderName    = "call" + de::toString(groupNdx);
        Move<VkShaderModule> callShader = createShaderModule(vkd, device, collection.get(shaderName), 0);

        rayTracingPipeline->addShader(VK_SHADER_STAGE_CALLABLE_BIT_KHR, callShader, 1 + groupNdx);
    }

    Move<VkPipeline> pipeline = rayTracingPipeline->createPipeline(vkd, device, pipelineLayout);

    return pipeline;
}

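// Describes the 2D storage image that the callable shaders write and that is later copied
// into a host-visible buffer for verification.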
VkImageCreateInfo makeImageCreateInfo(uint32_t width, uint32_t height, VkFormat format)
{
    const VkImageUsageFlags usage =
        VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
    const VkImageCreateInfo imageCreateInfo = {
        VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
        DE_NULL,                             // const void* pNext;
        (VkImageCreateFlags)0u,              // VkImageCreateFlags flags;
        VK_IMAGE_TYPE_2D,                    // VkImageType imageType;
        format,                              // VkFormat format;
        makeExtent3D(width, height, 1u),     // VkExtent3D extent;
        1u,                                  // uint32_t mipLevels;
        1u,                                  // uint32_t arrayLayers;
        VK_SAMPLE_COUNT_1_BIT,               // VkSampleCountFlagBits samples;
        VK_IMAGE_TILING_OPTIMAL,             // VkImageTiling tiling;
        usage,                               // VkImageUsageFlags usage;
        VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
        0u,                                  // uint32_t queueFamilyIndexCount;
        DE_NULL,                             // const uint32_t* pQueueFamilyIndices;
        VK_IMAGE_LAYOUT_UNDEFINED            // VkImageLayout initialLayout;
    };

    return imageCreateInfo;
}

class RayTracingBuildLargeTestInstance : public TestInstance
{
public:
    RayTracingBuildLargeTestInstance(Context &context, const CaseDef &data);
    ~RayTracingBuildLargeTestInstance(void);
    tcu::TestStatus iterate(void);

protected:
    uint32_t iterateNoWorkers(void);
    uint32_t iterateWithWorkers(void);
    void checkSupportInInstance(void) const;
    de::MovePtr<BufferWithMemory> runTest(const uint32_t threadCount);
    uint32_t validateBuffer(de::MovePtr<BufferWithMemory> buffer);
    de::SharedPtr<TopLevelAccelerationStructure> initTopAccelerationStructure(
        VkCommandBuffer cmdBuffer, de::SharedPtr<BottomLevelAccelerationStructure> &bottomLevelAccelerationStructure);
    de::SharedPtr<BottomLevelAccelerationStructure> initBottomAccelerationStructure(VkCommandBuffer cmdBuffer);

private:
    CaseDef m_data;
};

RayTracingBuildLargeTestInstance::RayTracingBuildLargeTestInstance(Context &context, const CaseDef &data)
    : vkt::TestInstance(context)
    , m_data(data)
{
}

RayTracingBuildLargeTestInstance::~RayTracingBuildLargeTestInstance(void)
{
}

class RayTracingTestCase : public TestCase
{
public:
    RayTracingTestCase(tcu::TestContext &context, const char *name, const CaseDef data);
    ~RayTracingTestCase(void);

    virtual void initPrograms(SourceCollections &programCollection) const;
    virtual TestInstance *createInstance(Context &context) const;
    virtual void checkSupport(Context &context) const;

private:
    std::string generateDummyWork(const uint32_t shaderNdx) const;
    CaseDef m_data;
};

RayTracingTestCase::RayTracingTestCase(tcu::TestContext &context, const char *name, const CaseDef data)
    : vkt::TestCase(context, name)
    , m_data(data)
{
    DE_ASSERT((m_data.width * m_data.height) ==
              (m_data.squaresGroupCount * m_data.geometriesGroupCount * m_data.instancesGroupCount));
}

RayTracingTestCase::~RayTracingTestCase(void)
{
}

void RayTracingTestCase::checkSupport(Context &context) const
{
    context.requireDeviceFunctionality("VK_KHR_acceleration_structure");
    context.requireDeviceFunctionality("VK_KHR_ray_tracing_pipeline");

    const VkPhysicalDeviceRayTracingPipelineFeaturesKHR &rayTracingPipelineFeaturesKHR =
        context.getRayTracingPipelineFeatures();
    if (rayTracingPipelineFeaturesKHR.rayTracingPipeline == false)
        TCU_THROW(NotSupportedError, "Requires VkPhysicalDeviceRayTracingPipelineFeaturesKHR.rayTracingPipeline");

    const VkPhysicalDeviceAccelerationStructureFeaturesKHR &accelerationStructureFeaturesKHR =
        context.getAccelerationStructureFeatures();
    if (accelerationStructureFeaturesKHR.accelerationStructure == false)
        TCU_THROW(TestError, "VK_KHR_ray_tracing_pipeline requires "
                             "VkPhysicalDeviceAccelerationStructureFeaturesKHR.accelerationStructure");

    if (m_data.buildType == VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR &&
        accelerationStructureFeaturesKHR.accelerationStructureHostCommands == false)
        TCU_THROW(NotSupportedError,
                  "Requires VkPhysicalDeviceAccelerationStructureFeaturesKHR.accelerationStructureHostCommands");

    if (m_data.deferredOperation)
        context.requireDeviceFunctionality("VK_KHR_deferred_host_operations");
}

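// Emits filler integer arithmetic on the output color so that the generated callable
// shaders differ in size; the amount of work depends on shaderNdx.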
std::string RayTracingTestCase::generateDummyWork(const uint32_t shaderNdx) const
{
    std::string result;

    for (uint32_t n = 0; n < shaderNdx % 256; ++n)
    {
        result += "  color.b = color.b + 2 * " + de::toString(n) + ";\n";
        result += "  color.g = color.g + 3 * " + de::toString(n) + ";\n";
        result += "  color.b = color.b ^ color.g;\n";
        result += "  color.b = color.b % 223;\n";
        result += "  color.g = color.g % 227;\n";
        result += "  color.g = color.g ^ color.b;\n";
    }

    return result;
}

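// Generates a raygen shader that calls callable shader n = width * y + x for each pixel,
// plus width * height callable shaders; callable shader (x, y) writes
// (width * (y / 3) + x) % 199 into the storage image.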
void RayTracingTestCase::initPrograms(SourceCollections &programCollection) const
{
    const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);
    {
        std::stringstream css;
        css << "#version 460 core\n"
               "#extension GL_EXT_ray_tracing : require\n"
               "layout(location = 0) callableDataEXT float dummy;\n"
               "layout(set = 0, binding = 1) uniform accelerationStructureEXT topLevelAS;\n"
               "\n"
               "void main()\n"
               "{\n"
               "  uint n = "
            << m_data.width
            << " * gl_LaunchIDEXT.y + gl_LaunchIDEXT.x;\n"
               "  executeCallableEXT(n, 0);\n"
               "}\n";

        programCollection.glslSources.add("rgen") << glu::RaygenSource(updateRayTracingGLSL(css.str())) << buildOptions;
    }

    for (uint32_t y = 0; y < m_data.height; ++y)
        for (uint32_t x = 0; x < m_data.width; ++x)
        {
            const uint32_t shaderNdx = m_data.width * y + x;
            const bool dummyWork     = (shaderNdx % 43 == 0);
            std::stringstream css;
            css << "#version 460 core\n"
                   "#extension GL_EXT_ray_tracing : require\n"
                   "layout(location = 0) callableDataInEXT float dummy;\n"
                   "layout(r32ui, set = 0, binding = 0) uniform uimage2D image0_0;\n"
                   "void main()\n"
                   "{\n"
                   "  uint r = ("
                << m_data.width << " * " << y / 3 << " + " << x
                << ") % 199;\n"
                   "  uvec4 color = uvec4(r,0,0,1);\n"
                << (dummyWork ? generateDummyWork(shaderNdx) : "")
                << "  imageStore(image0_0, ivec2(gl_LaunchIDEXT.xy), color);\n"
                   "}\n";

            programCollection.glslSources.add("call" + de::toString(shaderNdx))
                << glu::CallableSource(updateRayTracingGLSL(css.str())) << buildOptions;
        }
}

TestInstance *RayTracingTestCase::createInstance(Context &context) const
{
    return new RayTracingBuildLargeTestInstance(context, m_data);
}

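// Builds a top-level acceleration structure containing a single instance of the given
// bottom-level acceleration structure.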
de::SharedPtr<TopLevelAccelerationStructure> RayTracingBuildLargeTestInstance::initTopAccelerationStructure(
    VkCommandBuffer cmdBuffer, de::SharedPtr<BottomLevelAccelerationStructure> &bottomLevelAccelerationStructure)
{
    const DeviceInterface &vkd                        = m_context.getDeviceInterface();
    const VkDevice device                             = m_context.getDevice();
    Allocator &allocator                              = m_context.getDefaultAllocator();
    de::MovePtr<TopLevelAccelerationStructure> result = makeTopLevelAccelerationStructure();

    result->setInstanceCount(1);
    result->setBuildType(m_data.buildType);
    result->setDeferredOperation(m_data.deferredOperation);
    result->addInstance(bottomLevelAccelerationStructure);

    result->createAndBuild(vkd, device, cmdBuffer, allocator);

    return de::SharedPtr<TopLevelAccelerationStructure>(result.release());
}

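// Builds one bottom-level acceleration structure with geometriesGroupCount geometries of
// squaresGroupCount triangles each; the triangles are scattered over the grid by stepping
// through cell indices with the (13 * (n + 1)) % (width * height) permutation.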
de::SharedPtr<BottomLevelAccelerationStructure> RayTracingBuildLargeTestInstance::initBottomAccelerationStructure(
    VkCommandBuffer cmdBuffer)
{
    const DeviceInterface &vkd                           = m_context.getDeviceInterface();
    const VkDevice device                                = m_context.getDevice();
    Allocator &allocator                                 = m_context.getDefaultAllocator();
    tcu::UVec2 startPos                                  = tcu::UVec2(0u, 0u);
    de::MovePtr<BottomLevelAccelerationStructure> result = makeBottomLevelAccelerationStructure();

    result->setBuildType(m_data.buildType);
    result->setDeferredOperation(m_data.deferredOperation);
    result->setGeometryCount(m_data.geometriesGroupCount);

    for (size_t geometryNdx = 0; geometryNdx < m_data.geometriesGroupCount; ++geometryNdx)
    {
        std::vector<tcu::Vec3> geometryData;

        geometryData.reserve(m_data.squaresGroupCount * 3u);

        for (size_t squareNdx = 0; squareNdx < m_data.squaresGroupCount; ++squareNdx)
        {
            const uint32_t n = m_data.width * startPos.y() + startPos.x();
            const uint32_t m = (13 * (n + 1)) % (m_data.width * m_data.height);
            const float x0   = float(startPos.x() + 0) / float(m_data.width);
            const float y0   = float(startPos.y() + 0) / float(m_data.height);
            const float x1   = float(startPos.x() + 1) / float(m_data.width);
            const float y1   = float(startPos.y() + 1) / float(m_data.height);
            const float xm   = (x0 + x1) / 2.0f;
            const float ym   = (y0 + y1) / 2.0f;

            geometryData.push_back(tcu::Vec3(x0, y0, -1.0f));
            geometryData.push_back(tcu::Vec3(xm, y1, -1.0f));
            geometryData.push_back(tcu::Vec3(x1, ym, -1.0f));

            startPos.y() = m / m_data.width;
            startPos.x() = m % m_data.width;
        }

        result->addGeometry(geometryData, true);
    }

    result->createAndBuild(vkd, device, cmdBuffer, allocator);

    return de::SharedPtr<BottomLevelAccelerationStructure>(result.release());
}

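// Records and submits the whole test: clears the output image, builds the acceleration
// structures, binds the pipeline with its shader binding tables, traces width x height
// rays, and copies the resulting image into a host-visible buffer that is returned for
// validation.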
de::MovePtr<BufferWithMemory> RayTracingBuildLargeTestInstance::runTest(const uint32_t threadCount)
{
    const InstanceInterface &vki            = m_context.getInstanceInterface();
    const DeviceInterface &vkd              = m_context.getDeviceInterface();
    const VkDevice device                   = m_context.getDevice();
    const VkPhysicalDevice physicalDevice   = m_context.getPhysicalDevice();
    const uint32_t queueFamilyIndex         = m_context.getUniversalQueueFamilyIndex();
    const VkQueue queue                     = m_context.getUniversalQueue();
    Allocator &allocator                    = m_context.getDefaultAllocator();
    const VkFormat format                   = VK_FORMAT_R32_UINT;
    const uint32_t pixelCount               = m_data.width * m_data.height;
    const uint32_t callableShaderCount      = m_data.width * m_data.height;
    const uint32_t shaderGroupHandleSize    = getShaderGroupSize(vki, physicalDevice);
    const uint32_t shaderGroupBaseAlignment = getShaderGroupBaseAlignment(vki, physicalDevice);

    const Move<VkDescriptorSetLayout> descriptorSetLayout =
        DescriptorSetLayoutBuilder()
            .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, ALL_RAY_TRACING_STAGES)
            .addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, ALL_RAY_TRACING_STAGES)
            .build(vkd, device);
    const Move<VkDescriptorPool> descriptorPool =
        DescriptorPoolBuilder()
            .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
            .addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR)
            .build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
    const Move<VkDescriptorSet> descriptorSet   = makeDescriptorSet(vkd, device, *descriptorPool, *descriptorSetLayout);
    const Move<VkPipelineLayout> pipelineLayout = makePipelineLayout(vkd, device, descriptorSetLayout.get());
    const Move<VkCommandPool> cmdPool           = createCommandPool(vkd, device, 0, queueFamilyIndex);
    const Move<VkCommandBuffer> cmdBuffer =
        allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);

    de::MovePtr<RayTracingPipeline> rayTracingPipeline = de::newMovePtr<RayTracingPipeline>();
    const Move<VkPipeline> pipeline =
        makePipeline(vkd, device, m_context.getBinaryCollection(), rayTracingPipeline, *pipelineLayout,
                     callableShaderCount, m_data.deferredOperation, threadCount);
    const de::MovePtr<BufferWithMemory> raygenShaderBindingTable = rayTracingPipeline->createShaderBindingTable(
        vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 0, 1u);
    const de::MovePtr<BufferWithMemory> callableShaderBindingTable = rayTracingPipeline->createShaderBindingTable(
        vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 1u, callableShaderCount);
    const VkStridedDeviceAddressRegionKHR raygenShaderBindingTableRegion =
        makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, raygenShaderBindingTable->get(), 0),
                                          shaderGroupHandleSize, shaderGroupHandleSize);
    const VkStridedDeviceAddressRegionKHR missShaderBindingTableRegion =
        makeStridedDeviceAddressRegionKHR(DE_NULL, 0, 0);
    const VkStridedDeviceAddressRegionKHR hitShaderBindingTableRegion =
        makeStridedDeviceAddressRegionKHR(DE_NULL, 0, 0);
    const VkStridedDeviceAddressRegionKHR callableShaderBindingTableRegion =
        makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, callableShaderBindingTable->get(), 0),
                                          shaderGroupHandleSize, shaderGroupHandleSize * callableShaderCount);

    const VkImageCreateInfo imageCreateInfo = makeImageCreateInfo(m_data.width, m_data.height, format);
    const VkImageSubresourceRange imageSubresourceRange =
        makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0, 1u);
    const de::MovePtr<ImageWithMemory> image = de::MovePtr<ImageWithMemory>(
        new ImageWithMemory(vkd, device, allocator, imageCreateInfo, MemoryRequirement::Any));
    const Move<VkImageView> imageView =
        makeImageView(vkd, device, **image, VK_IMAGE_VIEW_TYPE_2D, format, imageSubresourceRange);

    const VkBufferCreateInfo bufferCreateInfo =
        makeBufferCreateInfo(pixelCount * sizeof(uint32_t), VK_BUFFER_USAGE_TRANSFER_DST_BIT);
    const VkImageSubresourceLayers bufferImageSubresourceLayers =
        makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
    const VkBufferImageCopy bufferImageRegion =
        makeBufferImageCopy(makeExtent3D(m_data.width, m_data.height, 1u), bufferImageSubresourceLayers);
    de::MovePtr<BufferWithMemory> buffer = de::MovePtr<BufferWithMemory>(
        new BufferWithMemory(vkd, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible));

    const VkDescriptorImageInfo descriptorImageInfo =
        makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);

    const VkImageMemoryBarrier preImageBarrier =
        makeImageMemoryBarrier(0u, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
                               VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, **image, imageSubresourceRange);
    const VkImageMemoryBarrier postImageBarrier = makeImageMemoryBarrier(
        VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
        VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL, **image, imageSubresourceRange);
    const VkMemoryBarrier postTraceMemoryBarrier =
        makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
    const VkMemoryBarrier postCopyMemoryBarrier =
        makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
    const VkClearValue clearValue = makeClearValueColorU32(5u, 5u, 5u, 255u);

    de::SharedPtr<BottomLevelAccelerationStructure> bottomLevelAccelerationStructure;
    de::SharedPtr<TopLevelAccelerationStructure> topLevelAccelerationStructure;

    beginCommandBuffer(vkd, *cmdBuffer, 0u);
    {
        cmdPipelineImageMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                                      VK_PIPELINE_STAGE_TRANSFER_BIT, &preImageBarrier);
        vkd.cmdClearColorImage(*cmdBuffer, **image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clearValue.color, 1,
                               &imageSubresourceRange);
        cmdPipelineImageMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT,
                                      VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, &postImageBarrier);

        bottomLevelAccelerationStructure = initBottomAccelerationStructure(*cmdBuffer);
        topLevelAccelerationStructure    = initTopAccelerationStructure(*cmdBuffer, bottomLevelAccelerationStructure);

        const TopLevelAccelerationStructure *topLevelAccelerationStructurePtr = topLevelAccelerationStructure.get();
        VkWriteDescriptorSetAccelerationStructureKHR accelerationStructureWriteDescriptorSet = {
            VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, //  VkStructureType sType;
            DE_NULL,                                                           //  const void* pNext;
            1u,                                                                //  uint32_t accelerationStructureCount;
            topLevelAccelerationStructurePtr->getPtr(), //  const VkAccelerationStructureKHR* pAccelerationStructures;
        };

        DescriptorSetUpdateBuilder()
            .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
                         VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descriptorImageInfo)
            .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
                         VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &accelerationStructureWriteDescriptorSet)
            .update(vkd, device);

        vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0, 1,
                                  &descriptorSet.get(), 0, DE_NULL);

        vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipeline);

        cmdTraceRays(vkd, *cmdBuffer, &raygenShaderBindingTableRegion, &missShaderBindingTableRegion,
                     &hitShaderBindingTableRegion, &callableShaderBindingTableRegion, m_data.width, m_data.height, 1);

        cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR,
                                 VK_PIPELINE_STAGE_TRANSFER_BIT, &postTraceMemoryBarrier);

        vkd.cmdCopyImageToBuffer(*cmdBuffer, **image, VK_IMAGE_LAYOUT_GENERAL, **buffer, 1u, &bufferImageRegion);

        cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
                                 &postCopyMemoryBarrier);
    }
    endCommandBuffer(vkd, *cmdBuffer);

    submitCommandsAndWait(vkd, device, queue, cmdBuffer.get());

    invalidateMappedMemoryRange(vkd, device, buffer->getAllocation().getMemory(), buffer->getAllocation().getOffset(),
                                pixelCount * sizeof(uint32_t));

    return buffer;
}

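// Checks the implementation limits that depend on the case parameters: primitive,
// geometry and instance counts as well as the number of memory allocations the test needs.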
void RayTracingBuildLargeTestInstance::checkSupportInInstance(void) const
{
    const InstanceInterface &vki                     = m_context.getInstanceInterface();
    const VkPhysicalDevice physicalDevice            = m_context.getPhysicalDevice();
    const vk::VkPhysicalDeviceProperties &properties = m_context.getDeviceProperties();
    const uint32_t requiredAllocations =
        8u + TopLevelAccelerationStructure::getRequiredAllocationCount() +
        m_data.instancesGroupCount * BottomLevelAccelerationStructure::getRequiredAllocationCount();
    de::MovePtr<RayTracingProperties> rayTracingProperties = makeRayTracingProperties(vki, physicalDevice);

    if (rayTracingProperties->getMaxPrimitiveCount() < m_data.squaresGroupCount)
        TCU_THROW(NotSupportedError, "Triangles required more than supported");

    if (rayTracingProperties->getMaxGeometryCount() < m_data.geometriesGroupCount)
        TCU_THROW(NotSupportedError, "Geometries required more than supported");

    if (rayTracingProperties->getMaxInstanceCount() < m_data.instancesGroupCount)
        TCU_THROW(NotSupportedError, "Instances required more than supported");

    if (properties.limits.maxMemoryAllocationCount < requiredAllocations)
        TCU_THROW(NotSupportedError, "Test requires more allocations allowed");
}

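// Compares every pixel against the value the corresponding callable shader is expected to
// write and returns the number of mismatches.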
uint32_t RayTracingBuildLargeTestInstance::validateBuffer(de::MovePtr<BufferWithMemory> buffer)
{
    const uint32_t *bufferPtr = (uint32_t *)buffer->getAllocation().getHostPtr();
    uint32_t failures         = 0;
    uint32_t pos              = 0;

    for (uint32_t y = 0; y < m_data.height; ++y)
        for (uint32_t x = 0; x < m_data.width; ++x)
        {
            const uint32_t expectedValue = (m_data.width * (y / 3) + x) % 199;

            if (bufferPtr[pos] != expectedValue)
                failures++;

            ++pos;
        }

    return failures;
}

uint32_t RayTracingBuildLargeTestInstance::iterateNoWorkers(void)
{
    de::MovePtr<BufferWithMemory> buffer = runTest(0);
    const uint32_t failures              = validateBuffer(buffer);

    return failures;
}

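// Runs the test twice, first single-threaded and then with the requested number of worker
// threads for the deferred host builds; failures from both runs are accumulated.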
uint32_t RayTracingBuildLargeTestInstance::iterateWithWorkers(void)
{
    de::MovePtr<BufferWithMemory> singleThreadBuffer = runTest(0);
    const uint32_t singleThreadFailures              = validateBuffer(singleThreadBuffer);
    de::MovePtr<BufferWithMemory> multiThreadBuffer  = runTest(m_data.workerThreadsCount);
    const uint32_t multiThreadFailures               = validateBuffer(multiThreadBuffer);
    const uint32_t failures                          = singleThreadFailures + multiThreadFailures;

    return failures;
}

tcu::TestStatus RayTracingBuildLargeTestInstance::iterate(void)
{
    checkSupportInInstance();

    const uint32_t failures = m_data.workerThreadsCount == 0 ? iterateNoWorkers() : iterateWithWorkers();

    if (failures == 0)
        return tcu::TestStatus::pass("Pass");
    else
        return tcu::TestStatus::fail("failures=" + de::toString(failures));
}

} // namespace

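// Creates the large_shader_set group: per-build-type groups (gpu, cpu_ht) with one case per
// launch size, plus cpu_ht_<threads> groups that exercise host builds with explicit worker
// thread counts.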
tcu::TestCaseGroup *createBuildLargeShaderSetTests(tcu::TestContext &testCtx)
{
    // Build large shader set using CPU host threading
    de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "large_shader_set"));

    const uint32_t sizes[] = {8, 16, 32, 64};
    const struct
    {
        const char *buildTypeName;
        bool deferredOperation;
        const VkAccelerationStructureBuildTypeKHR buildType;
    } buildTypes[] = {
        {"gpu", false, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR},
        {"cpu_ht", true, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR},
    };
    const uint32_t threads[] = {1, 2, 3, 4, 8, std::numeric_limits<uint32_t>::max()};

    for (size_t buildNdx = 0; buildNdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildNdx)
    {
        de::MovePtr<tcu::TestCaseGroup> buildTypeGroup(
            new tcu::TestCaseGroup(testCtx, buildTypes[buildNdx].buildTypeName));

        for (size_t sizesNdx = 0; sizesNdx < DE_LENGTH_OF_ARRAY(sizes); ++sizesNdx)
        {
            const uint32_t largestGroup         = sizes[sizesNdx] * sizes[sizesNdx];
            const uint32_t squaresGroupCount    = largestGroup;
            const uint32_t geometriesGroupCount = 1;
            const uint32_t instancesGroupCount  = 1;
            const CaseDef caseDef               = {
                sizes[sizesNdx],                        //  uint32_t width;
                sizes[sizesNdx],                        //  uint32_t height;
                squaresGroupCount,                      //  uint32_t squaresGroupCount;
                geometriesGroupCount,                   //  uint32_t geometriesGroupCount;
                instancesGroupCount,                    //  uint32_t instancesGroupCount;
                buildTypes[buildNdx].deferredOperation, //  bool deferredOperation;
                buildTypes[buildNdx].buildType,         //  VkAccelerationStructureBuildTypeKHR buildType;
                0,                                      //  uint32_t workerThreadsCount;
            };
            const std::string testName = de::toString(largestGroup);

            buildTypeGroup->addChild(new RayTracingTestCase(testCtx, testName.c_str(), caseDef));
        }

        group->addChild(buildTypeGroup.release());
    }

    for (size_t threadsNdx = 0; threadsNdx < DE_LENGTH_OF_ARRAY(threads); ++threadsNdx)
    {
        for (size_t buildNdx = 0; buildNdx < DE_LENGTH_OF_ARRAY(buildTypes); ++buildNdx)
        {
            if (buildTypes[buildNdx].buildType != VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR)
                continue;

            const std::string suffix =
                threads[threadsNdx] == std::numeric_limits<uint32_t>::max() ? "max" : de::toString(threads[threadsNdx]);
            const std::string buildTypeGroupName = std::string(buildTypes[buildNdx].buildTypeName) + '_' + suffix;
            de::MovePtr<tcu::TestCaseGroup> buildTypeGroup(new tcu::TestCaseGroup(testCtx, buildTypeGroupName.c_str()));

            for (size_t sizesNdx = 0; sizesNdx < DE_LENGTH_OF_ARRAY(sizes); ++sizesNdx)
            {
                const uint32_t largestGroup         = sizes[sizesNdx] * sizes[sizesNdx];
                const uint32_t squaresGroupCount    = largestGroup;
                const uint32_t geometriesGroupCount = 1;
                const uint32_t instancesGroupCount  = 1;
                const CaseDef caseDef               = {
                    sizes[sizesNdx],                        //  uint32_t width;
                    sizes[sizesNdx],                        //  uint32_t height;
                    squaresGroupCount,                      //  uint32_t squaresGroupCount;
                    geometriesGroupCount,                   //  uint32_t geometriesGroupCount;
                    instancesGroupCount,                    //  uint32_t instancesGroupCount;
                    buildTypes[buildNdx].deferredOperation, //  bool deferredOperation;
                    buildTypes[buildNdx].buildType, //  VkAccelerationStructureBuildTypeKHR buildType;
                    threads[threadsNdx],            //  uint32_t workerThreadsCount;
                };
                const std::string testName = de::toString(largestGroup);

                buildTypeGroup->addChild(new RayTracingTestCase(testCtx, testName.c_str(), caseDef));
            }

            group->addChild(buildTypeGroup.release());
        }
    }

    return group.release();
}

} // namespace RayTracing
} // namespace vkt