/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2020 The Khronos Group Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Experimental crash postmortem shader timeout tests
 *//*--------------------------------------------------------------------*/
23 
24 #include "vktPostmortemTests.hpp"
25 #include "vktPostmortemShaderTimeoutTests.hpp"
26 #include "vktTestGroupUtil.hpp"
27 #include "vktTestCase.hpp"
28 #include "vkBarrierUtil.hpp"
29 #include "vkBufferWithMemory.hpp"
30 #include "vkBuilderUtil.hpp"
31 #include "vkCmdUtil.hpp"
32 #include "vkDefs.hpp"
33 #include "vkObjUtil.hpp"
34 #include "vkTypeUtil.hpp"
35 #include "deUniquePtr.hpp"
36 #include "tcuCommandLine.hpp"
37 #include "vktCustomInstancesDevices.hpp"
38 #include "vktPostmortemUtil.hpp"
39 
40 using namespace vk;
41 
42 namespace vkt
43 {
44 namespace postmortem
45 {
46 namespace
47 {
48 
49 class ShaderTimeoutCase : public vkt::TestCase
50 {
51 public:
ShaderTimeoutCase(tcu::TestContext & testCtx,const std::string & name,uint32_t iterations)52     ShaderTimeoutCase(tcu::TestContext &testCtx, const std::string &name, uint32_t iterations)
53         : TestCase(testCtx, name)
54         , m_iterations(iterations)
55     {
56     }
57 
58     TestInstance *createInstance(Context &context) const override;
59     void initPrograms(vk::SourceCollections &programCollection) const override;
60 
61 private:
62     uint32_t m_iterations;
63 };
64 
65 class ShaderTimeoutInstance : public PostmortemTestInstance
66 {
67 public:
68     ShaderTimeoutInstance(Context &context, uint32_t iterations);
69 
70     tcu::TestStatus iterate(void) override;
71 
72 private:
73     uint32_t m_iterations;
74 };
75 
ShaderTimeoutInstance(Context & context,uint32_t iterations)76 ShaderTimeoutInstance::ShaderTimeoutInstance(Context &context, uint32_t iterations)
77     : PostmortemTestInstance(context)
78     , m_iterations(iterations)
79 {
80 }
81 
createInstance(Context & context) const82 TestInstance *ShaderTimeoutCase::createInstance(Context &context) const
83 {
84     return new ShaderTimeoutInstance(context, m_iterations);
85 }
86 
initPrograms(vk::SourceCollections & programCollection) const87 void ShaderTimeoutCase::initPrograms(vk::SourceCollections &programCollection) const
88 {
89     std::ostringstream src;
90     src << "#version 320 es\n"
91         << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1)\n"
92         << "layout(binding = 0) uniform Params {\n"
93         << "  int x;\n"
94         << "  int y;\n"
95         << "} bounds;\n"
96         << "layout(std430, binding = 1) buffer  Output {\n"
97         << "  uint values[];\n"
98         << "} sb_out;\n"
99         << "\n"
100         << "void main()\n"
101         << "{\n"
102         << "  uint localSize = gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z;\n"
103         << "  uint globalNdx = gl_NumWorkGroups.x * gl_NumWorkGroups.y * gl_WorkGroupID.z + gl_NumWorkGroups.x * "
104            "gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
105         << "  uint globalOffs = localSize * globalNdx;\n"
106         << "  uint localOffs = gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_LocalInvocationID.z + gl_WorkGroupSize.x * "
107            "gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
108         << "  uint sum = uint(0);\n"
109         << "  for (int y = 0; y < bounds.y; ++y) {\n"
110         << "    for (int x = 0; x < bounds.x; ++x) {\n"
111         << "      sb_out.values[globalOffs + localOffs] = sb_out.values[globalOffs + localOffs] + uint(1);\n"
112         << "      memoryBarrierBuffer();\n"
113         << "      barrier();\n"
114         << "    }\n"
115         << "  }\n"
116         << "}\n";
117 
118     programCollection.glslSources.add("comp") << glu::ComputeSource(src.str());
119 }
120 
iterate(void)121 tcu::TestStatus ShaderTimeoutInstance::iterate(void)
122 {
123     const VkDevice device           = *m_logicalDevice;
124     const DeviceInterface &vk       = m_deviceDriver;
125     const VkQueue queue             = m_queue;
126     const uint32_t queueFamilyIndex = m_queueFamilyIndex;
127     Allocator &allocator            = m_allocator;
128 
129     const int workSize                    = 1024;
130     const VkDeviceSize storageSizeInBytes = sizeof(uint32_t) * workSize;
131     const VkDeviceSize uniformSizeInBytes = sizeof(uint32_t) * 2;
132 
133     // Create storage and uniform buffers
134     BufferWithMemory storageBuffer(vk, device, allocator,
135                                    makeBufferCreateInfo(storageSizeInBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
136                                    MemoryRequirement::HostVisible);
137     BufferWithMemory uniformBuffer(vk, device, allocator,
138                                    makeBufferCreateInfo(uniformSizeInBytes, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT),
139                                    MemoryRequirement::HostVisible);
140 
141     // Fill storage buffer with sequentially increasing values
142     {
143         const Allocation &storageBufferAllocation = storageBuffer.getAllocation();
144         uint32_t *storageBufferPtr                = static_cast<uint32_t *>(storageBufferAllocation.getHostPtr());
145         for (int i = 0; i < workSize; ++i)
146             storageBufferPtr[i] = i;
147 
148         flushAlloc(vk, device, storageBufferAllocation);
149     }
150 
151     // Set uniforms for shader loop bounds to m_iterations
152     {
153         const Allocation &uniformBufferAllocation = uniformBuffer.getAllocation();
154         uint32_t *uniformBufferPtr                = static_cast<uint32_t *>(uniformBufferAllocation.getHostPtr());
155         uniformBufferPtr[0]                       = m_iterations;
156         uniformBufferPtr[1]                       = m_iterations;
157 
158         flushAlloc(vk, device, uniformBufferAllocation);
159     }
160 
161     const Unique<VkDescriptorSetLayout> descriptorSetLayout(
162         DescriptorSetLayoutBuilder()
163             .addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
164             .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
165             .build(vk, device));
166 
167     const Unique<VkDescriptorPool> descriptorPool(
168         DescriptorPoolBuilder()
169             .addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
170             .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
171             .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
172 
173     const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
174 
175     const VkDescriptorBufferInfo uniformDescriptorInfo =
176         makeDescriptorBufferInfo(*uniformBuffer, 0ull, uniformSizeInBytes);
177     const VkDescriptorBufferInfo storageDescriptorInfo =
178         makeDescriptorBufferInfo(*storageBuffer, 0ull, storageSizeInBytes);
179     DescriptorSetUpdateBuilder()
180         .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
181                      VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &uniformDescriptorInfo)
182         .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
183                      VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &storageDescriptorInfo)
184         .update(vk, device);
185 
186     // Create pipelines
187     const Unique<VkShaderModule> shaderModule(
188         createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u));
189     const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
190     const Unique<VkPipeline> pipeline(makeComputePipeline(vk, device, *pipelineLayout, *shaderModule));
191 
192     const VkBufferMemoryBarrier hostWriteBarriers[2] = {
193         makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *storageBuffer, 0ull,
194                                 storageSizeInBytes),
195         makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_UNIFORM_READ_BIT, *uniformBuffer, 0ull,
196                                 uniformSizeInBytes)};
197     const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(
198         VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *storageBuffer, 0ull, storageSizeInBytes);
199 
200     // Create command buffer and launch dispatch,
201     const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
202     const Unique<VkCommandBuffer> cmdBuffer(
203         allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
204 
205     beginCommandBuffer(vk, *cmdBuffer);
206     vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
207     vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(),
208                              0u, DE_NULL);
209     vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
210                           (VkDependencyFlags)0, 0, (const VkMemoryBarrier *)DE_NULL, 2u, hostWriteBarriers, 0,
211                           (const VkImageMemoryBarrier *)DE_NULL);
212     vk.cmdDispatch(*cmdBuffer, workSize, 1, 1);
213     vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
214                           (VkDependencyFlags)0, 0, (const VkMemoryBarrier *)DE_NULL, 1u, &computeFinishBarrier, 0,
215                           (const VkImageMemoryBarrier *)DE_NULL);
216     endCommandBuffer(vk, *cmdBuffer);
217 
218     submitCommandsAndWait(vk, device, queue, *cmdBuffer);
219 
220     // Verify output
221     const Allocation &storageAllocation = storageBuffer.getAllocation();
222     invalidateAlloc(vk, device, storageAllocation);
223 
224     const uint32_t *bufferPtr = static_cast<uint32_t *>(storageAllocation.getHostPtr());
225     for (int i = 0; i < workSize; ++i)
226     {
227         const uint32_t res = bufferPtr[i];
228         const uint32_t ref = i + m_iterations * m_iterations;
229         if (res != ref)
230         {
231             std::ostringstream msg;
232             msg << "Comparison failed for sb_out.values[" << i << "] ref:" << ref << " res:" << res;
233             return tcu::TestStatus::fail(msg.str());
234         }
235     }
236 
237     return tcu::TestStatus::pass("Test succeeded without device loss");
238 }
239 
240 } // namespace
241 
createShaderTimeoutTests(tcu::TestContext & testCtx)242 tcu::TestCaseGroup *createShaderTimeoutTests(tcu::TestContext &testCtx)
243 {
244     de::MovePtr<tcu::TestCaseGroup> timeoutGroup(new tcu::TestCaseGroup(testCtx, "shader_timeout"));
245     for (int i = 0; i < 16; ++i)
246     {
247         uint32_t iterations = 0x1u << i;
248         std::stringstream name;
249         name << "compute_" << iterations << "x" << iterations;
250         timeoutGroup->addChild(new ShaderTimeoutCase(testCtx, name.str(), iterations));
251     }
252 
253     return timeoutGroup.release();
254 }
255 
256 } // namespace postmortem
257 } // namespace vkt
258