/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2016 The Khronos Group Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file  vktSparseResourcesBufferSparseResidency.cpp
 * \brief Sparse partially resident buffers tests
 *//*--------------------------------------------------------------------*/

#include "vktSparseResourcesBufferSparseResidency.hpp"
#include "vktSparseResourcesTestsUtil.hpp"
#include "vktSparseResourcesBase.hpp"
#include "vktTestCaseUtil.hpp"

#include "vkDefs.hpp"
#include "vkRef.hpp"
#include "vkRefUtil.hpp"
#include "vkPlatform.hpp"
#include "vkPrograms.hpp"
#include "vkMemUtil.hpp"
#include "vkBarrierUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkObjUtil.hpp"

#include "deStringUtil.hpp"
#include "deUniquePtr.hpp"

#include <string>
#include <vector>

using namespace vk;

namespace vkt
{
namespace sparse
{
namespace
{

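// Number of bytes one uint occupies in the std430 shader buffers below;
// used to derive the shader's copy loop iteration count.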
enum ShaderParameters
{
    SIZE_OF_UINT_IN_SHADER = 4u,
};

class BufferSparseResidencyCase : public TestCase
{
public:
    BufferSparseResidencyCase(tcu::TestContext &testCtx, const std::string &name, const uint32_t bufferSize,
                              const glu::GLSLVersion glslVersion, const bool useDeviceGroups);

    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const;

private:
    const uint32_t m_bufferSize;
    const glu::GLSLVersion m_glslVersion;
    const bool m_useDeviceGroups;
};

BufferSparseResidencyCase::BufferSparseResidencyCase(tcu::TestContext &testCtx, const std::string &name,
                                                     const uint32_t bufferSize, const glu::GLSLVersion glslVersion,
                                                     const bool useDeviceGroups)
    : TestCase(testCtx, name)
    , m_bufferSize(bufferSize)
    , m_glslVersion(glslVersion)
    , m_useDeviceGroups(useDeviceGroups)
{
}

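// The generated compute shader runs as a single invocation and copies the
// input SSBO into the (sparse) output SSBO one uint at a time.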
void BufferSparseResidencyCase::initPrograms(SourceCollections &sourceCollections) const
{
    const char *const versionDecl  = glu::getGLSLVersionDeclaration(m_glslVersion);
    const uint32_t iterationsCount = m_bufferSize / SIZE_OF_UINT_IN_SHADER;

    std::ostringstream src;

    src << versionDecl << "\n"
        << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
        << "layout(set = 0, binding = 0, std430) readonly buffer Input\n"
        << "{\n"
        << "    uint data[];\n"
        << "} sb_in;\n"
        << "\n"
        << "layout(set = 0, binding = 1, std430) writeonly buffer Output\n"
        << "{\n"
        << "    uint result[];\n"
        << "} sb_out;\n"
        << "\n"
        << "void main (void)\n"
        << "{\n"
        << "    for(int i=0; i<" << iterationsCount << "; ++i) \n"
        << "    {\n"
        << "        sb_out.result[i] = sb_in.data[i];\n"
        << "    }\n"
        << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

class BufferSparseResidencyInstance : public SparseResourcesBaseInstance
{
public:
    BufferSparseResidencyInstance(Context &context, const uint32_t bufferSize, const bool useDeviceGroups);

    tcu::TestStatus iterate(void);

private:
    const uint32_t m_bufferSize;
};

BufferSparseResidencyInstance::BufferSparseResidencyInstance(Context &context, const uint32_t bufferSize,
                                                             const bool useDeviceGroups)
    : SparseResourcesBaseInstance(context, useDeviceGroups)
    , m_bufferSize(bufferSize)
{
}

tcu::TestStatus BufferSparseResidencyInstance::iterate(void)
{
    const InstanceInterface &instance = m_context.getInstanceInterface();
    {
        // Create logical device supporting both sparse and compute operations
        QueueRequirementsVec queueRequirements;
        queueRequirements.push_back(QueueRequirements(VK_QUEUE_SPARSE_BINDING_BIT, 1u));
        queueRequirements.push_back(QueueRequirements(VK_QUEUE_COMPUTE_BIT, 1u));

        createDeviceSupportingQueues(queueRequirements);
    }
    const VkPhysicalDevice physicalDevice                     = getPhysicalDevice();
    const VkPhysicalDeviceProperties physicalDeviceProperties = getPhysicalDeviceProperties(instance, physicalDevice);

    if (!getPhysicalDeviceFeatures(instance, physicalDevice).sparseResidencyBuffer)
        TCU_THROW(NotSupportedError, "Sparse partially resident buffers not supported");

    const DeviceInterface &deviceInterface = getDeviceInterface();
    const Queue &sparseQueue               = getQueue(VK_QUEUE_SPARSE_BINDING_BIT, 0);
    const Queue &computeQueue              = getQueue(VK_QUEUE_COMPUTE_BIT, 0);

    // Go through all physical devices
    for (uint32_t physDevID = 0; physDevID < m_numPhysicalDevices; physDevID++)
    {
        const uint32_t firstDeviceID  = physDevID;
        const uint32_t secondDeviceID = (firstDeviceID + 1) % m_numPhysicalDevices;

        VkBufferCreateInfo bufferCreateInfo = {
            VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,                                        // VkStructureType sType;
            DE_NULL,                                                                     // const void* pNext;
            VK_BUFFER_CREATE_SPARSE_BINDING_BIT | VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT, // VkBufferCreateFlags flags;
            m_bufferSize,                                                                // VkDeviceSize size;
            VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT,       // VkBufferUsageFlags usage;
            VK_SHARING_MODE_EXCLUSIVE,                                                   // VkSharingMode sharingMode;
            0u,     // uint32_t queueFamilyIndexCount;
            DE_NULL // const uint32_t* pQueueFamilyIndices;
        };

        const uint32_t queueFamilyIndices[] = {sparseQueue.queueFamilyIndex, computeQueue.queueFamilyIndex};

        if (sparseQueue.queueFamilyIndex != computeQueue.queueFamilyIndex)
        {
            bufferCreateInfo.sharingMode           = VK_SHARING_MODE_CONCURRENT;
            bufferCreateInfo.queueFamilyIndexCount = 2u;
            bufferCreateInfo.pQueueFamilyIndices   = queueFamilyIndices;
        }

        // Create sparse buffer
        const Unique<VkBuffer> sparseBuffer(createBuffer(deviceInterface, getDevice(), &bufferCreateInfo));

        // Create sparse buffer memory bind semaphore
        const Unique<VkSemaphore> bufferMemoryBindSemaphore(createSemaphore(deviceInterface, getDevice()));

        const VkMemoryRequirements bufferMemRequirements =
            getBufferMemoryRequirements(deviceInterface, getDevice(), *sparseBuffer);

        if (bufferMemRequirements.size > physicalDeviceProperties.limits.sparseAddressSpaceSize)
            TCU_THROW(NotSupportedError, "Required memory size for sparse resources exceeds device limits");

        DE_ASSERT((bufferMemRequirements.size % bufferMemRequirements.alignment) == 0);

        const uint32_t numSparseSlots =
            static_cast<uint32_t>(bufferMemRequirements.size / bufferMemRequirements.alignment);
        std::vector<DeviceMemorySp> deviceMemUniquePtrVec;

        {
            std::vector<VkSparseMemoryBind> sparseMemoryBinds;
            const uint32_t memoryType = findMatchingMemoryType(instance, getPhysicalDevice(secondDeviceID),
                                                               bufferMemRequirements, MemoryRequirement::Any);

            if (memoryType == NO_MATCH_FOUND)
                return tcu::TestStatus::fail("No matching memory type found");

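            // When the resource and the memory live on different physical devices
            // of a device group, the peer memory link must support the copy
            // operations this test performs.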
            if (firstDeviceID != secondDeviceID)
            {
                VkPeerMemoryFeatureFlags peerMemoryFeatureFlags = (VkPeerMemoryFeatureFlags)0;
                const uint32_t heapIndex =
                    getHeapIndexForMemoryType(instance, getPhysicalDevice(secondDeviceID), memoryType);
                deviceInterface.getDeviceGroupPeerMemoryFeatures(getDevice(), heapIndex, firstDeviceID, secondDeviceID,
                                                                 &peerMemoryFeatureFlags);

                if (((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT) == 0) ||
                    ((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT) == 0))
                {
                    TCU_THROW(NotSupportedError, "Peer memory does not support COPY_SRC and GENERIC_DST");
                }
            }

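            // Bind device memory to every other alignment-sized slot only; the
            // odd-numbered slots stay unbound, making the buffer partially resident.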
            for (uint32_t sparseBindNdx = 0; sparseBindNdx < numSparseSlots; sparseBindNdx += 2)
            {
                const VkSparseMemoryBind sparseMemoryBind =
                    makeSparseMemoryBind(deviceInterface, getDevice(), bufferMemRequirements.alignment, memoryType,
                                         bufferMemRequirements.alignment * sparseBindNdx);

                deviceMemUniquePtrVec.push_back(makeVkSharedPtr(
                    Move<VkDeviceMemory>(check<VkDeviceMemory>(sparseMemoryBind.memory),
                                         Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));

                sparseMemoryBinds.push_back(sparseMemoryBind);
            }

            const VkSparseBufferMemoryBindInfo sparseBufferBindInfo = makeSparseBufferMemoryBindInfo(
                *sparseBuffer, static_cast<uint32_t>(sparseMemoryBinds.size()), &sparseMemoryBinds[0]);

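            // With device groups the sparse buffer is owned by firstDeviceID
            // while the backing memory is allocated on secondDeviceID.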
            const VkDeviceGroupBindSparseInfo devGroupBindSparseInfo = {
                VK_STRUCTURE_TYPE_DEVICE_GROUP_BIND_SPARSE_INFO, //VkStructureType sType;
                DE_NULL,                                         //const void* pNext;
                firstDeviceID,                                   //uint32_t resourceDeviceIndex;
                secondDeviceID,                                  //uint32_t memoryDeviceIndex;
            };
            const VkBindSparseInfo bindSparseInfo = {
                VK_STRUCTURE_TYPE_BIND_SPARSE_INFO,                      //VkStructureType sType;
                usingDeviceGroups() ? &devGroupBindSparseInfo : DE_NULL, //const void* pNext;
                0u,                                                      //uint32_t waitSemaphoreCount;
                DE_NULL,                                                 //const VkSemaphore* pWaitSemaphores;
                1u,                                                      //uint32_t bufferBindCount;
                &sparseBufferBindInfo,           //const VkSparseBufferMemoryBindInfo* pBufferBinds;
                0u,                              //uint32_t imageOpaqueBindCount;
                DE_NULL,                         //const VkSparseImageOpaqueMemoryBindInfo* pImageOpaqueBinds;
                0u,                              //uint32_t imageBindCount;
                DE_NULL,                         //const VkSparseImageMemoryBindInfo* pImageBinds;
                1u,                              //uint32_t signalSemaphoreCount;
                &bufferMemoryBindSemaphore.get() //const VkSemaphore* pSignalSemaphores;
            };

            VK_CHECK(deviceInterface.queueBindSparse(sparseQueue.queueHandle, 1u, &bindSparseInfo, DE_NULL));
        }

        // Create input buffer
        const VkBufferCreateInfo inputBufferCreateInfo =
            makeBufferCreateInfo(m_bufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
        const Unique<VkBuffer> inputBuffer(createBuffer(deviceInterface, getDevice(), &inputBufferCreateInfo));
        const de::UniquePtr<Allocation> inputBufferAlloc(
            bindBuffer(deviceInterface, getDevice(), getAllocator(), *inputBuffer, MemoryRequirement::HostVisible));

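        // Fill the reference data with a byte pattern derived from the offset
        // within each alignment-sized slot, so copied data is distinguishable
        // from the zeros returned by unbound sparse regions.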
        std::vector<uint8_t> referenceData;
        referenceData.resize(m_bufferSize);

        for (uint32_t valueNdx = 0; valueNdx < m_bufferSize; ++valueNdx)
        {
            referenceData[valueNdx] = static_cast<uint8_t>((valueNdx % bufferMemRequirements.alignment) + 1u);
        }

        deMemcpy(inputBufferAlloc->getHostPtr(), &referenceData[0], m_bufferSize);

        flushAlloc(deviceInterface, getDevice(), *inputBufferAlloc);

        // Create output buffer
        const VkBufferCreateInfo outputBufferCreateInfo =
            makeBufferCreateInfo(m_bufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
        const Unique<VkBuffer> outputBuffer(createBuffer(deviceInterface, getDevice(), &outputBufferCreateInfo));
        const de::UniquePtr<Allocation> outputBufferAlloc(
            bindBuffer(deviceInterface, getDevice(), getAllocator(), *outputBuffer, MemoryRequirement::HostVisible));

        // Create command buffer for compute and data transfer operations
        const Unique<VkCommandPool> commandPool(
            makeCommandPool(deviceInterface, getDevice(), computeQueue.queueFamilyIndex));
        const Unique<VkCommandBuffer> commandBuffer(
            allocateCommandBuffer(deviceInterface, getDevice(), *commandPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

        // Start recording compute and transfer commands
        beginCommandBuffer(deviceInterface, *commandBuffer);

        // Create descriptor set
        const Unique<VkDescriptorSetLayout> descriptorSetLayout(
            DescriptorSetLayoutBuilder()
                .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
                .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
                .build(deviceInterface, getDevice()));

        // Create compute pipeline
        const Unique<VkShaderModule> shaderModule(
            createShaderModule(deviceInterface, getDevice(), m_context.getBinaryCollection().get("comp"), DE_NULL));
        const Unique<VkPipelineLayout> pipelineLayout(
            makePipelineLayout(deviceInterface, getDevice(), *descriptorSetLayout));
        const Unique<VkPipeline> computePipeline(
            makeComputePipeline(deviceInterface, getDevice(), *pipelineLayout, *shaderModule));

        deviceInterface.cmdBindPipeline(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);

        const Unique<VkDescriptorPool> descriptorPool(
            DescriptorPoolBuilder()
                .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u)
                .build(deviceInterface, getDevice(), VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));

        const Unique<VkDescriptorSet> descriptorSet(
            makeDescriptorSet(deviceInterface, getDevice(), *descriptorPool, *descriptorSetLayout));

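        // Binding 0 is the host-written input buffer; binding 1 is the sparse
        // buffer the shader copies the data into.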
        {
            const VkDescriptorBufferInfo inputBufferInfo  = makeDescriptorBufferInfo(*inputBuffer, 0ull, m_bufferSize);
            const VkDescriptorBufferInfo sparseBufferInfo = makeDescriptorBufferInfo(*sparseBuffer, 0ull, m_bufferSize);

            DescriptorSetUpdateBuilder()
                .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
                             VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputBufferInfo)
                .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
                             VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &sparseBufferInfo)
                .update(deviceInterface, getDevice());
        }

        deviceInterface.cmdBindDescriptorSets(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u,
                                              &descriptorSet.get(), 0u, DE_NULL);

        {
            const VkBufferMemoryBarrier inputBufferBarrier = makeBufferMemoryBarrier(
                VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *inputBuffer, 0ull, m_bufferSize);

            deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_HOST_BIT,
                                               VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, DE_NULL, 1u,
                                               &inputBufferBarrier, 0u, DE_NULL);
        }

        deviceInterface.cmdDispatch(*commandBuffer, 1u, 1u, 1u);

        {
            const VkBufferMemoryBarrier sparseBufferBarrier = makeBufferMemoryBarrier(
                VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, *sparseBuffer, 0ull, m_bufferSize);

            deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                                               VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, DE_NULL, 1u,
                                               &sparseBufferBarrier, 0u, DE_NULL);
        }

        {
            const VkBufferCopy bufferCopy = makeBufferCopy(0u, 0u, m_bufferSize);

            deviceInterface.cmdCopyBuffer(*commandBuffer, *sparseBuffer, *outputBuffer, 1u, &bufferCopy);
        }

        {
            const VkBufferMemoryBarrier outputBufferBarrier = makeBufferMemoryBarrier(
                VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, m_bufferSize);

            deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT,
                                               VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, DE_NULL, 1u, &outputBufferBarrier,
                                               0u, DE_NULL);
        }

        // End recording compute and transfer commands
        endCommandBuffer(deviceInterface, *commandBuffer);

        const VkPipelineStageFlags waitStageBits[] = {VK_PIPELINE_STAGE_TRANSFER_BIT};

        // Submit compute and transfer commands (waiting on the sparse bind
        // semaphore) and wait for completion
        submitCommandsAndWait(deviceInterface, getDevice(), computeQueue.queueHandle, *commandBuffer, 1u,
                              &bufferMemoryBindSemaphore.get(), waitStageBits, 0, DE_NULL, usingDeviceGroups(),
                              firstDeviceID);

        // Retrieve data from output buffer to host memory
        invalidateAlloc(deviceInterface, getDevice(), *outputBufferAlloc);

        const uint8_t *outputData = static_cast<const uint8_t *>(outputBufferAlloc->getHostPtr());

        // Wait for sparse queue to become idle
        deviceInterface.queueWaitIdle(sparseQueue.queueHandle);

        // Compare output data with reference data
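        // Even-numbered slots are memory-backed and must match the reference;
        // odd-numbered slots are unbound and must read as zeros when the
        // implementation reports residencyNonResidentStrict.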
        for (uint32_t sparseBindNdx = 0; sparseBindNdx < numSparseSlots; ++sparseBindNdx)
        {
            const uint32_t alignment = static_cast<uint32_t>(bufferMemRequirements.alignment);
            const uint32_t offset    = alignment * sparseBindNdx;
            const uint32_t size      = sparseBindNdx == (numSparseSlots - 1) ? m_bufferSize % alignment : alignment;

            if (sparseBindNdx % 2u == 0u)
            {
                if (deMemCmp(&referenceData[offset], outputData + offset, size) != 0)
                    return tcu::TestStatus::fail("Failed");
            }
            else if (physicalDeviceProperties.sparseProperties.residencyNonResidentStrict)
            {
                deMemset(&referenceData[offset], 0u, size);

                if (deMemCmp(&referenceData[offset], outputData + offset, size) != 0)
                    return tcu::TestStatus::fail("Failed");
            }
        }
    }

    return tcu::TestStatus::pass("Passed");
}

TestInstance *BufferSparseResidencyCase::createInstance(Context &context) const
{
    return new BufferSparseResidencyInstance(context, m_bufferSize, m_useDeviceGroups);
}

} // namespace

void addBufferSparseResidencyTests(tcu::TestCaseGroup *group, const bool useDeviceGroups)
{
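    // Case names encode the buffer size in bytes as a power of two,
    // e.g. "buffer_size_2_10" uses a 1 << 10 (1 KiB) buffer.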
    group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_10", 1 << 10,
                                                  glu::GLSL_VERSION_440, useDeviceGroups));
    group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_12", 1 << 12,
                                                  glu::GLSL_VERSION_440, useDeviceGroups));
    group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_16", 1 << 16,
                                                  glu::GLSL_VERSION_440, useDeviceGroups));
    group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_17", 1 << 17,
                                                  glu::GLSL_VERSION_440, useDeviceGroups));
    group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_20", 1 << 20,
                                                  glu::GLSL_VERSION_440, useDeviceGroups));
    group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_24", 1 << 24,
                                                  glu::GLSL_VERSION_440, useDeviceGroups));
}

} // namespace sparse
} // namespace vkt