1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2016 The Khronos Group Inc.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file vktSparseResourcesBufferSparseResidency.cpp
21 * \brief Sparse partially resident buffers tests
22 *//*--------------------------------------------------------------------*/
23
24 #include "vktSparseResourcesBufferSparseResidency.hpp"
25 #include "vktSparseResourcesTestsUtil.hpp"
26 #include "vktSparseResourcesBase.hpp"
27 #include "vktTestCaseUtil.hpp"
28
29 #include "vkDefs.hpp"
30 #include "vkRef.hpp"
31 #include "vkRefUtil.hpp"
32 #include "vkPlatform.hpp"
33 #include "vkPrograms.hpp"
34 #include "vkRefUtil.hpp"
35 #include "vkMemUtil.hpp"
36 #include "vkBarrierUtil.hpp"
37 #include "vkQueryUtil.hpp"
38 #include "vkBuilderUtil.hpp"
39 #include "vkTypeUtil.hpp"
40 #include "vkCmdUtil.hpp"
41 #include "vkObjUtil.hpp"
42
43 #include "deStringUtil.hpp"
44 #include "deUniquePtr.hpp"
45
46 #include <string>
47 #include <vector>
48
49 using namespace vk;
50
51 namespace vkt
52 {
53 namespace sparse
54 {
55 namespace
56 {
57
// Constants shared between the host code and the generated compute shader.
enum ShaderParameters
{
    // Size in bytes of one `uint` element; used to turn the buffer size in bytes
    // into the number of elements the shader copies.
    SIZE_OF_UINT_IN_SHADER = 4u
};
62
63 class BufferSparseResidencyCase : public TestCase
64 {
65 public:
66 BufferSparseResidencyCase(tcu::TestContext &testCtx, const std::string &name, const uint32_t bufferSize,
67 const glu::GLSLVersion glslVersion, const bool useDeviceGroups);
68
69 void initPrograms(SourceCollections &sourceCollections) const;
70 TestInstance *createInstance(Context &context) const;
71
72 private:
73 const uint32_t m_bufferSize;
74 const glu::GLSLVersion m_glslVersion;
75 const bool m_useDeviceGroups;
76 };
77
BufferSparseResidencyCase(tcu::TestContext & testCtx,const std::string & name,const uint32_t bufferSize,const glu::GLSLVersion glslVersion,const bool useDeviceGroups)78 BufferSparseResidencyCase::BufferSparseResidencyCase(tcu::TestContext &testCtx, const std::string &name,
79 const uint32_t bufferSize, const glu::GLSLVersion glslVersion,
80 const bool useDeviceGroups)
81
82 : TestCase(testCtx, name)
83 , m_bufferSize(bufferSize)
84 , m_glslVersion(glslVersion)
85 , m_useDeviceGroups(useDeviceGroups)
86 {
87 }
88
initPrograms(SourceCollections & sourceCollections) const89 void BufferSparseResidencyCase::initPrograms(SourceCollections &sourceCollections) const
90 {
91 const char *const versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
92 const uint32_t iterationsCount = m_bufferSize / SIZE_OF_UINT_IN_SHADER;
93
94 std::ostringstream src;
95
96 src << versionDecl << "\n"
97 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
98 << "layout(set = 0, binding = 0, std430) readonly buffer Input\n"
99 << "{\n"
100 << " uint data[];\n"
101 << "} sb_in;\n"
102 << "\n"
103 << "layout(set = 0, binding = 1, std430) writeonly buffer Output\n"
104 << "{\n"
105 << " uint result[];\n"
106 << "} sb_out;\n"
107 << "\n"
108 << "void main (void)\n"
109 << "{\n"
110 << " for(int i=0; i<" << iterationsCount << "; ++i) \n"
111 << " {\n"
112 << " sb_out.result[i] = sb_in.data[i];"
113 << " }\n"
114 << "}\n";
115
116 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
117 }
118
119 class BufferSparseResidencyInstance : public SparseResourcesBaseInstance
120 {
121 public:
122 BufferSparseResidencyInstance(Context &context, const uint32_t bufferSize, const bool useDeviceGroups);
123
124 tcu::TestStatus iterate(void);
125
126 private:
127 const uint32_t m_bufferSize;
128 };
129
BufferSparseResidencyInstance(Context & context,const uint32_t bufferSize,const bool useDeviceGroups)130 BufferSparseResidencyInstance::BufferSparseResidencyInstance(Context &context, const uint32_t bufferSize,
131 const bool useDeviceGroups)
132 : SparseResourcesBaseInstance(context, useDeviceGroups)
133 , m_bufferSize(bufferSize)
134 {
135 }
136
iterate(void)137 tcu::TestStatus BufferSparseResidencyInstance::iterate(void)
138 {
139 const InstanceInterface &instance = m_context.getInstanceInterface();
140 {
141 // Create logical device supporting both sparse and compute operations
142 QueueRequirementsVec queueRequirements;
143 queueRequirements.push_back(QueueRequirements(VK_QUEUE_SPARSE_BINDING_BIT, 1u));
144 queueRequirements.push_back(QueueRequirements(VK_QUEUE_COMPUTE_BIT, 1u));
145
146 createDeviceSupportingQueues(queueRequirements);
147 }
148 const VkPhysicalDevice physicalDevice = getPhysicalDevice();
149 const VkPhysicalDeviceProperties physicalDeviceProperties = getPhysicalDeviceProperties(instance, physicalDevice);
150
151 if (!getPhysicalDeviceFeatures(instance, physicalDevice).sparseResidencyBuffer)
152 TCU_THROW(NotSupportedError, "Sparse partially resident buffers not supported");
153
154 const DeviceInterface &deviceInterface = getDeviceInterface();
155 const Queue &sparseQueue = getQueue(VK_QUEUE_SPARSE_BINDING_BIT, 0);
156 const Queue &computeQueue = getQueue(VK_QUEUE_COMPUTE_BIT, 0);
157
158 // Go through all physical devices
159 for (uint32_t physDevID = 0; physDevID < m_numPhysicalDevices; physDevID++)
160 {
161 const uint32_t firstDeviceID = physDevID;
162 const uint32_t secondDeviceID = (firstDeviceID + 1) % m_numPhysicalDevices;
163
164 VkBufferCreateInfo bufferCreateInfo = {
165 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
166 DE_NULL, // const void* pNext;
167 VK_BUFFER_CREATE_SPARSE_BINDING_BIT | VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT, // VkBufferCreateFlags flags;
168 m_bufferSize, // VkDeviceSize size;
169 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT, // VkBufferUsageFlags usage;
170 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
171 0u, // uint32_t queueFamilyIndexCount;
172 DE_NULL // const uint32_t* pQueueFamilyIndices;
173 };
174
175 const uint32_t queueFamilyIndices[] = {sparseQueue.queueFamilyIndex, computeQueue.queueFamilyIndex};
176
177 if (sparseQueue.queueFamilyIndex != computeQueue.queueFamilyIndex)
178 {
179 bufferCreateInfo.sharingMode = VK_SHARING_MODE_CONCURRENT;
180 bufferCreateInfo.queueFamilyIndexCount = 2u;
181 bufferCreateInfo.pQueueFamilyIndices = queueFamilyIndices;
182 }
183
184 // Create sparse buffer
185 const Unique<VkBuffer> sparseBuffer(createBuffer(deviceInterface, getDevice(), &bufferCreateInfo));
186
187 // Create sparse buffer memory bind semaphore
188 const Unique<VkSemaphore> bufferMemoryBindSemaphore(createSemaphore(deviceInterface, getDevice()));
189
190 const VkMemoryRequirements bufferMemRequirements =
191 getBufferMemoryRequirements(deviceInterface, getDevice(), *sparseBuffer);
192
193 if (bufferMemRequirements.size > physicalDeviceProperties.limits.sparseAddressSpaceSize)
194 TCU_THROW(NotSupportedError, "Required memory size for sparse resources exceeds device limits");
195
196 DE_ASSERT((bufferMemRequirements.size % bufferMemRequirements.alignment) == 0);
197
198 const uint32_t numSparseSlots =
199 static_cast<uint32_t>(bufferMemRequirements.size / bufferMemRequirements.alignment);
200 std::vector<DeviceMemorySp> deviceMemUniquePtrVec;
201
202 {
203 std::vector<VkSparseMemoryBind> sparseMemoryBinds;
204 const uint32_t memoryType = findMatchingMemoryType(instance, getPhysicalDevice(secondDeviceID),
205 bufferMemRequirements, MemoryRequirement::Any);
206
207 if (memoryType == NO_MATCH_FOUND)
208 return tcu::TestStatus::fail("No matching memory type found");
209
210 if (firstDeviceID != secondDeviceID)
211 {
212 VkPeerMemoryFeatureFlags peerMemoryFeatureFlags = (VkPeerMemoryFeatureFlags)0;
213 const uint32_t heapIndex =
214 getHeapIndexForMemoryType(instance, getPhysicalDevice(secondDeviceID), memoryType);
215 deviceInterface.getDeviceGroupPeerMemoryFeatures(getDevice(), heapIndex, firstDeviceID, secondDeviceID,
216 &peerMemoryFeatureFlags);
217
218 if (((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT) == 0) ||
219 ((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT) == 0))
220 {
221 TCU_THROW(NotSupportedError, "Peer memory does not support COPY_SRC and GENERIC_DST");
222 }
223 }
224
225 for (uint32_t sparseBindNdx = 0; sparseBindNdx < numSparseSlots; sparseBindNdx += 2)
226 {
227 const VkSparseMemoryBind sparseMemoryBind =
228 makeSparseMemoryBind(deviceInterface, getDevice(), bufferMemRequirements.alignment, memoryType,
229 bufferMemRequirements.alignment * sparseBindNdx);
230
231 deviceMemUniquePtrVec.push_back(makeVkSharedPtr(
232 Move<VkDeviceMemory>(check<VkDeviceMemory>(sparseMemoryBind.memory),
233 Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));
234
235 sparseMemoryBinds.push_back(sparseMemoryBind);
236 }
237
238 const VkSparseBufferMemoryBindInfo sparseBufferBindInfo = makeSparseBufferMemoryBindInfo(
239 *sparseBuffer, static_cast<uint32_t>(sparseMemoryBinds.size()), &sparseMemoryBinds[0]);
240
241 const VkDeviceGroupBindSparseInfo devGroupBindSparseInfo = {
242 VK_STRUCTURE_TYPE_DEVICE_GROUP_BIND_SPARSE_INFO, //VkStructureType sType;
243 DE_NULL, //const void* pNext;
244 firstDeviceID, //uint32_t resourceDeviceIndex;
245 secondDeviceID, //uint32_t memoryDeviceIndex;
246 };
247 const VkBindSparseInfo bindSparseInfo = {
248 VK_STRUCTURE_TYPE_BIND_SPARSE_INFO, //VkStructureType sType;
249 usingDeviceGroups() ? &devGroupBindSparseInfo : DE_NULL, //const void* pNext;
250 0u, //uint32_t waitSemaphoreCount;
251 DE_NULL, //const VkSemaphore* pWaitSemaphores;
252 1u, //uint32_t bufferBindCount;
253 &sparseBufferBindInfo, //const VkSparseBufferMemoryBindInfo* pBufferBinds;
254 0u, //uint32_t imageOpaqueBindCount;
255 DE_NULL, //const VkSparseImageOpaqueMemoryBindInfo* pImageOpaqueBinds;
256 0u, //uint32_t imageBindCount;
257 DE_NULL, //const VkSparseImageMemoryBindInfo* pImageBinds;
258 1u, //uint32_t signalSemaphoreCount;
259 &bufferMemoryBindSemaphore.get() //const VkSemaphore* pSignalSemaphores;
260 };
261
262 VK_CHECK(deviceInterface.queueBindSparse(sparseQueue.queueHandle, 1u, &bindSparseInfo, DE_NULL));
263 }
264
265 // Create input buffer
266 const VkBufferCreateInfo inputBufferCreateInfo =
267 makeBufferCreateInfo(m_bufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
268 const Unique<VkBuffer> inputBuffer(createBuffer(deviceInterface, getDevice(), &inputBufferCreateInfo));
269 const de::UniquePtr<Allocation> inputBufferAlloc(
270 bindBuffer(deviceInterface, getDevice(), getAllocator(), *inputBuffer, MemoryRequirement::HostVisible));
271
272 std::vector<uint8_t> referenceData;
273 referenceData.resize(m_bufferSize);
274
275 for (uint32_t valueNdx = 0; valueNdx < m_bufferSize; ++valueNdx)
276 {
277 referenceData[valueNdx] = static_cast<uint8_t>((valueNdx % bufferMemRequirements.alignment) + 1u);
278 }
279
280 deMemcpy(inputBufferAlloc->getHostPtr(), &referenceData[0], m_bufferSize);
281
282 flushAlloc(deviceInterface, getDevice(), *inputBufferAlloc);
283
284 // Create output buffer
285 const VkBufferCreateInfo outputBufferCreateInfo =
286 makeBufferCreateInfo(m_bufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
287 const Unique<VkBuffer> outputBuffer(createBuffer(deviceInterface, getDevice(), &outputBufferCreateInfo));
288 const de::UniquePtr<Allocation> outputBufferAlloc(
289 bindBuffer(deviceInterface, getDevice(), getAllocator(), *outputBuffer, MemoryRequirement::HostVisible));
290
291 // Create command buffer for compute and data transfer operations
292 const Unique<VkCommandPool> commandPool(
293 makeCommandPool(deviceInterface, getDevice(), computeQueue.queueFamilyIndex));
294 const Unique<VkCommandBuffer> commandBuffer(
295 allocateCommandBuffer(deviceInterface, getDevice(), *commandPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
296
297 // Start recording compute and transfer commands
298 beginCommandBuffer(deviceInterface, *commandBuffer);
299
300 // Create descriptor set
301 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
302 DescriptorSetLayoutBuilder()
303 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
304 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
305 .build(deviceInterface, getDevice()));
306
307 // Create compute pipeline
308 const Unique<VkShaderModule> shaderModule(
309 createShaderModule(deviceInterface, getDevice(), m_context.getBinaryCollection().get("comp"), DE_NULL));
310 const Unique<VkPipelineLayout> pipelineLayout(
311 makePipelineLayout(deviceInterface, getDevice(), *descriptorSetLayout));
312 const Unique<VkPipeline> computePipeline(
313 makeComputePipeline(deviceInterface, getDevice(), *pipelineLayout, *shaderModule));
314
315 deviceInterface.cmdBindPipeline(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
316
317 const Unique<VkDescriptorPool> descriptorPool(
318 DescriptorPoolBuilder()
319 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u)
320 .build(deviceInterface, getDevice(), VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
321
322 const Unique<VkDescriptorSet> descriptorSet(
323 makeDescriptorSet(deviceInterface, getDevice(), *descriptorPool, *descriptorSetLayout));
324
325 {
326 const VkDescriptorBufferInfo inputBufferInfo = makeDescriptorBufferInfo(*inputBuffer, 0ull, m_bufferSize);
327 const VkDescriptorBufferInfo sparseBufferInfo = makeDescriptorBufferInfo(*sparseBuffer, 0ull, m_bufferSize);
328
329 DescriptorSetUpdateBuilder()
330 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
331 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputBufferInfo)
332 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
333 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &sparseBufferInfo)
334 .update(deviceInterface, getDevice());
335 }
336
337 deviceInterface.cmdBindDescriptorSets(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u,
338 &descriptorSet.get(), 0u, DE_NULL);
339
340 {
341 const VkBufferMemoryBarrier inputBufferBarrier = makeBufferMemoryBarrier(
342 VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *inputBuffer, 0ull, m_bufferSize);
343
344 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_HOST_BIT,
345 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, DE_NULL, 1u,
346 &inputBufferBarrier, 0u, DE_NULL);
347 }
348
349 deviceInterface.cmdDispatch(*commandBuffer, 1u, 1u, 1u);
350
351 {
352 const VkBufferMemoryBarrier sparseBufferBarrier = makeBufferMemoryBarrier(
353 VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, *sparseBuffer, 0ull, m_bufferSize);
354
355 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
356 VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, DE_NULL, 1u,
357 &sparseBufferBarrier, 0u, DE_NULL);
358 }
359
360 {
361 const VkBufferCopy bufferCopy = makeBufferCopy(0u, 0u, m_bufferSize);
362
363 deviceInterface.cmdCopyBuffer(*commandBuffer, *sparseBuffer, *outputBuffer, 1u, &bufferCopy);
364 }
365
366 {
367 const VkBufferMemoryBarrier outputBufferBarrier = makeBufferMemoryBarrier(
368 VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, m_bufferSize);
369
370 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT,
371 VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, DE_NULL, 1u, &outputBufferBarrier,
372 0u, DE_NULL);
373 }
374
375 // End recording compute and transfer commands
376 endCommandBuffer(deviceInterface, *commandBuffer);
377
378 const VkPipelineStageFlags waitStageBits[] = {VK_PIPELINE_STAGE_TRANSFER_BIT};
379
380 // Submit transfer commands for execution and wait for completion
381 submitCommandsAndWait(deviceInterface, getDevice(), computeQueue.queueHandle, *commandBuffer, 1u,
382 &bufferMemoryBindSemaphore.get(), waitStageBits, 0, DE_NULL, usingDeviceGroups(),
383 firstDeviceID);
384
385 // Retrieve data from output buffer to host memory
386 invalidateAlloc(deviceInterface, getDevice(), *outputBufferAlloc);
387
388 const uint8_t *outputData = static_cast<const uint8_t *>(outputBufferAlloc->getHostPtr());
389
390 // Wait for sparse queue to become idle
391 deviceInterface.queueWaitIdle(sparseQueue.queueHandle);
392
393 // Compare output data with reference data
394 for (uint32_t sparseBindNdx = 0; sparseBindNdx < numSparseSlots; ++sparseBindNdx)
395 {
396 const uint32_t alignment = static_cast<uint32_t>(bufferMemRequirements.alignment);
397 const uint32_t offset = alignment * sparseBindNdx;
398 const uint32_t size = sparseBindNdx == (numSparseSlots - 1) ? m_bufferSize % alignment : alignment;
399
400 if (sparseBindNdx % 2u == 0u)
401 {
402 if (deMemCmp(&referenceData[offset], outputData + offset, size) != 0)
403 return tcu::TestStatus::fail("Failed");
404 }
405 else if (physicalDeviceProperties.sparseProperties.residencyNonResidentStrict)
406 {
407 deMemset(&referenceData[offset], 0u, size);
408
409 if (deMemCmp(&referenceData[offset], outputData + offset, size) != 0)
410 return tcu::TestStatus::fail("Failed");
411 }
412 }
413 }
414
415 return tcu::TestStatus::pass("Passed");
416 }
417
createInstance(Context & context) const418 TestInstance *BufferSparseResidencyCase::createInstance(Context &context) const
419 {
420 return new BufferSparseResidencyInstance(context, m_bufferSize, m_useDeviceGroups);
421 }
422
423 } // namespace
424
addBufferSparseResidencyTests(tcu::TestCaseGroup * group,const bool useDeviceGroups)425 void addBufferSparseResidencyTests(tcu::TestCaseGroup *group, const bool useDeviceGroups)
426 {
427 group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_10", 1 << 10,
428 glu::GLSL_VERSION_440, useDeviceGroups));
429 group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_12", 1 << 12,
430 glu::GLSL_VERSION_440, useDeviceGroups));
431 group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_16", 1 << 16,
432 glu::GLSL_VERSION_440, useDeviceGroups));
433 group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_17", 1 << 17,
434 glu::GLSL_VERSION_440, useDeviceGroups));
435 group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_20", 1 << 20,
436 glu::GLSL_VERSION_440, useDeviceGroups));
437 group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_24", 1 << 24,
438 glu::GLSL_VERSION_440, useDeviceGroups));
439 }
440
441 } // namespace sparse
442 } // namespace vkt
443