1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2019 Advanced Micro Devices, Inc.
6 * Copyright (c) 2019 The Khronos Group Inc.
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 *
20 *//*!
21 * \file
22 * \brief Tests for VK_AMD_buffer_marker
23 *//*--------------------------------------------------------------------*/
24
25 #include "vktApiBufferMarkerTests.hpp"
26 #include "vktTestCase.hpp"
27 #include "vktTestCaseUtil.hpp"
28 #include "vktTestGroupUtil.hpp"
29 #include "vktExternalMemoryUtil.hpp"
30 #include "vktCustomInstancesDevices.hpp"
31 #include "vkPlatform.hpp"
32 #include "vkCmdUtil.hpp"
33 #include "vkObjUtil.hpp"
34 #include "vkMemUtil.hpp"
35 #include "vkQueryUtil.hpp"
36 #include "vkRefUtil.hpp"
37 #include "vkBuilderUtil.hpp"
38 #include "tcuCommandLine.hpp"
39 #include "deUniquePtr.hpp"
40 #include "deRandom.hpp"
41 #include "deSTLUtil.hpp"
42
43 #include <vector>
44
45 namespace vkt
46 {
47 namespace api
48 {
49 namespace
50 {
51 using namespace vk;
52 using de::MovePtr;
53 using de::UniquePtr;
54 using namespace vkt::ExternalMemoryUtil;
55
//! Common test data related to the device
struct WorkingDevice
{
    Move<VkDevice> logicalDevice;       // Custom device created for the selected queue family
    MovePtr<DeviceDriver> deviceDriver; // Function table for logicalDevice
    MovePtr<Allocator> allocator;       // Allocator that honors the test's requested allocation offset
    VkQueue queue;                      // Queue 0 of queueFamilyIdx on logicalDevice
    uint32_t queueFamilyIdx;            // Index of the family the test queue belongs to
    VkQueueFamilyProperties queueProps; // Properties of that queue family
};
66
queueFamilyMatchesTestCase(const VkQueueFamilyProperties & props,VkQueueFlagBits testQueue)67 bool queueFamilyMatchesTestCase(const VkQueueFamilyProperties &props, VkQueueFlagBits testQueue)
68 {
69 // The goal is to find a queue family that most accurately represents the required queue flag. For example, if flag is
70 // VK_QUEUE_TRANSFER_BIT, we want to target transfer-only queues for such a test case rather than universal queues which
71 // may include VK_QUEUE_TRANSFER_BIT along with other queue flags.
72 const VkQueueFlags flags =
73 props.queueFlags & (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT);
74
75 // for VK_QUEUE_TRANSFER_BIT, target transfer-only queues:
76 if (testQueue == VK_QUEUE_TRANSFER_BIT)
77 return (flags == VK_QUEUE_TRANSFER_BIT);
78
79 // for VK_QUEUE_COMPUTE_BIT, target compute only queues
80 if (testQueue == VK_QUEUE_COMPUTE_BIT)
81 return ((flags & (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT)) == VK_QUEUE_COMPUTE_BIT);
82
83 // for VK_QUEUE_GRAPHICS_BIT, target universal queues (queues which support graphics)
84 if (testQueue == VK_QUEUE_GRAPHICS_BIT)
85 return ((flags & VK_QUEUE_GRAPHICS_BIT) != 0);
86
87 DE_FATAL("Unexpected test queue flag");
88
89 return false;
90 }
91
// We create a custom device because we don't want to always use the universal queue.
// Selects the queue family best matching 'testQueue' (see queueFamilyMatchesTestCase), enables
// VK_AMD_buffer_marker (plus VK_EXT_external_memory_host when 'hostPtr' is set), and fills 'wd'
// with the device, its driver, an allocator and the test queue. 'offset' is forwarded to the
// allocator so allocations can be placed at a non-zero offset in their backing memory.
// Throws NotSupportedError when no compatible queue family exists.
void createDeviceWithExtension(Context &context, WorkingDevice &wd, VkQueueFlagBits testQueue, bool hostPtr,
                               size_t offset)
{
    const PlatformInterface &vkp = context.getPlatformInterface();
    const VkInstance instance = context.getInstance();
    const InstanceInterface &instanceDriver = context.getInstanceInterface();
    const VkPhysicalDevice physicalDevice = context.getPhysicalDevice();
    const auto useValidation = context.getTestContext().getCommandLine().isValidationEnabled();

    // Create a device with extension enabled and a queue with a family which supports the buffer marker extension
    const std::vector<VkQueueFamilyProperties> queueFamilyProperties =
        getPhysicalDeviceQueueFamilyProperties(instanceDriver, physicalDevice);
    const float queuePriority = 1.0f;
    VkDeviceQueueCreateInfo queueCreateInfo;
    // Zero-initialize so "queueCount == 0" below reliably means "no matching family found".
    deMemset(&queueCreateInfo, 0, sizeof(queueCreateInfo));

    for (uint32_t familyIdx = 0; familyIdx < queueFamilyProperties.size(); ++familyIdx)
    {
        if (queueFamilyMatchesTestCase(queueFamilyProperties[familyIdx], testQueue) &&
            queueFamilyProperties[familyIdx].queueCount > 0)
        {
            queueCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
            queueCreateInfo.pNext = DE_NULL;
            queueCreateInfo.pQueuePriorities = &queuePriority;
            queueCreateInfo.queueCount = 1;
            queueCreateInfo.queueFamilyIndex = familyIdx;

            break;
        }
    }

    if (queueCreateInfo.queueCount == 0)
    {
        TCU_THROW(NotSupportedError, "No compatible queue family for this test case");
    }

    std::vector<const char *> cstrDeviceExtensions;

    cstrDeviceExtensions.push_back("VK_AMD_buffer_marker");

    if (hostPtr)
        cstrDeviceExtensions.push_back("VK_EXT_external_memory_host");

    const VkDeviceCreateInfo deviceInfo = {
        VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,               // VkStructureType sType;
        DE_NULL,                                            // const void* pNext;
        0u,                                                 // VkDeviceCreateFlags flags;
        1,                                                  // uint32_t queueCreateInfoCount;
        &queueCreateInfo,                                   // const VkDeviceQueueCreateInfo* pQueueCreateInfos;
        0u,                                                 // uint32_t enabledLayerCount;
        DE_NULL,                                            // const char* const* ppEnabledLayerNames;
        static_cast<uint32_t>(cstrDeviceExtensions.size()), // uint32_t enabledExtensionCount;
        de::dataOrNull(cstrDeviceExtensions),               // const char* const* ppEnabledExtensionNames;
        &context.getDeviceFeatures(),                       // const VkPhysicalDeviceFeatures* pEnabledFeatures;
    };

    wd.logicalDevice = createCustomDevice(useValidation, vkp, instance, instanceDriver, physicalDevice, &deviceInfo);
    wd.deviceDriver = MovePtr<DeviceDriver>(new DeviceDriver(
        vkp, instance, *wd.logicalDevice, context.getUsedApiVersion(), context.getTestContext().getCommandLine()));
    // The allocator offsets allocations by 'offset' bytes, aligned to nonCoherentAtomSize, so tests
    // can exercise markers backed by memory bound at a non-zero offset.
    const SimpleAllocator::OptionalOffsetParams offsetParams(
        {context.getDeviceProperties().limits.nonCoherentAtomSize, static_cast<VkDeviceSize>(offset)});
    wd.allocator = MovePtr<Allocator>(
        new SimpleAllocator(*wd.deviceDriver, *wd.logicalDevice,
                            getPhysicalDeviceMemoryProperties(instanceDriver, physicalDevice), offsetParams));
    wd.queueFamilyIdx = queueCreateInfo.queueFamilyIndex;
    wd.queue = getDeviceQueue(*wd.deviceDriver, *wd.logicalDevice, wd.queueFamilyIdx, 0u);
    wd.queueProps = queueFamilyProperties[queueCreateInfo.queueFamilyIndex];
}
161
writeHostMemory(const vk::DeviceInterface & vkd,vk::VkDevice device,vk::VkDeviceMemory memory,size_t size,size_t memorySize,const void * data)162 void writeHostMemory(const vk::DeviceInterface &vkd, vk::VkDevice device, vk::VkDeviceMemory memory, size_t size,
163 size_t memorySize, const void *data)
164 {
165 void *const ptr = vk::mapMemory(vkd, device, memory, 0, memorySize, 0);
166
167 deMemcpy(ptr, data, size);
168
169 flushMappedMemoryRange(vkd, device, memory, 0, memorySize);
170
171 vkd.unmapMemory(device, memory);
172 }
173
// Map 'size' bytes of 'memory', invalidate the mapped range so subsequent host reads observe
// device writes, then unmap. The mapped pointer is intentionally discarded: callers read the
// data through a pointer obtained elsewhere (e.g. Allocation::getHostPtr()).
void invalidateHostMemory(const vk::DeviceInterface &vkd, vk::VkDevice device, vk::VkDeviceMemory memory, size_t size)
{
    vk::mapMemory(vkd, device, memory, 0, size, 0);

    invalidateMappedMemoryRange(vkd, device, memory, 0, size);

    vkd.unmapMemory(device, memory);
}
182
checkMarkerBuffer(const DeviceInterface & vk,VkDevice device,const MovePtr<vk::Allocation> & memory,const std::vector<uint32_t> & expected,size_t size,bool useHostMemory)183 bool checkMarkerBuffer(const DeviceInterface &vk, VkDevice device, const MovePtr<vk::Allocation> &memory,
184 const std::vector<uint32_t> &expected, size_t size, bool useHostMemory)
185 {
186 if (useHostMemory)
187 {
188 invalidateHostMemory(vk, device, memory->getMemory(), size);
189 }
190 else
191 {
192 invalidateAlloc(vk, device, *memory);
193 }
194
195 const uint32_t *data = reinterpret_cast<const uint32_t *>(static_cast<const char *>(memory->getHostPtr()));
196
197 for (size_t i = 0; i < expected.size(); ++i)
198 {
199 if (data[i] != expected[i])
200 return false;
201 }
202
203 return true;
204 }
205
// Parameters common to all buffer marker test cases.
struct BaseTestParams
{
    VkQueueFlagBits testQueue;     // Queue type that this test case targets
    VkPipelineStageFlagBits stage; // Pipeline stage where any marker writes for this test case occur in
    uint32_t size;                 // Number of buffer markers
    bool useHostPtr;               // Whether to use host pointer as backing buffer memory
    size_t offset;                 // The offset of the data in the buffer
};
214
// Pick a device memory type index suitable for importing the given external host allocation.
// 'allowedBits' starts as the buffer's memoryTypeBits requirement and, when the host-pointer
// property query succeeds, is restricted to types compatible with the host pointer. Returns the
// index of the lowest remaining set bit; callers treat VK_MAX_MEMORY_TYPES (32) as "no compatible
// type". NOTE(review): this relies on deInt32BitScan returning 32 when no bits are set — confirm
// against deInt32.h.
uint32_t chooseExternalMarkerMemoryType(const DeviceInterface &vkd, VkDevice device,
                                        VkExternalMemoryHandleTypeFlagBits externalType, uint32_t allowedBits,
                                        MovePtr<ExternalHostMemory> &hostMemory)
{
    VkMemoryHostPointerPropertiesEXT props = {
        vk::VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT,
        DE_NULL,
        0u,
    };

    // Best-effort: if the query fails, fall back to the unrestricted requirement bits.
    if (vkd.getMemoryHostPointerPropertiesEXT(device, externalType, hostMemory->data, &props) == VK_SUCCESS)
    {
        allowedBits &= props.memoryTypeBits;
    }

    return deInt32BitScan((int32_t *)&allowedBits);
}
232
// Allocation wrapper for device memory imported from a host allocation
// (VK_EXT_external_memory_host). Owns the VkDeviceMemory handle so it is freed with the
// allocation; 'hostPtr' points into the caller-owned host block at 'offset'.
class ExternalHostAllocation : public Allocation
{
public:
    ExternalHostAllocation(Move<VkDeviceMemory> mem, void *hostPtr, size_t offset)
        : Allocation(*mem, offset, hostPtr)
        , m_memHolder(mem)
    {
    }

private:
    // Holds ownership of the imported memory object for the lifetime of this allocation.
    const Unique<VkDeviceMemory> m_memHolder;
};
245
// Allocate and bind backing memory for the marker buffer.
// With 'externalHostPtr' false, memory comes from the test's allocator with 'allocRequirement'.
// Otherwise a host block is allocated ('hostMemory' receives it), imported via
// VK_EXT_external_memory_host, and wrapped in an ExternalHostAllocation whose host pointer points
// 'bufferOffset' bytes (aligned up to the buffer's alignment) into the host block.
// In both cases the buffer is bound at the resulting allocation's offset.
void createMarkerBufferMemory(const InstanceInterface &vki, const DeviceInterface &vkd, VkPhysicalDevice physicalDevice,
                              VkDevice device, VkBuffer buffer, size_t bufferOffset, MovePtr<Allocator> &allocator,
                              const MemoryRequirement allocRequirement, bool externalHostPtr,
                              MovePtr<ExternalHostMemory> &hostMemory, MovePtr<Allocation> &deviceMemory)
{
    VkMemoryRequirements memReqs = getBufferMemoryRequirements(vkd, device, buffer);

    if (externalHostPtr == false)
    {
        deviceMemory = allocator->allocate(memReqs, allocRequirement);
    }
    else
    {
        const VkExternalMemoryHandleTypeFlagBits externalType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT;

        const VkPhysicalDeviceExternalMemoryHostPropertiesEXT hostProps =
            getPhysicalDeviceExternalMemoryHostProperties(vki, physicalDevice);
        // Honor the buffer's alignment requirement for the offset within the host block, and size
        // the host block to fit the buffer at that offset.
        bufferOffset = deAlignSize(bufferOffset, static_cast<size_t>(memReqs.alignment));
        hostMemory = MovePtr<ExternalHostMemory>(
            new ExternalHostMemory(memReqs.size + bufferOffset, hostProps.minImportedHostPointerAlignment));

        const uint32_t externalMemType =
            chooseExternalMarkerMemoryType(vkd, device, externalType, memReqs.memoryTypeBits, hostMemory);

        if (externalMemType == VK_MAX_MEMORY_TYPES)
        {
            TCU_FAIL("Failed to find compatible external host memory type for marker buffer");
        }

        // Import the host block as device memory.
        const VkImportMemoryHostPointerInfoEXT importInfo = {VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
                                                             DE_NULL, externalType, hostMemory->data};

        const VkMemoryAllocateInfo info = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, (const void *)&importInfo,
                                           hostMemory->size, externalMemType};

        deviceMemory = MovePtr<Allocation>(new ExternalHostAllocation(
            allocateMemory(vkd, device, &info), (((uint8_t *)hostMemory->data) + bufferOffset), bufferOffset));
    }

    VK_CHECK(vkd.bindBufferMemory(device, buffer, deviceMemory->getMemory(), deviceMemory->getOffset()));
}
287
// Test case: write one marker per buffer slot, in increasing offset order, then verify on the
// host that every slot holds the expected value. Optionally backs the buffer with imported host
// memory (params.useHostPtr) and/or a non-zero allocation offset (params.offset).
tcu::TestStatus bufferMarkerSequential(Context &context, BaseTestParams params)
{
    WorkingDevice wd;

    createDeviceWithExtension(context, wd, params.testQueue, params.useHostPtr, params.offset);

    const DeviceInterface &vk(*wd.deviceDriver);
    const VkDevice device(*wd.logicalDevice);
    const VkDeviceSize markerBufferSize(params.size * sizeof(uint32_t));
    // When backed by an imported host pointer, the buffer must be created with the matching
    // external-memory handle type chained into its create info.
    VkExternalMemoryBufferCreateInfo externalMemoryBufferCreateInfo = {
        VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO, DE_NULL,
        VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT};
    VkBufferCreateInfo bufferCreateInfo = makeBufferCreateInfo(markerBufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
    if (params.useHostPtr)
        bufferCreateInfo.pNext = &externalMemoryBufferCreateInfo;
    Move<VkBuffer> markerBuffer(createBuffer(vk, device, &bufferCreateInfo));
    MovePtr<ExternalHostMemory> hostMemory;
    MovePtr<Allocation> markerMemory;

    createMarkerBufferMemory(context.getInstanceInterface(), vk, context.getPhysicalDevice(), device, *markerBuffer,
                             params.offset, wd.allocator, MemoryRequirement::HostVisible, params.useHostPtr, hostMemory,
                             markerMemory);

    // Deterministic per-size seed so the expected values are reproducible.
    de::Random rng(12345 ^ params.size);
    std::vector<uint32_t> expected(params.size);

    for (size_t i = 0; i < params.size; ++i)
        expected[i] = rng.getUint32();

    // Initialize the buffer contents with the expected values before recording the marker writes.
    if (params.useHostPtr)
    {
        writeHostMemory(vk, device, markerMemory->getMemory(), static_cast<size_t>(markerBufferSize), hostMemory->size,
                        &expected[0]);
    }
    else
    {
        deMemcpy(markerMemory->getHostPtr(), &expected[0], static_cast<size_t>(markerBufferSize));
        flushMappedMemoryRange(vk, device, markerMemory->getMemory(), markerMemory->getOffset(), VK_WHOLE_SIZE);
    }

    const Unique<VkCommandPool> cmdPool(
        createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, wd.queueFamilyIdx));
    const Unique<VkCommandBuffer> cmdBuffer(
        allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

    beginCommandBuffer(vk, *cmdBuffer);

    // One marker write per slot, each at its own dword offset.
    for (size_t i = 0; i < params.size; ++i)
    {
        vk.cmdWriteBufferMarkerAMD(*cmdBuffer, params.stage, *markerBuffer,
                                   static_cast<VkDeviceSize>(sizeof(uint32_t) * i), expected[i]);
    }

    // Marker writes are made visible to host reads as transfer writes.
    const VkMemoryBarrier memoryDep = {
        VK_STRUCTURE_TYPE_MEMORY_BARRIER,
        DE_NULL,
        VK_ACCESS_TRANSFER_WRITE_BIT,
        VK_ACCESS_HOST_READ_BIT,
    };

    vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0, 1, &memoryDep, 0,
                          DE_NULL, 0, DE_NULL);

    VK_CHECK(vk.endCommandBuffer(*cmdBuffer));

    submitCommandsAndWait(vk, device, wd.queue, *cmdBuffer);

    // With a host pointer the invalidate must cover the whole imported block; otherwise
    // checkMarkerBuffer uses invalidateAlloc and the size argument is unused.
    if (!checkMarkerBuffer(vk, device, markerMemory, expected, params.useHostPtr ? hostMemory->size : 0,
                           params.useHostPtr))
        return tcu::TestStatus::fail("Some marker values were incorrect");

    return tcu::TestStatus::pass("Pass");
}
361
// Test case: record many marker writes to randomly chosen slots (10x the slot count), tracking
// the last value written to each slot, then verify on the host that every slot holds the value
// of the last marker write targeting it. The buffer starts zero-filled.
tcu::TestStatus bufferMarkerOverwrite(Context &context, BaseTestParams params)
{
    WorkingDevice wd;

    createDeviceWithExtension(context, wd, params.testQueue, params.useHostPtr, params.offset);

    const DeviceInterface &vk(*wd.deviceDriver);
    const VkDevice device(*wd.logicalDevice);
    const VkDeviceSize markerBufferSize(params.size * sizeof(uint32_t));
    // When backed by an imported host pointer, the buffer must be created with the matching
    // external-memory handle type chained into its create info.
    VkExternalMemoryBufferCreateInfo externalMemoryBufferCreateInfo = {
        VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO, DE_NULL,
        VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT};
    VkBufferCreateInfo bufferCreateInfo = makeBufferCreateInfo(markerBufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
    if (params.useHostPtr)
        bufferCreateInfo.pNext = &externalMemoryBufferCreateInfo;

    Move<VkBuffer> markerBuffer(createBuffer(vk, device, &bufferCreateInfo));
    MovePtr<ExternalHostMemory> hostMemory;
    MovePtr<Allocation> markerMemory;

    createMarkerBufferMemory(context.getInstanceInterface(), vk, context.getPhysicalDevice(), device, *markerBuffer,
                             params.offset, wd.allocator, MemoryRequirement::HostVisible, params.useHostPtr, hostMemory,
                             markerMemory);

    // Deterministic per-size seed; the RNG picks which slot each write targets below.
    de::Random rng(12345 ^ params.size);
    std::vector<uint32_t> expected(params.size);

    // Start with an all-zero buffer (and matching expectations).
    for (size_t i = 0; i < params.size; ++i)
        expected[i] = 0;

    if (params.useHostPtr)
    {
        writeHostMemory(vk, device, markerMemory->getMemory(), static_cast<size_t>(markerBufferSize), hostMemory->size,
                        &expected[0]);
    }
    else
    {
        deMemcpy(markerMemory->getHostPtr(), &expected[0], static_cast<size_t>(markerBufferSize));
        flushMappedMemoryRange(vk, device, markerMemory->getMemory(), markerMemory->getOffset(), VK_WHOLE_SIZE);
    }

    const Unique<VkCommandPool> cmdPool(
        createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, wd.queueFamilyIdx));
    const Unique<VkCommandBuffer> cmdBuffer(
        allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

    beginCommandBuffer(vk, *cmdBuffer);

    // 10 writes per slot on average; markers to the same offset must land in submission order,
    // so the final value of each slot equals the last value recorded for it.
    for (uint32_t i = 0; i < params.size * 10; ++i)
    {
        const uint32_t slot = rng.getUint32() % static_cast<uint32_t>(params.size);
        const uint32_t value = i;

        expected[slot] = value;

        vk.cmdWriteBufferMarkerAMD(*cmdBuffer, params.stage, *markerBuffer,
                                   static_cast<VkDeviceSize>(sizeof(uint32_t) * slot), expected[slot]);
    }

    // Marker writes are made visible to host reads as transfer writes.
    const VkMemoryBarrier memoryDep = {
        VK_STRUCTURE_TYPE_MEMORY_BARRIER,
        DE_NULL,
        VK_ACCESS_TRANSFER_WRITE_BIT,
        VK_ACCESS_HOST_READ_BIT,
    };

    vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0, 1, &memoryDep, 0,
                          DE_NULL, 0, DE_NULL);

    VK_CHECK(vk.endCommandBuffer(*cmdBuffer));

    submitCommandsAndWait(vk, device, wd.queue, *cmdBuffer);

    // With a host pointer the invalidate must cover the whole imported block; otherwise
    // checkMarkerBuffer uses invalidateAlloc and the size argument is unused.
    if (!checkMarkerBuffer(vk, device, markerMemory, expected, params.useHostPtr ? hostMemory->size : 0,
                           params.useHostPtr))
        return tcu::TestStatus::fail("Some marker values were incorrect");

    return tcu::TestStatus::pass("Pass");
}
441
// Kind of non-marker write used to create a memory dependency against marker writes
// (see computeMemoryDepBarrier for the access/stage masks each implies).
enum MemoryDepMethod
{
    MEMORY_DEP_DRAW,     // Fragment-shader writes produced by a draw
    MEMORY_DEP_DISPATCH, // Compute-shader writes produced by a dispatch
    MEMORY_DEP_COPY      // Transfer writes produced by a copy
};
448
// Parameters for the memory-dependency test cases.
struct MemoryDepParams
{
    BaseTestParams base;    // Common parameters (queue, stage, marker count, host pointer, offset)
    MemoryDepMethod method; // How the competing non-marker writes are produced
};
454
// Identifies which kind of operation is considered the current writer of a buffer slot when
// computing barrier masks (presumably "last writer" bookkeeping in the memory-dep test — the
// consumer loop is outside this view; see computeMemoryDepBarrier for usage).
enum MemoryDepOwner
{
    MEMORY_DEP_OWNER_NOBODY = 0,     // Slot not yet written
    MEMORY_DEP_OWNER_MARKER = 1,     // Slot last written by vkCmdWriteBufferMarkerAMD
    MEMORY_DEP_OWNER_NON_MARKER = 2  // Slot last written by the draw/dispatch/copy path
};
461
computeMemoryDepBarrier(const MemoryDepParams & params,MemoryDepOwner owner,VkAccessFlags * memoryDepAccess,VkPipelineStageFlags * executionScope)462 void computeMemoryDepBarrier(const MemoryDepParams ¶ms, MemoryDepOwner owner, VkAccessFlags *memoryDepAccess,
463 VkPipelineStageFlags *executionScope)
464 {
465 DE_ASSERT(owner != MEMORY_DEP_OWNER_NOBODY);
466
467 if (owner == MEMORY_DEP_OWNER_MARKER)
468 {
469 *memoryDepAccess = VK_ACCESS_TRANSFER_WRITE_BIT;
470 *executionScope = params.base.stage | VK_PIPELINE_STAGE_TRANSFER_BIT;
471 }
472 else
473 {
474 if (params.method == MEMORY_DEP_COPY)
475 {
476 *memoryDepAccess = VK_ACCESS_TRANSFER_WRITE_BIT;
477 *executionScope = VK_PIPELINE_STAGE_TRANSFER_BIT;
478 }
479 else if (params.method == MEMORY_DEP_DISPATCH)
480 {
481 *memoryDepAccess = VK_ACCESS_SHADER_WRITE_BIT;
482 *executionScope = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
483 }
484 else
485 {
486 *memoryDepAccess = VK_ACCESS_SHADER_WRITE_BIT;
487 *executionScope = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
488 }
489 }
490 }
491
492 // Randomly do buffer marker writes and other operations (draws, dispatches) that shader-write to a shared buffer. Insert pipeline barriers
493 // when necessary and make sure that the synchronization between marker writes and non-marker writes are correctly handled by the barriers.
bufferMarkerMemoryDep(Context & context,MemoryDepParams params)494 tcu::TestStatus bufferMarkerMemoryDep(Context &context, MemoryDepParams params)
495 {
496 WorkingDevice wd;
497
498 createDeviceWithExtension(context, wd, params.base.testQueue, params.base.useHostPtr, params.base.offset);
499
500 VkBufferUsageFlags usageFlags = VK_BUFFER_USAGE_TRANSFER_DST_BIT;
501
502 if ((params.method == MEMORY_DEP_DRAW) || (params.method == MEMORY_DEP_DISPATCH))
503 usageFlags |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
504 else
505 usageFlags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
506
507 const uint32_t numIters(1000);
508 const DeviceInterface &vk(*wd.deviceDriver);
509 const VkDevice device(*wd.logicalDevice);
510 const uint32_t size(params.base.size);
511 const VkDeviceSize markerBufferSize(params.base.size * sizeof(uint32_t));
512 VkExternalMemoryBufferCreateInfo externalMemoryBufferCreateInfo = {
513 VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO, DE_NULL,
514 VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT};
515 VkBufferCreateInfo bufferCreateInfo = makeBufferCreateInfo(markerBufferSize, usageFlags);
516 if (params.base.useHostPtr)
517 bufferCreateInfo.pNext = &externalMemoryBufferCreateInfo;
518 Move<VkBuffer> markerBuffer(createBuffer(vk, device, &bufferCreateInfo));
519 MovePtr<ExternalHostMemory> hostMemory;
520 MovePtr<Allocation> markerMemory;
521
522 createMarkerBufferMemory(context.getInstanceInterface(), vk, context.getPhysicalDevice(), device, *markerBuffer,
523 params.base.offset, wd.allocator, MemoryRequirement::HostVisible, params.base.useHostPtr,
524 hostMemory, markerMemory);
525
526 de::Random rng(size ^ params.base.size);
527 std::vector<uint32_t> expected(params.base.size, 0);
528
529 Move<VkDescriptorPool> descriptorPool;
530 Move<VkDescriptorSetLayout> descriptorSetLayout;
531 Move<VkDescriptorSet> descriptorSet;
532 Move<VkPipelineLayout> pipelineLayout;
533 VkShaderStageFlags pushConstantStage = 0;
534
535 if ((params.method == MEMORY_DEP_DRAW) || (params.method == MEMORY_DEP_DISPATCH))
536 {
537 DescriptorPoolBuilder descriptorPoolBuilder;
538
539 descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1u);
540 descriptorPool = descriptorPoolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
541
542 DescriptorSetLayoutBuilder setLayoutBuilder;
543
544 setLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_ALL);
545 descriptorSetLayout = setLayoutBuilder.build(vk, device);
546
547 const VkDescriptorSetAllocateInfo descriptorSetAllocateInfo = {
548 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, // VkStructureType sType;
549 DE_NULL, // const void* pNext;
550 *descriptorPool, // VkDescriptorPool descriptorPool;
551 1u, // uint32_t setLayoutCount;
552 &descriptorSetLayout.get() // const VkDescriptorSetLayout* pSetLayouts;
553 };
554
555 descriptorSet = allocateDescriptorSet(vk, device, &descriptorSetAllocateInfo);
556
557 VkDescriptorBufferInfo markerBufferInfo = {*markerBuffer, 0, VK_WHOLE_SIZE};
558
559 VkWriteDescriptorSet writeSet[] = {{
560 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // VkStructureType sType;
561 DE_NULL, // const void* pNext;
562 descriptorSet.get(), // VkDescriptorSet dstSet;
563 0, // uint32_t dstBinding;
564 0, // uint32_t dstArrayElement;
565 1, // uint32_t descriptorCount;
566 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, // VkDescriptorType descriptorType;
567 DE_NULL, // const VkDescriptorImageInfo* pImageInfo;
568 &markerBufferInfo, // const VkDescriptorBufferInfo* pBufferInfo;
569 DE_NULL // const VkBufferView* pTexelBufferViev
570 }};
571
572 vk.updateDescriptorSets(device, DE_LENGTH_OF_ARRAY(writeSet), writeSet, 0, DE_NULL);
573
574 VkDescriptorSetLayout setLayout = descriptorSetLayout.get();
575
576 pushConstantStage =
577 (params.method == MEMORY_DEP_DISPATCH ? VK_SHADER_STAGE_COMPUTE_BIT : VK_SHADER_STAGE_FRAGMENT_BIT);
578
579 const VkPushConstantRange pushConstantRange = {
580 pushConstantStage, // VkShaderStageFlags stageFlags;
581 0u, // uint32_t offset;
582 2 * sizeof(uint32_t), // uint32_t size;
583 };
584
585 const VkPipelineLayoutCreateInfo pipelineLayoutInfo = {
586 VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // VkStructureType sType;
587 DE_NULL, // const void* pNext;
588 (VkPipelineLayoutCreateFlags)0, // VkPipelineLayoutCreateFlags flags;
589 1u, // uint32_t setLayoutCount;
590 &setLayout, // const VkDescriptorSetLayout* pSetLayouts;
591 1u, // uint32_t pushConstantRangeCount;
592 &pushConstantRange, // const VkPushConstantRange* pPushConstantRanges;
593 };
594
595 pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutInfo);
596 }
597
598 Move<VkRenderPass> renderPass;
599 Move<VkFramebuffer> fbo;
600 Move<VkPipeline> pipeline;
601 Move<VkShaderModule> vertexModule;
602 Move<VkShaderModule> fragmentModule;
603 Move<VkShaderModule> computeModule;
604
605 if (params.method == MEMORY_DEP_DRAW)
606 {
607 const VkSubpassDescription subpassInfo = {
608 0, // VkSubpassDescriptionFlags flags;
609 VK_PIPELINE_BIND_POINT_GRAPHICS, // VkPipelineBindPoint pipelineBindPoint;
610 0, // uint32_t inputAttachmentCount;
611 DE_NULL, // const VkAttachmentReference* pInputAttachments;
612 0, // uint32_t colorAttachmentCount;
613 DE_NULL, // const VkAttachmentReference* pColorAttachments;
614 0, // const VkAttachmentReference* pResolveAttachments;
615 DE_NULL, // const VkAttachmentReference* pDepthStencilAttachment;
616 0, // uint32_t preserveAttachmentCount;
617 DE_NULL // const uint32_t* pPreserveAttachments;
618 };
619
620 const VkRenderPassCreateInfo renderPassInfo = {
621 VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // VkStructureType sType;
622 DE_NULL, // const void* pNext;
623 0, // VkRenderPassCreateFlags flags;
624 0, // uint32_t attachmentCount;
625 DE_NULL, // const VkAttachmentDescription* pAttachments;
626 1, // uint32_t subpassCount;
627 &subpassInfo, // const VkSubpassDescription* pSubpasses;
628 0u, // uint32_t dependencyCount;
629 DE_NULL // const VkSubpassDependency* pDependencies
630 };
631
632 renderPass = createRenderPass(vk, device, &renderPassInfo);
633
634 const VkFramebufferCreateInfo framebufferInfo = {
635 VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, // VkStructureType sType;
636 DE_NULL, // const void* pNext;
637 0, // VkFramebufferCreateFlags flags;
638 renderPass.get(), // VkRenderPass renderPass;
639 0, // uint32_t attachmentCount;
640 DE_NULL, // const VkImageView* pAttachments;
641 1, // uint32_t width;
642 1, // uint32_t height;
643 1, // uint32_t layers;
644 };
645
646 fbo = createFramebuffer(vk, device, &framebufferInfo);
647
648 vertexModule = createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u);
649 fragmentModule = createShaderModule(vk, device, context.getBinaryCollection().get("frag"), 0u);
650
651 const VkPipelineVertexInputStateCreateInfo vertexInputStateInfo = {
652 VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
653 DE_NULL, // const void* pNext;
654 (VkPipelineVertexInputStateCreateFlags)0, // VkPipelineVertexInputStateCreateFlags flags;
655 0, // uint32_t vertexBindingDescriptionCount;
656 DE_NULL, // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
657 0, // uint32_t vertexAttributeDescriptionCount;
658 DE_NULL, // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions;
659 };
660
661 const VkPipelineInputAssemblyStateCreateInfo pipelineInputAssemblyStateInfo = {
662 VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, // VkStructureType sType;
663 DE_NULL, // const void* pNext;
664 (VkPipelineInputAssemblyStateCreateFlags)0, // VkPipelineInputAssemblyStateCreateFlags flags;
665 VK_PRIMITIVE_TOPOLOGY_POINT_LIST, // VkPrimitiveTopology topology;
666 VK_FALSE, // VkBool32 primitiveRestartEnable;
667 };
668
669 std::vector<VkPipelineShaderStageCreateInfo> shaderStages;
670
671 {
672 const VkPipelineShaderStageCreateInfo createInfo = {
673 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
674 DE_NULL, // const void* pNext;
675 (VkPipelineShaderStageCreateFlags)0, // VkPipelineShaderStageCreateFlags flags;
676 VK_SHADER_STAGE_VERTEX_BIT, // VkShaderStageFlagBits stage;
677 vertexModule.get(), // VkShaderModule module;
678 "main", // const char* pName;
679 DE_NULL, // const VkSpecializationInfo* pSpecializationInfo;
680 };
681
682 shaderStages.push_back(createInfo);
683 }
684
685 {
686 const VkPipelineShaderStageCreateInfo createInfo = {
687 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
688 DE_NULL, // const void* pNext;
689 (VkPipelineShaderStageCreateFlags)0, // VkPipelineShaderStageCreateFlags flags;
690 VK_SHADER_STAGE_FRAGMENT_BIT, // VkShaderStageFlagBits stage;
691 fragmentModule.get(), // VkShaderModule module;
692 "main", // const char* pName;
693 DE_NULL, // const VkSpecializationInfo* pSpecializationInfo;
694 };
695
696 shaderStages.push_back(createInfo);
697 }
698
699 VkViewport viewport;
700
701 viewport.x = 0;
702 viewport.y = 0;
703 viewport.width = 1;
704 viewport.height = 1;
705 viewport.minDepth = 0.0f;
706 viewport.maxDepth = 1.0f;
707
708 VkRect2D scissor;
709
710 scissor.offset.x = 0;
711 scissor.offset.y = 0;
712 scissor.extent.width = 1;
713 scissor.extent.height = 1;
714
715 const VkPipelineViewportStateCreateInfo pipelineViewportStateInfo = {
716 VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, // VkStructureType sType;
717 DE_NULL, // const void* pNext;
718 (VkPipelineViewportStateCreateFlags)0, // VkPipelineViewportStateCreateFlags flags;
719 1u, // uint32_t viewportCount;
720 &viewport, // const VkViewport* pViewports;
721 1u, // uint32_t scissorCount;
722 &scissor, // const VkRect2D* pScissors;
723 };
724
725 const VkPipelineRasterizationStateCreateInfo pipelineRasterizationStateInfo = {
726 VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, // VkStructureType sType;
727 DE_NULL, // const void* pNext;
728 (VkPipelineRasterizationStateCreateFlags)0, // VkPipelineRasterizationStateCreateFlags flags;
729 VK_FALSE, // VkBool32 depthClampEnable;
730 VK_FALSE, // VkBool32 rasterizerDiscardEnable;
731 VK_POLYGON_MODE_FILL, // VkPolygonMode polygonMode;
732 VK_CULL_MODE_NONE, // VkCullModeFlags cullMode;
733 VK_FRONT_FACE_COUNTER_CLOCKWISE, // VkFrontFace frontFace;
734 VK_FALSE, // VkBool32 depthBiasEnable;
735 0.0f, // float depthBiasConstantFactor;
736 0.0f, // float depthBiasClamp;
737 0.0f, // float depthBiasSlopeFactor;
738 1.0f, // float lineWidth;
739 };
740
741 const VkPipelineMultisampleStateCreateInfo pipelineMultisampleStateInfo = {
742
743 VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, // VkStructureType sType;
744 DE_NULL, // const void* pNext;
745 (VkPipelineMultisampleStateCreateFlags)0, // VkPipelineMultisampleStateCreateFlags flags;
746 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits rasterizationSamples;
747 VK_FALSE, // VkBool32 sampleShadingEnable;
748 1.0f, // float minSampleShading;
749 DE_NULL, // const VkSampleMask* pSampleMask;
750 VK_FALSE, // VkBool32 alphaToCoverageEnable;
751 VK_FALSE, // VkBool32 alphaToOneEnable;
752 };
753
754 const VkStencilOpState noStencilOp = {
755 VK_STENCIL_OP_KEEP, // VkStencilOp failOp
756 VK_STENCIL_OP_KEEP, // VkStencilOp passOp
757 VK_STENCIL_OP_KEEP, // VkStencilOp depthFailOp
758 VK_COMPARE_OP_NEVER, // VkCompareOp compareOp
759 0, // uint32_t compareMask
760 0, // uint32_t writeMask
761 0 // uint32_t reference
762 };
763
764 VkPipelineDepthStencilStateCreateInfo pipelineDepthStencilStateInfo = {
765 VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, // VkStructureType sType;
766 DE_NULL, // const void* pNext;
767 (VkPipelineDepthStencilStateCreateFlags)0, // VkPipelineDepthStencilStateCreateFlags flags;
768 VK_FALSE, // VkBool32 depthTestEnable;
769 VK_FALSE, // VkBool32 depthWriteEnable;
770 VK_COMPARE_OP_ALWAYS, // VkCompareOp depthCompareOp;
771 VK_FALSE, // VkBool32 depthBoundsTestEnable;
772 VK_FALSE, // VkBool32 stencilTestEnable;
773 noStencilOp, // VkStencilOpState front;
774 noStencilOp, // VkStencilOpState back;
775 0.0f, // float minDepthBounds;
776 1.0f, // float maxDepthBounds;
777 };
778
779 const VkPipelineColorBlendStateCreateInfo pipelineColorBlendStateInfo = {
780 VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, // VkStructureType sType;
781 DE_NULL, // const void* pNext;
782 (VkPipelineColorBlendStateCreateFlags)0, // VkPipelineColorBlendStateCreateFlags flags;
783 VK_FALSE, // VkBool32 logicOpEnable;
784 VK_LOGIC_OP_COPY, // VkLogicOp logicOp;
785 0, // uint32_t attachmentCount;
786 DE_NULL, // const VkPipelineColorBlendAttachmentState* pAttachments;
787 {0.0f, 0.0f, 0.0f, 0.0f}, // float blendConstants[4];
788 };
789
790 const VkGraphicsPipelineCreateInfo graphicsPipelineInfo = {
791 VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, // VkStructureType sType;
792 DE_NULL, // const void* pNext;
793 (VkPipelineCreateFlags)0, // VkPipelineCreateFlags flags;
794 static_cast<uint32_t>(shaderStages.size()), // uint32_t stageCount;
795 de::dataOrNull(shaderStages), // const VkPipelineShaderStageCreateInfo* pStages;
796 &vertexInputStateInfo, // const VkPipelineVertexInputStateCreateInfo* pVertexInputState;
797 &pipelineInputAssemblyStateInfo, // const VkPipelineInputAssemblyStateCreateInfo* pInputAssemblyState;
798 DE_NULL, // const VkPipelineTessellationStateCreateInfo* pTessellationState;
799 &pipelineViewportStateInfo, // const VkPipelineViewportStateCreateInfo* pViewportState;
800 &pipelineRasterizationStateInfo, // const VkPipelineRasterizationStateCreateInfo* pRasterizationState;
801 &pipelineMultisampleStateInfo, // const VkPipelineMultisampleStateCreateInfo* pMultisampleState;
802 &pipelineDepthStencilStateInfo, // const VkPipelineDepthStencilStateCreateInfo* pDepthStencilState;
803 &pipelineColorBlendStateInfo, // const VkPipelineColorBlendStateCreateInfo* pColorBlendState;
804 DE_NULL, // const VkPipelineDynamicStateCreateInfo* pDynamicState;
805 pipelineLayout.get(), // VkPipelineLayout layout;
806 renderPass.get(), // VkRenderPass renderPass;
807 0, // uint32_t subpass;
808 DE_NULL, // VkPipeline basePipelineHandle;
809 0, // int32_t basePipelineIndex;
810 };
811
812 pipeline = createGraphicsPipeline(vk, device, DE_NULL, &graphicsPipelineInfo);
813 }
814 else if (params.method == MEMORY_DEP_DISPATCH)
815 {
816 computeModule = createShaderModule(vk, device, context.getBinaryCollection().get("comp"), 0u);
817
818 const VkPipelineShaderStageCreateInfo shaderStageInfo = {
819 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
820 DE_NULL, // const void* pNext;
821 (VkPipelineShaderStageCreateFlags)0, // VkPipelineShaderStageCreateFlags flags;
822 VK_SHADER_STAGE_COMPUTE_BIT, // VkShaderStageFlagBits stage;
823 computeModule.get(), // VkShaderModule module;
824 "main", // const char* pName;
825 DE_NULL // const VkSpecializationInfo* pSpecializationInfo;
826 };
827
828 const VkComputePipelineCreateInfo computePipelineInfo = {
829 VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // VkStructureType sType;
830 DE_NULL, // const void* pNext;
831 0u, // VkPipelineCreateFlags flags;
832 shaderStageInfo, // VkPipelineShaderStageCreateInfo stage;
833 pipelineLayout.get(), // VkPipelineLayout layout;
834 DE_NULL, // VkPipeline basePipelineHandle;
835 0 // int32_t basePipelineIndex;
836 };
837
838 pipeline = createComputePipeline(vk, device, DE_NULL, &computePipelineInfo);
839 }
840
841 if (params.base.useHostPtr)
842 {
843 writeHostMemory(vk, device, markerMemory->getMemory(), static_cast<size_t>(markerBufferSize), hostMemory->size,
844 &expected[0]);
845 }
846 else
847 {
848 deMemcpy(markerMemory->getHostPtr(), &expected[0], static_cast<size_t>(markerBufferSize));
849 flushMappedMemoryRange(vk, device, markerMemory->getMemory(), markerMemory->getOffset(), VK_WHOLE_SIZE);
850 }
851
852 const Unique<VkCommandPool> cmdPool(
853 createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, wd.queueFamilyIdx));
854 const Unique<VkCommandBuffer> cmdBuffer(
855 allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
856
857 beginCommandBuffer(vk, *cmdBuffer);
858
859 VkDescriptorSet setHandle = *descriptorSet;
860
861 std::vector<MemoryDepOwner> dataOwner(size, MEMORY_DEP_OWNER_NOBODY);
862
863 if (params.method == MEMORY_DEP_DRAW)
864 {
865 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
866 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0, 1, &setHandle, 0,
867 DE_NULL);
868 }
869 else if (params.method == MEMORY_DEP_DISPATCH)
870 {
871 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
872 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0, 1, &setHandle, 0,
873 DE_NULL);
874 }
875
876 if (params.base.useHostPtr)
877 {
878 writeHostMemory(vk, device, markerMemory->getMemory(), static_cast<size_t>(markerBufferSize), hostMemory->size,
879 &expected[0]);
880 }
881 else
882 {
883 deMemcpy(markerMemory->getHostPtr(), &expected[0], static_cast<size_t>(markerBufferSize));
884 flushMappedMemoryRange(vk, device, markerMemory->getMemory(), markerMemory->getOffset(), VK_WHOLE_SIZE);
885 }
886
887 uint32_t writeStages = 0;
888 uint32_t writeAccess = 0;
889
890 for (uint32_t i = 0; i < numIters; ++i)
891 {
892 uint32_t slot = rng.getUint32() % size;
893 MemoryDepOwner oldOwner = dataOwner[slot];
894 MemoryDepOwner newOwner = static_cast<MemoryDepOwner>(1 + (rng.getUint32() % 2));
895
896 DE_ASSERT(newOwner == MEMORY_DEP_OWNER_MARKER || newOwner == MEMORY_DEP_OWNER_NON_MARKER);
897 DE_ASSERT(slot < size);
898
899 if ((oldOwner != newOwner && oldOwner != MEMORY_DEP_OWNER_NOBODY) ||
900 (oldOwner == MEMORY_DEP_OWNER_NON_MARKER && newOwner == MEMORY_DEP_OWNER_NON_MARKER))
901 {
902 VkBufferMemoryBarrier memoryDep = {
903 VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // VkStructureType sType;
904 DE_NULL, // const void* pNext;
905 0, // VkAccessFlags srcAccessMask;
906 0, // VkAccessFlags dstAccessMask;
907 wd.queueFamilyIdx, // uint32_t srcQueueFamilyIndex;
908 wd.queueFamilyIdx, // uint32_t dstQueueFamilyIndex;
909 *markerBuffer, // VkBuffer buffer;
910 sizeof(uint32_t) * slot, // VkDeviceSize offset;
911 sizeof(uint32_t) // VkDeviceSize size;
912 };
913
914 VkPipelineStageFlags srcStageMask;
915 VkPipelineStageFlags dstStageMask;
916
917 computeMemoryDepBarrier(params, oldOwner, &memoryDep.srcAccessMask, &srcStageMask);
918 computeMemoryDepBarrier(params, newOwner, &memoryDep.dstAccessMask, &dstStageMask);
919
920 vk.cmdPipelineBarrier(*cmdBuffer, srcStageMask, dstStageMask, 0, 0, DE_NULL, 1, &memoryDep, 0, DE_NULL);
921 }
922
923 if (params.method == MEMORY_DEP_DRAW)
924 {
925 const VkRenderPassBeginInfo beginInfo = {
926 VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, // VkStructureType sType;
927 DE_NULL, // const void* pNext;
928 renderPass.get(), // VkRenderPass renderPass;
929 fbo.get(), // VkFramebuffer framebuffer;
930 {{
931 0,
932 0,
933 },
934 {1, 1}}, // VkRect2D renderArea;
935 0, // uint32_t clearValueCount;
936 DE_NULL // const VkClearValue* pClearValues;
937 };
938
939 vk.cmdBeginRenderPass(*cmdBuffer, &beginInfo, VK_SUBPASS_CONTENTS_INLINE);
940 }
941
942 const uint32_t value = i;
943
944 if (newOwner == MEMORY_DEP_OWNER_MARKER)
945 {
946 vk.cmdWriteBufferMarkerAMD(*cmdBuffer, params.base.stage, *markerBuffer, sizeof(uint32_t) * slot, value);
947
948 writeStages |= VK_PIPELINE_STAGE_TRANSFER_BIT;
949 writeAccess |= VK_ACCESS_TRANSFER_WRITE_BIT;
950 }
951 else
952 {
953 DE_ASSERT(newOwner == MEMORY_DEP_OWNER_NON_MARKER);
954
955 if (params.method == MEMORY_DEP_COPY)
956 {
957 vk.cmdUpdateBuffer(*cmdBuffer, *markerBuffer, sizeof(uint32_t) * slot, sizeof(uint32_t), &value);
958
959 writeStages |= VK_PIPELINE_STAGE_TRANSFER_BIT;
960 writeAccess |= VK_ACCESS_TRANSFER_WRITE_BIT;
961 }
962 else if (params.method == MEMORY_DEP_DRAW)
963 {
964 const uint32_t pushConst[] = {slot, value};
965
966 vk.cmdPushConstants(*cmdBuffer, *pipelineLayout, pushConstantStage, 0, sizeof(pushConst), pushConst);
967 vk.cmdDraw(*cmdBuffer, 1, 1, i, 0);
968
969 writeStages |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
970 writeAccess |= VK_ACCESS_SHADER_WRITE_BIT;
971 }
972 else
973 {
974 const uint32_t pushConst[] = {slot, value};
975
976 vk.cmdPushConstants(*cmdBuffer, *pipelineLayout, pushConstantStage, 0, sizeof(pushConst), pushConst);
977 vk.cmdDispatch(*cmdBuffer, 1, 1, 1);
978
979 writeStages |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
980 writeAccess |= VK_ACCESS_SHADER_WRITE_BIT;
981 }
982 }
983
984 dataOwner[slot] = newOwner;
985 expected[slot] = value;
986
987 if (params.method == MEMORY_DEP_DRAW)
988 {
989 vk.cmdEndRenderPass(*cmdBuffer);
990 }
991 }
992
993 const VkMemoryBarrier memoryDep = {
994 VK_STRUCTURE_TYPE_MEMORY_BARRIER,
995 DE_NULL,
996 writeAccess,
997 VK_ACCESS_HOST_READ_BIT,
998 };
999
1000 vk.cmdPipelineBarrier(*cmdBuffer, writeStages, VK_PIPELINE_STAGE_HOST_BIT, 0, 1, &memoryDep, 0, DE_NULL, 0,
1001 DE_NULL);
1002
1003 VK_CHECK(vk.endCommandBuffer(*cmdBuffer));
1004
1005 submitCommandsAndWait(vk, device, wd.queue, *cmdBuffer);
1006
1007 if (!checkMarkerBuffer(vk, device, markerMemory, expected, params.base.useHostPtr ? hostMemory->size : 0,
1008 params.base.useHostPtr))
1009 return tcu::TestStatus::fail("Some marker values were incorrect");
1010
1011 return tcu::TestStatus::pass("Pass");
1012 }
1013
initMemoryDepPrograms(SourceCollections & programCollection,const MemoryDepParams params)1014 void initMemoryDepPrograms(SourceCollections &programCollection, const MemoryDepParams params)
1015 {
1016 if (params.method == MEMORY_DEP_DRAW)
1017 {
1018 {
1019 std::ostringstream src;
1020
1021 src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
1022 << "layout(location = 0) flat out uint offset;\n"
1023 << "out gl_PerVertex { vec4 gl_Position; float gl_PointSize; };\n"
1024 << "void main() {\n"
1025 << " offset = gl_VertexIndex;\n"
1026 << " gl_Position = vec4(0.0, 0.0, 0.0, 1.0);\n"
1027 << " gl_PointSize = 1.0f;\n"
1028 << "}\n";
1029
1030 programCollection.glslSources.add("vert") << glu::VertexSource(src.str());
1031 }
1032
1033 {
1034 std::ostringstream src;
1035
1036 src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
1037 << "layout(push_constant) uniform Constants { uvec2 params; } pc;\n"
1038 << "layout(std430, set = 0, binding = 0) buffer Data { uint elems[]; } data;\n"
1039 << "layout(location = 0) flat in uint offset;\n"
1040 << "void main() {\n"
1041 << " data.elems[pc.params.x] = pc.params.y;\n"
1042 << "}\n";
1043
1044 programCollection.glslSources.add("frag") << glu::FragmentSource(src.str());
1045 }
1046 }
1047 else if (params.method == MEMORY_DEP_DISPATCH)
1048 {
1049 {
1050 std::ostringstream src;
1051
1052 src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
1053 << "layout(local_size_x = 1u, local_size_y = 1u, local_size_z = 1u) in;\n"
1054 << "layout(push_constant) uniform Constants { uvec2 params; } pc;\n"
1055 << "layout(std430, set = 0, binding = 0) buffer Data { uint elems[]; } data;\n"
1056 << "void main() {\n"
1057 << " data.elems[pc.params.x] = pc.params.y;\n"
1058 << "}\n";
1059
1060 programCollection.glslSources.add("comp") << glu::ComputeSource(src.str());
1061 }
1062 }
1063 }
1064
checkBufferMarkerSupport(Context & context,BaseTestParams params)1065 void checkBufferMarkerSupport(Context &context, BaseTestParams params)
1066 {
1067 if (params.useHostPtr)
1068 context.requireDeviceFunctionality("VK_EXT_external_memory_host");
1069
1070 context.requireDeviceFunctionality("VK_AMD_buffer_marker");
1071 }
1072
checkBufferMarkerSupport(Context & context,MemoryDepParams params)1073 void checkBufferMarkerSupport(Context &context, MemoryDepParams params)
1074 {
1075 if (params.base.useHostPtr)
1076 context.requireDeviceFunctionality("VK_EXT_external_memory_host");
1077
1078 context.requireDeviceFunctionality("VK_AMD_buffer_marker");
1079 }
1080
// Builds a test-case name from a base name, appending "_offset_<N>" when the
// buffer offset is non-zero (e.g. "64" with offset 16 -> "64_offset_16").
// Takes the base by const reference to avoid an unnecessary std::string copy
// per call (the original passed it by value).
std::string getTestCaseName(const std::string &base, size_t offset)
{
    if (offset == 0)
        return base;
    return base + "_offset_" + std::to_string(offset);
}
1087
// Builds the complete "buffer_marker" test hierarchy:
//   buffer_marker / <queue family> / <memory type> / <marker stage> /
//   { sequential | overwrite | memory_dep } / <case>.
// Ownership of each group created with 'new' is transferred to its parent via
// addChild; the root group is returned to the caller.
tcu::TestCaseGroup *createBufferMarkerTestsInGroup(tcu::TestContext &testCtx)
{
    // AMD_buffer_marker Tests
    tcu::TestCaseGroup *root = (new tcu::TestCaseGroup(testCtx, "buffer_marker"));

    // One sub-tree per queue capability the marker writes are exercised on.
    VkQueueFlagBits queues[] = {VK_QUEUE_GRAPHICS_BIT, VK_QUEUE_COMPUTE_BIT, VK_QUEUE_TRANSFER_BIT};
    const char *queueNames[] = {"graphics", "compute", "transfer"};

    // 'base' is mutated in place throughout the nested loops below; each
    // addFunctionCase call captures a copy of its current state.
    BaseTestParams base;
    deMemset(&base, 0, sizeof(base));

    for (size_t queueNdx = 0; queueNdx < DE_LENGTH_OF_ARRAY(queues); ++queueNdx)
    {
        // Buffer marker tests for a specific queue family
        tcu::TestCaseGroup *queueGroup = (new tcu::TestCaseGroup(testCtx, queueNames[queueNdx]));

        // true  -> marker buffer backed by imported host memory (VK_EXT_external_memory_host),
        // false -> regular device allocation.
        const char *memoryNames[] = {"external_host_mem", "default_mem"};
        const bool memoryTypes[] = {true, false};

        base.testQueue = queues[queueNdx];

        for (size_t memNdx = 0; memNdx < DE_LENGTH_OF_ARRAY(memoryTypes); ++memNdx)
        {
            tcu::TestCaseGroup *memoryGroup = (new tcu::TestCaseGroup(testCtx, memoryNames[memNdx]));

            base.useHostPtr = memoryTypes[memNdx];

            // Pipeline stage passed to the marker write command.
            VkPipelineStageFlagBits stages[] = {VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                                                VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT};
            const char *stageNames[] = {"top_of_pipe", "bottom_of_pipe"};

            for (size_t stageNdx = 0; stageNdx < DE_LENGTH_OF_ARRAY(stages); ++stageNdx)
            {
                tcu::TestCaseGroup *stageGroup = (new tcu::TestCaseGroup(testCtx, stageNames[stageNdx]));

                base.stage = stages[stageNdx];

                {
                    tcu::TestCaseGroup *sequentialGroup = (new tcu::TestCaseGroup(testCtx, "sequential"));

                    base.size = 4;
                    base.offset = 0;

                    // Writes 4 sequential marker values into a buffer
                    addFunctionCase(sequentialGroup, "4", checkBufferMarkerSupport, bufferMarkerSequential, base);

                    base.size = 64;
                    base.offset = 0;

                    // Writes 64 sequential marker values into a buffer
                    addFunctionCase(sequentialGroup, "64", checkBufferMarkerSupport, bufferMarkerSequential, base);

                    base.offset = 16;

                    // Writes 64 sequential marker values into a buffer offset by 16
                    addFunctionCase(sequentialGroup, getTestCaseName("64", base.offset), checkBufferMarkerSupport,
                                    bufferMarkerSequential, base);

                    base.size = 65536;
                    base.offset = 0;

                    // Writes 65536 sequential marker values into a buffer
                    addFunctionCase(sequentialGroup, "65536", checkBufferMarkerSupport, bufferMarkerSequential, base);

                    base.offset = 1024;

                    // Writes 65536 sequential marker values into a buffer offset by 1024
                    addFunctionCase(sequentialGroup, getTestCaseName("65536", base.offset), checkBufferMarkerSupport,
                                    bufferMarkerSequential, base);

                    // Reset the offset so the following groups start unshifted.
                    base.offset = 0;
                    stageGroup->addChild(sequentialGroup);
                }

                {
                    tcu::TestCaseGroup *overwriteGroup = (new tcu::TestCaseGroup(testCtx, "overwrite"));

                    base.size = 1;

                    // Randomly overwrites marker values to a 1-size buffer
                    addFunctionCase(overwriteGroup, "1", checkBufferMarkerSupport, bufferMarkerOverwrite, base);

                    base.size = 4;

                    // Randomly overwrites marker values to a 4-size buffer
                    addFunctionCase(overwriteGroup, "4", checkBufferMarkerSupport, bufferMarkerOverwrite, base);

                    base.size = 64;

                    // Randomly overwrites markers values to a 64-size buffer
                    addFunctionCase(overwriteGroup, "64", checkBufferMarkerSupport, bufferMarkerOverwrite, base);
                    base.offset = 24;

                    // Randomly overwrites markers values to a 64-size buffer at offset 24
                    addFunctionCase(overwriteGroup, getTestCaseName("64", base.offset), checkBufferMarkerSupport,
                                    bufferMarkerOverwrite, base);

                    // Reset the offset so the following group starts unshifted.
                    base.offset = 0;

                    stageGroup->addChild(overwriteGroup);
                }

                {
                    tcu::TestCaseGroup *memoryDepGroup = (new tcu::TestCaseGroup(testCtx, "memory_dep"));

                    MemoryDepParams params;
                    size_t offsets[] = {0, 24};
                    deMemset(&params, 0, sizeof(params));

                    for (size_t offsetIdx = 0; offsetIdx < de::arrayLength(offsets); offsetIdx++)
                    {
                        params.base = base;
                        params.base.size = 128;
                        params.base.offset = offsets[offsetIdx];

                        // Draw-based dependencies require a graphics queue.
                        if (params.base.testQueue == VK_QUEUE_GRAPHICS_BIT)
                        {
                            params.method = MEMORY_DEP_DRAW;

                            // Test memory dependencies between marker writes and draws
                            addFunctionCaseWithPrograms(memoryDepGroup, getTestCaseName("draw", params.base.offset),
                                                        checkBufferMarkerSupport, initMemoryDepPrograms,
                                                        bufferMarkerMemoryDep, params);
                        }

                        // Dispatch-based dependencies are skipped only on
                        // transfer-only queues (graphics/compute support them).
                        if (params.base.testQueue != VK_QUEUE_TRANSFER_BIT)
                        {
                            params.method = MEMORY_DEP_DISPATCH;

                            // Test memory dependencies between marker writes and compute dispatches
                            addFunctionCaseWithPrograms(memoryDepGroup, getTestCaseName("dispatch", params.base.offset),
                                                        checkBufferMarkerSupport, initMemoryDepPrograms,
                                                        bufferMarkerMemoryDep, params);
                        }

                        // Copy-based dependencies run on every queue type.
                        params.method = MEMORY_DEP_COPY;

                        // Test memory dependencies between marker writes and buffer copies
                        addFunctionCaseWithPrograms(memoryDepGroup, getTestCaseName("buffer_copy", params.base.offset),
                                                    checkBufferMarkerSupport, initMemoryDepPrograms,
                                                    bufferMarkerMemoryDep, params);
                    }

                    stageGroup->addChild(memoryDepGroup);
                }

                memoryGroup->addChild(stageGroup);
            }

            queueGroup->addChild(memoryGroup);
        }

        root->addChild(queueGroup);
    }

    return root;
}
1245
1246 } // namespace
1247
createBufferMarkerTests(tcu::TestContext & testCtx)1248 tcu::TestCaseGroup *createBufferMarkerTests(tcu::TestContext &testCtx)
1249 {
1250 return createBufferMarkerTestsInGroup(testCtx);
1251 }
1252
1253 } // namespace api
1254 } // namespace vkt
1255