/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2019 The Khronos Group Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Signal ordering tests
 *//*--------------------------------------------------------------------*/

#include "vktSynchronizationSignalOrderTests.hpp"
#include "vktSynchronizationOperation.hpp"
#include "vktSynchronizationOperationTestData.hpp"
#include "vktSynchronizationOperationResources.hpp"
#include "vktTestCaseUtil.hpp"
#include "vktSynchronizationUtil.hpp"
#include "vktExternalMemoryUtil.hpp"
#include "vktCustomInstancesDevices.hpp"
#include "vkBarrierUtil.hpp"

#include "vkDefs.hpp"
#include "vkPlatform.hpp"
#include "vkQueryUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkImageUtil.hpp"
#include "vkRef.hpp"
#include "vkTypeUtil.hpp"

#include "tcuTestLog.hpp"
#include "tcuCommandLine.hpp"

#include "deRandom.hpp"
#include "deThread.hpp"
#include "deUniquePtr.hpp"

#include <limits>
#include <set>

namespace vkt
{
namespace synchronization
{
namespace
{

using namespace vk;
using namespace vkt::ExternalMemoryUtil;
using de::MovePtr;
using de::SharedPtr;
using de::UniquePtr;

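// Helpers that wrap Move<T>/MovePtr<T> handles and raw pointers in reference-counted
// SharedPtr objects so they can be stored in standard containers.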
template <typename T>
inline SharedPtr<Move<T>> makeVkSharedPtr(Move<T> move)
{
    return SharedPtr<Move<T>>(new Move<T>(move));
}

template <typename T>
inline SharedPtr<T> makeSharedPtr(de::MovePtr<T> move)
{
    return SharedPtr<T>(move.release());
}

template <typename T>
inline SharedPtr<T> makeSharedPtr(T *ptr)
{
    return SharedPtr<T>(ptr);
}

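// Signals the given timeline semaphore from the host, setting its counter to timelineValue.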
void hostSignal(const DeviceInterface &vk, const VkDevice &device, VkSemaphore semaphore, const uint64_t timelineValue)
{
    VkSemaphoreSignalInfoKHR ssi = {
        VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO, // VkStructureType sType;
        DE_NULL,                                 // const void* pNext;
        semaphore,                               // VkSemaphore semaphore;
        timelineValue,                           // uint64_t value;
    };

    VK_CHECK(vk.signalSemaphore(device, &ssi));
}

// Waits for the device to be idle when destroying the guard object.
class DeviceWaitIdleGuard
{
public:
    DeviceWaitIdleGuard(const DeviceInterface &vkd, const VkDevice device) : m_vkd(vkd), m_device(device)
    {
    }

    ~DeviceWaitIdleGuard()
    {
        VK_CHECK(m_vkd.deviceWaitIdle(m_device));
    }

protected:
    const DeviceInterface &m_vkd;
    const VkDevice m_device;
};

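// Creates a second logical device with external memory/semaphore support (plus timeline
// semaphore and synchronization2 features when available), exposing every queue of every
// queue family so the test can use a queue distinct from the context's default device.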
Move<VkDevice> createTestDevice(const Context &context)
{
    const float priority = 0.0f;
    const std::vector<VkQueueFamilyProperties> queueFamilyProperties =
        getPhysicalDeviceQueueFamilyProperties(context.getInstanceInterface(), context.getPhysicalDevice());
    std::vector<uint32_t> queueFamilyIndices(queueFamilyProperties.size(), 0xFFFFFFFFu);
    std::vector<const char *> extensions;

    VkPhysicalDeviceFeatures2 createPhysicalFeature{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, DE_NULL,
                                                    context.getDeviceFeatures()};
    VkPhysicalDeviceTimelineSemaphoreFeatures timelineSemaphoreFeatures{
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES, DE_NULL, true};
    VkPhysicalDeviceSynchronization2FeaturesKHR synchronization2Features{
        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES_KHR, DE_NULL, true};
    void **nextPtr = &createPhysicalFeature.pNext;

    if (context.isDeviceFunctionalitySupported("VK_KHR_timeline_semaphore"))
    {
        extensions.push_back("VK_KHR_timeline_semaphore");
        addToChainVulkanStructure(&nextPtr, timelineSemaphoreFeatures);
    }

    if (!isCoreDeviceExtension(context.getUsedApiVersion(), "VK_KHR_external_semaphore"))
        extensions.push_back("VK_KHR_external_semaphore");
    if (!isCoreDeviceExtension(context.getUsedApiVersion(), "VK_KHR_external_memory"))
        extensions.push_back("VK_KHR_external_memory");

    if (context.isDeviceFunctionalitySupported("VK_KHR_external_semaphore_fd"))
        extensions.push_back("VK_KHR_external_semaphore_fd");

    if (context.isDeviceFunctionalitySupported("VK_KHR_external_semaphore_win32"))
        extensions.push_back("VK_KHR_external_semaphore_win32");

    if (context.isDeviceFunctionalitySupported("VK_KHR_external_memory_win32"))
        extensions.push_back("VK_KHR_external_memory_win32");

    if (context.isDeviceFunctionalitySupported("VK_KHR_synchronization2"))
    {
        extensions.push_back("VK_KHR_synchronization2");
        addToChainVulkanStructure(&nextPtr, synchronization2Features);
    }

    try
    {
        uint32_t maxQueueCount = 1;
        for (const VkQueueFamilyProperties &qfp : queueFamilyProperties)
            maxQueueCount = deMaxu32(qfp.queueCount, maxQueueCount);

        std::vector<float> queuePriorities(maxQueueCount, priority);
        std::vector<VkDeviceQueueCreateInfo> queues;

        for (size_t ndx = 0; ndx < queueFamilyProperties.size(); ndx++)
        {
            const VkDeviceQueueCreateInfo createInfo = {VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
                                                        DE_NULL,
                                                        0u,

                                                        (uint32_t)ndx,
                                                        queueFamilyProperties[ndx].queueCount,
                                                        queuePriorities.data()};

            queues.push_back(createInfo);
        }

        const VkDeviceCreateInfo createInfo = {VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
                                               &createPhysicalFeature,
                                               0u,

                                               (uint32_t)queues.size(),
                                               &queues[0],

                                               0u,
                                               DE_NULL,

                                               (uint32_t)extensions.size(),
                                               extensions.empty() ? DE_NULL : &extensions[0],
                                               0u};

        const auto validation = context.getTestContext().getCommandLine().isValidationEnabled();
        return createCustomDevice(validation, context.getPlatformInterface(), context.getInstance(),
                                  context.getInstanceInterface(), context.getPhysicalDevice(), &createInfo);
    }
    catch (const vk::Error &error)
    {
        if (error.getError() == VK_ERROR_EXTENSION_NOT_PRESENT)
            TCU_THROW(NotSupportedError, "Required extensions not supported");
        else
            throw;
    }
}

// Class to wrap a singleton instance and device
class SingletonDevice
{
    SingletonDevice(const Context &context) : m_logicalDevice(createTestDevice(context))
    {
    }

public:
    static const Unique<vk::VkDevice> &getDevice(const Context &context)
    {
        if (!m_singletonDevice)
            m_singletonDevice = SharedPtr<SingletonDevice>(new SingletonDevice(context));

        DE_ASSERT(m_singletonDevice);
        return m_singletonDevice->m_logicalDevice;
    }

    static void destroy()
    {
        m_singletonDevice.clear();
    }

private:
    const Unique<vk::VkDevice> m_logicalDevice;

    static SharedPtr<SingletonDevice> m_singletonDevice;
};
SharedPtr<SingletonDevice> SingletonDevice::m_singletonDevice;

static void cleanupGroup()
{
    // Destroy singleton object
    SingletonDevice::destroy();
}

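// Allocation wrapper that frees the wrapped VkDeviceMemory on destruction. Used for memory
// that is allocated or imported directly through the device rather than through an Allocator.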
class SimpleAllocation : public Allocation
{
public:
    SimpleAllocation(const DeviceInterface &vkd, VkDevice device, const VkDeviceMemory memory);
    ~SimpleAllocation(void);

private:
    const DeviceInterface &m_vkd;
    const VkDevice m_device;
};

SimpleAllocation::SimpleAllocation(const DeviceInterface &vkd, VkDevice device, const VkDeviceMemory memory)
    : Allocation(memory, 0, DE_NULL)
    , m_vkd(vkd)
    , m_device(device)
{
}

SimpleAllocation::~SimpleAllocation(void)
{
    m_vkd.freeMemory(m_device, getMemory(), DE_NULL);
}

vk::VkMemoryRequirements getMemoryRequirements(const DeviceInterface &vkd, VkDevice device, VkBuffer buffer)
{
    const VkBufferMemoryRequirementsInfo2 requirementInfo = {VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
                                                             DE_NULL, buffer};
    VkMemoryRequirements2 requirements                    = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
                                                             DE_NULL,
                                                             {
                                                                 0u,
                                                                 0u,
                                                                 0u,
                                                             }};
    vkd.getBufferMemoryRequirements2(device, &requirementInfo, &requirements);
    return requirements.memoryRequirements;
}

vk::VkMemoryRequirements getMemoryRequirements(const DeviceInterface &vkd, VkDevice device, VkImage image)
{
    const VkImageMemoryRequirementsInfo2 requirementInfo = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2, DE_NULL,
                                                            image};
    VkMemoryRequirements2 requirements                   = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
                                                            DE_NULL,
                                                            {
                                                                0u,
                                                                0u,
                                                                0u,
                                                            }};
    vkd.getImageMemoryRequirements2(device, &requirementInfo, &requirements);

    return requirements.memoryRequirements;
}

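// Imports the external memory handle and binds it to the given buffer/image. A dedicated
// allocation is used when the resource handle is valid; the imported memory is returned
// wrapped in a SimpleAllocation so it is freed automatically.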
MovePtr<Allocation> importAndBindMemory(const DeviceInterface &vkd, VkDevice device, VkBuffer buffer,
                                        NativeHandle &nativeHandle, VkExternalMemoryHandleTypeFlagBits externalType,
                                        const uint32_t exportedMemoryTypeIndex)
{
    const VkMemoryRequirements requirements = getBufferMemoryRequirements(vkd, device, buffer);
    Move<VkDeviceMemory> memory;

    if (!!buffer)
        memory = importDedicatedMemory(vkd, device, buffer, requirements, externalType, exportedMemoryTypeIndex,
                                       nativeHandle);
    else
        memory = importMemory(vkd, device, requirements, externalType, exportedMemoryTypeIndex, nativeHandle);

    VK_CHECK(vkd.bindBufferMemory(device, buffer, *memory, 0u));

    return MovePtr<Allocation>(new SimpleAllocation(vkd, device, memory.disown()));
}

MovePtr<Allocation> importAndBindMemory(const DeviceInterface &vkd, VkDevice device, VkImage image,
                                        NativeHandle &nativeHandle, VkExternalMemoryHandleTypeFlagBits externalType,
                                        uint32_t exportedMemoryTypeIndex)
{
    const VkMemoryRequirements requirements = getImageMemoryRequirements(vkd, device, image);
    Move<VkDeviceMemory> memory;

    if (!!image)
        memory = importDedicatedMemory(vkd, device, image, requirements, externalType, exportedMemoryTypeIndex,
                                       nativeHandle);
    else
        memory = importMemory(vkd, device, requirements, externalType, exportedMemoryTypeIndex, nativeHandle);

    VK_CHECK(vkd.bindImageMemory(device, image, *memory, 0u));

    return MovePtr<Allocation>(new SimpleAllocation(vkd, device, memory.disown()));
}

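// Per-iteration state for the timeline-based tests: the operation to run, the queue it runs
// on and the timeline value it targets (the previous value plus a random increment).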
struct QueueTimelineIteration
{
    QueueTimelineIteration(const SharedPtr<OperationSupport> &_opSupport, uint64_t lastValue, VkQueue _queue,
                           uint32_t _queueFamilyIdx, de::Random &rng)
        : opSupport(_opSupport)
        , queue(_queue)
        , queueFamilyIdx(_queueFamilyIdx)
    {
        timelineValue = lastValue + rng.getInt(1, 100);
    }
    ~QueueTimelineIteration()
    {
    }

    SharedPtr<OperationSupport> opSupport;
    VkQueue queue;
    uint32_t queueFamilyIdx;
    uint64_t timelineValue;
    SharedPtr<Operation> op;
};

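// Recreates the resource (image or buffer) on the importing device with the same parameters
// it was created with on the exporting device, then imports and binds the external memory
// handle to it.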
de::MovePtr<Resource> importResource(const DeviceInterface &vkd, VkDevice device,
                                     const ResourceDescription &resourceDesc, const uint32_t queueFamilyIndex,
                                     const OperationSupport &readOp, const OperationSupport &writeOp,
                                     NativeHandle &nativeHandle, VkExternalMemoryHandleTypeFlagBits externalType,
                                     uint32_t exportedMemoryTypeIndex)
{
    if (resourceDesc.type == RESOURCE_TYPE_IMAGE)
    {
        const VkExtent3D extent = {(uint32_t)resourceDesc.size.x(), de::max(1u, (uint32_t)resourceDesc.size.y()),
                                   de::max(1u, (uint32_t)resourceDesc.size.z())};
        const VkImageSubresourceRange subresourceRange     = {resourceDesc.imageAspect, 0u, 1u, 0u, 1u};
        const VkImageSubresourceLayers subresourceLayers   = {resourceDesc.imageAspect, 0u, 0u, 1u};
        const VkExternalMemoryImageCreateInfo externalInfo = {VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
                                                              DE_NULL, (VkExternalMemoryHandleTypeFlags)externalType};
        const VkImageTiling tiling                         = VK_IMAGE_TILING_OPTIMAL;
        const VkImageCreateInfo createInfo                 = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
                                                              &externalInfo,
                                                              0u,

                                                              resourceDesc.imageType,
                                                              resourceDesc.imageFormat,
                                                              extent,
                                                              1u,
                                                              1u,
                                                              resourceDesc.imageSamples,
                                                              tiling,
                                                              readOp.getInResourceUsageFlags() | writeOp.getOutResourceUsageFlags(),
                                                              VK_SHARING_MODE_EXCLUSIVE,

                                                              1u,
                                                              &queueFamilyIndex,
                                                              VK_IMAGE_LAYOUT_UNDEFINED};

        Move<VkImage> image = createImage(vkd, device, &createInfo);
        MovePtr<Allocation> allocation =
            importAndBindMemory(vkd, device, *image, nativeHandle, externalType, exportedMemoryTypeIndex);

        return MovePtr<Resource>(new Resource(image, allocation, extent, resourceDesc.imageType,
                                              resourceDesc.imageFormat, subresourceRange, subresourceLayers, tiling));
    }
    else
    {
        const VkDeviceSize offset      = 0u;
        const VkDeviceSize size        = static_cast<VkDeviceSize>(resourceDesc.size.x());
        const VkBufferUsageFlags usage = readOp.getInResourceUsageFlags() | writeOp.getOutResourceUsageFlags();
        const VkExternalMemoryBufferCreateInfo externalInfo = {VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO,
                                                               DE_NULL, (VkExternalMemoryHandleTypeFlags)externalType};
        const VkBufferCreateInfo createInfo                 = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
                                                               &externalInfo,
                                                               0u,

                                                               size,
                                                               usage,
                                                               VK_SHARING_MODE_EXCLUSIVE,
                                                               1u,
                                                               &queueFamilyIndex};
        Move<VkBuffer> buffer                               = createBuffer(vkd, device, &createInfo);
        MovePtr<Allocation> allocation =
            importAndBindMemory(vkd, device, *buffer, nativeHandle, externalType, exportedMemoryTypeIndex);

        return MovePtr<Resource>(new Resource(resourceDesc.type, buffer, allocation, offset, size));
    }
}

struct QueueSubmitOrderSharedIteration
{
    QueueSubmitOrderSharedIteration()
    {
    }
    ~QueueSubmitOrderSharedIteration()
    {
    }

    SharedPtr<Resource> resourceA;
    SharedPtr<Resource> resourceB;

    SharedPtr<Operation> writeOp;
    SharedPtr<Operation> readOp;
};

// Verifies the signaling order of the semaphores in multiple
// VkSubmitInfo given to vkQueueSubmit(), with queueA & queueB coming
// from different VkDevices.
//
// vkQueueSubmit(queueA, [write0, write1, write2, ..., write6])
// vkQueueSubmit(queueB, [read0-6])
//
// With read0-6 waiting on write6, all the data should be available
// for reading, given that signal operations are supposed to happen
// in order.
class QueueSubmitSignalOrderSharedTestInstance : public TestInstance
{
public:
    QueueSubmitSignalOrderSharedTestInstance(
        Context &context, SynchronizationType type, const SharedPtr<OperationSupport> writeOpSupport,
        const SharedPtr<OperationSupport> readOpSupport, const ResourceDescription &resourceDesc,
        VkExternalMemoryHandleTypeFlagBits memoryHandleType, VkSemaphoreType semaphoreType,
        VkExternalSemaphoreHandleTypeFlagBits semaphoreHandleType, PipelineCacheData &pipelineCacheData)
        : TestInstance(context)
        , m_type(type)
        , m_writeOpSupport(writeOpSupport)
        , m_readOpSupport(readOpSupport)
        , m_resourceDesc(resourceDesc)
        , m_memoryHandleType(memoryHandleType)
        , m_semaphoreType(semaphoreType)
        , m_semaphoreHandleType(semaphoreHandleType)
        , m_pipelineCacheData(pipelineCacheData)
        , m_rng(1234)

    {
        const InstanceInterface &vki                         = context.getInstanceInterface();
        const VkSemaphoreTypeCreateInfoKHR semaphoreTypeInfo = {
            VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR,
            DE_NULL,
            semaphoreType,
            0,
        };
        const VkPhysicalDeviceExternalSemaphoreInfo info = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_SEMAPHORE_INFO,
                                                            &semaphoreTypeInfo, semaphoreHandleType};
        VkExternalSemaphoreProperties properties = {VK_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_PROPERTIES, DE_NULL, 0u, 0u,
                                                    0u};

        vki.getPhysicalDeviceExternalSemaphoreProperties(context.getPhysicalDevice(), &info, &properties);

        if (m_semaphoreType == VK_SEMAPHORE_TYPE_TIMELINE_KHR &&
            !context.getTimelineSemaphoreFeatures().timelineSemaphore)
            TCU_THROW(NotSupportedError, "Timeline semaphore not supported");

        if ((properties.externalSemaphoreFeatures & vk::VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR) == 0 ||
            (properties.externalSemaphoreFeatures & vk::VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR) == 0)
            TCU_THROW(NotSupportedError, "Exporting and importing semaphore type not supported");

        if (!isResourceExportable())
            TCU_THROW(NotSupportedError, "Resource not exportable");
    }

    Move<VkImage> createImage(const vk::DeviceInterface &vkd, vk::VkDevice device, const vk::VkExtent3D &extent,
                              uint32_t queueFamilyIndex, vk::VkImageTiling tiling)
    {
        const VkExternalMemoryImageCreateInfo externalInfo = {VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
                                                              DE_NULL,
                                                              (VkExternalMemoryHandleTypeFlags)m_memoryHandleType};
        const VkImageCreateInfo createInfo                 = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
                                                              &externalInfo,
                                                              0u,

                                                              m_resourceDesc.imageType,
                                                              m_resourceDesc.imageFormat,
                                                              extent,
                                                              1u,
                                                              1u,
                                                              m_resourceDesc.imageSamples,
                                                              tiling,
                                                              m_readOpSupport->getInResourceUsageFlags() |
                                                                  m_writeOpSupport->getOutResourceUsageFlags(),
                                                              VK_SHARING_MODE_EXCLUSIVE,

                                                              1u,
                                                              &queueFamilyIndex,
                                                              VK_IMAGE_LAYOUT_UNDEFINED};

        return vk::createImage(vkd, device, &createInfo);
    }

    Move<VkBuffer> createBuffer(const vk::DeviceInterface &vkd, vk::VkDevice device, const vk::VkDeviceSize &size,
                                uint32_t queueFamilyIndex)
    {
        const VkExternalMemoryBufferCreateInfo externalInfo = {VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO,
                                                               DE_NULL,
                                                               (VkExternalMemoryHandleTypeFlags)m_memoryHandleType};
        const VkBufferCreateInfo createInfo                 = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
                                                               &externalInfo,
                                                               0u,

                                                               size,
                                                               m_readOpSupport->getInResourceUsageFlags() |
                                                                   m_writeOpSupport->getOutResourceUsageFlags(),
                                                               VK_SHARING_MODE_EXCLUSIVE,
                                                               1u,
                                                               &queueFamilyIndex};
        return vk::createBuffer(vkd, device, &createInfo);
    }

    tcu::TestStatus iterate(void)
    {
        // We're using 2 devices to make sure we have 2 queues even on
        // implementations that only have a single queue.
        const bool isTimelineSemaphore(m_semaphoreType == VK_SEMAPHORE_TYPE_TIMELINE_KHR);
        const VkDevice &deviceA = m_context.getDevice();
        const Unique<VkDevice> &deviceB(SingletonDevice::getDevice(m_context));
        const DeviceInterface &vkA = m_context.getDeviceInterface();
        const DeviceDriver vkB(m_context.getPlatformInterface(), m_context.getInstance(), *deviceB,
                               m_context.getUsedApiVersion(), m_context.getTestContext().getCommandLine());
        UniquePtr<SimpleAllocator> allocatorA(new SimpleAllocator(
            vkA, deviceA,
            vk::getPhysicalDeviceMemoryProperties(m_context.getInstanceInterface(), m_context.getPhysicalDevice())));
        UniquePtr<SimpleAllocator> allocatorB(new SimpleAllocator(
            vkB, *deviceB,
            vk::getPhysicalDeviceMemoryProperties(m_context.getInstanceInterface(), m_context.getPhysicalDevice())));
        UniquePtr<OperationContext> operationContextA(
            new OperationContext(m_context, m_type, vkA, deviceA, *allocatorA, m_pipelineCacheData));
        UniquePtr<OperationContext> operationContextB(
            new OperationContext(m_context, m_type, vkB, *deviceB, *allocatorB, m_pipelineCacheData));
        const uint32_t universalQueueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
        const VkQueue queueA                     = m_context.getUniversalQueue();
        const VkQueue queueB = getDeviceQueue(vkB, *deviceB, m_context.getUniversalQueueFamilyIndex(), 0);
        Unique<VkFence> fenceA(createFence(vkA, deviceA));
        Unique<VkFence> fenceB(createFence(vkB, *deviceB));
        const Unique<VkCommandPool> cmdPoolA(createCommandPool(
            vkA, deviceA, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, universalQueueFamilyIndex));
        const Unique<VkCommandPool> cmdPoolB(createCommandPool(
            vkB, *deviceB, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, universalQueueFamilyIndex));
        std::vector<SharedPtr<Move<VkCommandBuffer>>> ptrCmdBuffersA;
        SharedPtr<Move<VkCommandBuffer>> ptrCmdBufferB;
        std::vector<VkCommandBuffer> cmdBuffersA;
        VkCommandBuffer cmdBufferB;
        std::vector<Move<VkSemaphore>> semaphoresA;
        std::vector<Move<VkSemaphore>> semaphoresB;
        std::vector<VkSemaphore> semaphoreHandlesA;
        std::vector<VkSemaphore> semaphoreHandlesB;
        std::vector<uint64_t> timelineValuesA;
        std::vector<uint64_t> timelineValuesB;
        std::vector<QueueSubmitOrderSharedIteration> iterations(12);
        std::vector<VkPipelineStageFlags2KHR> stageBits;

        // These guards will wait for the device to be idle before tearing down the resources above.
        const DeviceWaitIdleGuard idleGuardA(vkA, deviceA);
        const DeviceWaitIdleGuard idleGuardB(vkB, *deviceB);

        // Create a dozen sets of write/read operations.
        for (uint32_t iterIdx = 0; iterIdx < iterations.size(); iterIdx++)
        {
            QueueSubmitOrderSharedIteration &iter = iterations[iterIdx];
            uint32_t memoryTypeIndex;
            NativeHandle nativeMemoryHandle;

            if (m_resourceDesc.type == RESOURCE_TYPE_IMAGE)
            {
                const VkExtent3D extent                          = {(uint32_t)m_resourceDesc.size.x(),
                                                                    de::max(1u, (uint32_t)m_resourceDesc.size.y()),
                                                                    de::max(1u, (uint32_t)m_resourceDesc.size.z())};
                const VkImageSubresourceRange subresourceRange   = {m_resourceDesc.imageAspect, 0u, 1u, 0u, 1u};
                const VkImageSubresourceLayers subresourceLayers = {m_resourceDesc.imageAspect, 0u, 0u, 1u};

                const vk::VkImageTiling tiling = VK_IMAGE_TILING_OPTIMAL;
                Move<VkImage> image            = createImage(vkA, deviceA, extent, universalQueueFamilyIndex, tiling);
                const vk::VkMemoryRequirements requirements = getMemoryRequirements(vkA, deviceA, *image);
                memoryTypeIndex                             = chooseMemoryType(requirements.memoryTypeBits);
                vk::Move<vk::VkDeviceMemory> memory         = allocateExportableMemory(
                    vkA, deviceA, requirements.size, memoryTypeIndex, m_memoryHandleType, *image);

                VK_CHECK(vkA.bindImageMemory(deviceA, *image, *memory, 0u));

                MovePtr<Allocation> allocation(new SimpleAllocation(vkA, deviceA, memory.disown()));
                iter.resourceA = makeSharedPtr(new Resource(image, allocation, extent, m_resourceDesc.imageType,
                                                            m_resourceDesc.imageFormat, subresourceRange,
                                                            subresourceLayers, tiling));
            }
            else
            {
                const VkDeviceSize offset = 0u;
                const VkDeviceSize size   = static_cast<VkDeviceSize>(m_resourceDesc.size.x());
                Move<VkBuffer> buffer     = createBuffer(vkA, deviceA, size, universalQueueFamilyIndex);
                const vk::VkMemoryRequirements requirements = getMemoryRequirements(vkA, deviceA, *buffer);
                memoryTypeIndex                             = chooseMemoryType(requirements.memoryTypeBits);
                vk::Move<vk::VkDeviceMemory> memory         = allocateExportableMemory(
                    vkA, deviceA, requirements.size, memoryTypeIndex, m_memoryHandleType, *buffer);

                VK_CHECK(vkA.bindBufferMemory(deviceA, *buffer, *memory, 0u));

                MovePtr<Allocation> allocation(new SimpleAllocation(vkA, deviceA, memory.disown()));
                iter.resourceA = makeSharedPtr(new Resource(m_resourceDesc.type, buffer, allocation, offset, size));
            }

            getMemoryNative(vkA, deviceA, iter.resourceA->getMemory(), m_memoryHandleType, nativeMemoryHandle);
            iter.resourceB = makeSharedPtr(importResource(vkB, *deviceB, m_resourceDesc, universalQueueFamilyIndex,
                                                          *m_readOpSupport, *m_writeOpSupport, nativeMemoryHandle,
                                                          m_memoryHandleType, memoryTypeIndex));

            iter.writeOp = makeSharedPtr(m_writeOpSupport->build(*operationContextA, *iter.resourceA));
            iter.readOp  = makeSharedPtr(m_readOpSupport->build(*operationContextB, *iter.resourceB));
        }

        // Record each write operation into its own command buffer.
        for (uint32_t iterIdx = 0; iterIdx < iterations.size(); iterIdx++)
        {
            QueueSubmitOrderSharedIteration &iter = iterations[iterIdx];
            const Resource &resource              = *iter.resourceA;
            const SyncInfo writeSync              = iter.writeOp->getOutSyncInfo();
            const SyncInfo readSync               = iter.readOp->getInSyncInfo();

            ptrCmdBuffersA.push_back(makeVkSharedPtr(makeCommandBuffer(vkA, deviceA, *cmdPoolA)));

            cmdBuffersA.push_back(**(ptrCmdBuffersA.back()));

            beginCommandBuffer(vkA, cmdBuffersA.back());

            iter.writeOp->recordCommands(cmdBuffersA.back());

            {
                SynchronizationWrapperPtr synchronizationWrapper =
                    getSynchronizationWrapper(m_type, vkA, isTimelineSemaphore);

                if (resource.getType() == RESOURCE_TYPE_IMAGE)
                {
                    DE_ASSERT(writeSync.imageLayout != VK_IMAGE_LAYOUT_UNDEFINED);
                    DE_ASSERT(readSync.imageLayout != VK_IMAGE_LAYOUT_UNDEFINED);

                    const VkImageMemoryBarrier2KHR imageMemoryBarrier2 = makeImageMemoryBarrier2(
                        writeSync.stageMask,                  // VkPipelineStageFlags2KHR srcStageMask
                        writeSync.accessMask,                 // VkAccessFlags2KHR srcAccessMask
                        readSync.stageMask,                   // VkPipelineStageFlags2KHR dstStageMask
                        readSync.accessMask,                  // VkAccessFlags2KHR dstAccessMask
                        writeSync.imageLayout,                // VkImageLayout oldLayout
                        readSync.imageLayout,                 // VkImageLayout newLayout
                        resource.getImage().handle,           // VkImage image
                        resource.getImage().subresourceRange  // VkImageSubresourceRange subresourceRange
                    );
                    VkDependencyInfoKHR dependencyInfo =
                        makeCommonDependencyInfo(DE_NULL, DE_NULL, &imageMemoryBarrier2);
                    synchronizationWrapper->cmdPipelineBarrier(cmdBuffersA.back(), &dependencyInfo);
                }
                else
                {
                    const VkBufferMemoryBarrier2KHR bufferMemoryBarrier2 = makeBufferMemoryBarrier2(
                        writeSync.stageMask,         // VkPipelineStageFlags2KHR srcStageMask
                        writeSync.accessMask,        // VkAccessFlags2KHR srcAccessMask
                        readSync.stageMask,          // VkPipelineStageFlags2KHR dstStageMask
                        readSync.accessMask,         // VkAccessFlags2KHR dstAccessMask
                        resource.getBuffer().handle, // VkBuffer buffer
                        0,                           // VkDeviceSize offset
                        VK_WHOLE_SIZE                // VkDeviceSize size
                    );
                    VkDependencyInfoKHR dependencyInfo = makeCommonDependencyInfo(DE_NULL, &bufferMemoryBarrier2);
                    synchronizationWrapper->cmdPipelineBarrier(cmdBuffersA.back(), &dependencyInfo);
                }

                stageBits.push_back(writeSync.stageMask);
            }

            endCommandBuffer(vkA, cmdBuffersA.back());

            addSemaphore(vkA, deviceA, semaphoresA, semaphoreHandlesA, timelineValuesA,
                         iterIdx == (iterations.size() - 1), 2u);
        }

        DE_ASSERT(stageBits.size() == iterations.size());
        DE_ASSERT(semaphoreHandlesA.size() == iterations.size());

        // Record all read operations into a single command buffer and record the union of their stage masks.
        VkPipelineStageFlags2KHR readStages = 0;
        ptrCmdBufferB                       = makeVkSharedPtr(makeCommandBuffer(vkB, *deviceB, *cmdPoolB));
        cmdBufferB                          = **(ptrCmdBufferB);
        beginCommandBuffer(vkB, cmdBufferB);
        for (uint32_t iterIdx = 0; iterIdx < iterations.size(); iterIdx++)
        {
            QueueSubmitOrderSharedIteration &iter = iterations[iterIdx];
            readStages |= iter.readOp->getInSyncInfo().stageMask;
            iter.readOp->recordCommands(cmdBufferB);
        }
        endCommandBuffer(vkB, cmdBufferB);

        // Export the last semaphore for use on deviceB and create another semaphore to signal on deviceB.
        {
            VkSemaphore lastSemaphoreA = semaphoreHandlesA.back();
            NativeHandle nativeSemaphoreHandle;

            addSemaphore(vkB, *deviceB, semaphoresB, semaphoreHandlesB, timelineValuesB, true, timelineValuesA.back());

            getSemaphoreNative(vkA, deviceA, lastSemaphoreA, m_semaphoreHandleType, nativeSemaphoreHandle);
            importSemaphore(vkB, *deviceB, semaphoreHandlesB.back(), m_semaphoreHandleType, nativeSemaphoreHandle, 0u);

            addSemaphore(vkB, *deviceB, semaphoresB, semaphoreHandlesB, timelineValuesB, false, timelineValuesA.back());
        }

        // Submit writes, each in its own VkSubmitInfo. With binary
        // semaphores, submissions don't wait on anything; with
        // timeline semaphores, submissions wait on a host signal
        // operation performed below.
        {
            std::vector<VkCommandBufferSubmitInfoKHR> cmdBuffersInfo(iterations.size(),
                                                                     makeCommonCommandBufferSubmitInfo(0u));
            std::vector<VkSemaphoreSubmitInfoKHR> waitSemaphoreSubmitInfos(
                iterations.size(), makeCommonSemaphoreSubmitInfo(0u, 1u, VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR));
            std::vector<VkSemaphoreSubmitInfoKHR> signalSemaphoreSubmitInfos(
                iterations.size(), makeCommonSemaphoreSubmitInfo(0u, 0u, VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT_KHR));
            SynchronizationWrapperPtr synchronizationWrapper =
                getSynchronizationWrapper(m_type, vkA, isTimelineSemaphore, static_cast<uint32_t>(iterations.size()));

            for (uint32_t iterIdx = 0; iterIdx < iterations.size(); iterIdx++)
            {
                waitSemaphoreSubmitInfos[iterIdx].semaphore   = semaphoreHandlesA.front();
                waitSemaphoreSubmitInfos[iterIdx].stageMask   = stageBits[iterIdx];
                signalSemaphoreSubmitInfos[iterIdx].semaphore = semaphoreHandlesA[iterIdx];
                signalSemaphoreSubmitInfos[iterIdx].value     = timelineValuesA[iterIdx];
                cmdBuffersInfo[iterIdx].commandBuffer         = cmdBuffersA[iterIdx];

                synchronizationWrapper->addSubmitInfo(
                    isTimelineSemaphore, isTimelineSemaphore ? &waitSemaphoreSubmitInfos[iterIdx] : DE_NULL, 1u,
                    &cmdBuffersInfo[iterIdx], 1u, &signalSemaphoreSubmitInfos[iterIdx], isTimelineSemaphore,
                    isTimelineSemaphore);
            }

            VK_CHECK(synchronizationWrapper->queueSubmit(queueA, *fenceA));
        }

        // Submit reads, waiting only on the last write operation. The
        // ordering of signal operations should guarantee that all
        // writes have completed by the time the read operations kick in.
        {
            VkCommandBufferSubmitInfoKHR cmdBuffersInfo = makeCommonCommandBufferSubmitInfo(cmdBufferB);
            VkSemaphoreSubmitInfoKHR waitSemaphoreSubmitInfo =
                makeCommonSemaphoreSubmitInfo(semaphoreHandlesB.front(), timelineValuesA.back(), readStages);
            VkSemaphoreSubmitInfoKHR signalSemaphoreSubmitInfo = makeCommonSemaphoreSubmitInfo(
                semaphoreHandlesB.back(), timelineValuesB.back(), VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT_KHR);
            SynchronizationWrapperPtr synchronizationWrapper =
                getSynchronizationWrapper(m_type, vkB, isTimelineSemaphore);

            synchronizationWrapper->addSubmitInfo(1u, &waitSemaphoreSubmitInfo, 1u, &cmdBuffersInfo, 1u,
                                                  &signalSemaphoreSubmitInfo, isTimelineSemaphore, isTimelineSemaphore);

            VK_CHECK(synchronizationWrapper->queueSubmit(queueB, *fenceB));

            if (m_semaphoreType == VK_SEMAPHORE_TYPE_TIMELINE_KHR)
            {
                const VkSemaphoreWaitInfo waitInfo = {
                    VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, // VkStructureType sType;
                    DE_NULL,                               // const void* pNext;
                    0u,                                    // VkSemaphoreWaitFlagsKHR flags;
                    1u,                                    // uint32_t semaphoreCount;
                    &semaphoreHandlesB.back(),             // const VkSemaphore* pSemaphores;
                    &timelineValuesB.back(),               // const uint64_t* pValues;
                };

                // Unblock the whole lot.
                hostSignal(vkA, deviceA, semaphoreHandlesA.front(), 2);

                VK_CHECK(vkB.waitSemaphores(*deviceB, &waitInfo, ~0ull));
            }
            else
            {
                VK_CHECK(vkB.waitForFences(*deviceB, 1, &fenceB.get(), VK_TRUE, ~0ull));
            }
        }

        // Verify the result of the operations.
        for (uint32_t iterIdx = 0; iterIdx < iterations.size(); iterIdx++)
        {
            QueueSubmitOrderSharedIteration &iter = iterations[iterIdx];
            const Data expected                   = iter.writeOp->getData();
            const Data actual                     = iter.readOp->getData();

            if (isIndirectBuffer(iter.resourceA->getType()))
            {
                const uint32_t expectedValue = reinterpret_cast<const uint32_t *>(expected.data)[0];
                const uint32_t actualValue   = reinterpret_cast<const uint32_t *>(actual.data)[0];

                if (actualValue < expectedValue)
                    return tcu::TestStatus::fail("Counter value is smaller than expected");
            }
            else
            {
                if (0 != deMemCmp(expected.data, actual.data, expected.size))
                    return tcu::TestStatus::fail("Memory contents don't match");
            }
        }

        return tcu::TestStatus::pass("Success");
    }

private:
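    // Appends a semaphore handle and its associated timeline value. For timeline semaphores
    // a single exportable semaphore is created on the first call and reused afterwards; for
    // binary semaphores a new semaphore is created each time (exportable only when requested).
    // Each new timeline value grows by a random increment over the previous one.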
    void addSemaphore(const DeviceInterface &vk, VkDevice device, std::vector<Move<VkSemaphore>> &semaphores,
                      std::vector<VkSemaphore> &semaphoreHandles, std::vector<uint64_t> &timelineValues,
                      bool exportable, uint64_t firstTimelineValue)
    {
        Move<VkSemaphore> semaphore;

        if (m_semaphoreType == VK_SEMAPHORE_TYPE_TIMELINE_KHR)
        {
            // Only allocate a single exportable semaphore.
            if (semaphores.empty())
            {
                semaphores.push_back(createExportableSemaphoreType(vk, device, m_semaphoreType, m_semaphoreHandleType));
            }
        }
        else
        {
            if (exportable)
                semaphores.push_back(createExportableSemaphoreType(vk, device, m_semaphoreType, m_semaphoreHandleType));
            else
                semaphores.push_back(createSemaphoreType(vk, device, m_semaphoreType));
        }

        semaphoreHandles.push_back(*semaphores.back());
        timelineValues.push_back((timelineValues.empty() ? firstTimelineValue : timelineValues.back()) +
                                 m_rng.getInt(1, 100));
    }

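    // Checks whether the test resource can be exported with m_memoryHandleType and imported
    // again, by querying the external memory properties for the image format or buffer usage.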
    bool isResourceExportable()
    {
        const InstanceInterface &vki    = m_context.getInstanceInterface();
        VkPhysicalDevice physicalDevice = m_context.getPhysicalDevice();

        if (m_resourceDesc.type == RESOURCE_TYPE_IMAGE)
        {
            const VkPhysicalDeviceExternalImageFormatInfo externalInfo = {
                VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO, DE_NULL, m_memoryHandleType};
            const VkPhysicalDeviceImageFormatInfo2 imageFormatInfo = {
                VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
                &externalInfo,
                m_resourceDesc.imageFormat,
                m_resourceDesc.imageType,
                VK_IMAGE_TILING_OPTIMAL,
                m_readOpSupport->getInResourceUsageFlags() | m_writeOpSupport->getOutResourceUsageFlags(),
                0u};
            VkExternalImageFormatProperties externalProperties = {
                VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES, DE_NULL, {0u, 0u, 0u}};
            VkImageFormatProperties2 formatProperties = {VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
                                                         &externalProperties,
                                                         {
                                                             {0u, 0u, 0u},
                                                             0u,
                                                             0u,
                                                             0u,
                                                             0u,
                                                         }};

            {
                const VkResult res =
                    vki.getPhysicalDeviceImageFormatProperties2(physicalDevice, &imageFormatInfo, &formatProperties);

                if (res == VK_ERROR_FORMAT_NOT_SUPPORTED)
                    return false;

                VK_CHECK(res); // Check other errors
            }

            if ((externalProperties.externalMemoryProperties.externalMemoryFeatures &
                 VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT_KHR) == 0)
                return false;

            if ((externalProperties.externalMemoryProperties.externalMemoryFeatures &
                 VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT_KHR) == 0)
                return false;

            return true;
        }
        else
        {
            const VkPhysicalDeviceExternalBufferInfo info = {
                VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_BUFFER_INFO, DE_NULL,

                0u, m_readOpSupport->getInResourceUsageFlags() | m_writeOpSupport->getOutResourceUsageFlags(),
                m_memoryHandleType};
            VkExternalBufferProperties properties = {
                VK_STRUCTURE_TYPE_EXTERNAL_BUFFER_PROPERTIES, DE_NULL, {0u, 0u, 0u}};
            vki.getPhysicalDeviceExternalBufferProperties(physicalDevice, &info, &properties);

            if ((properties.externalMemoryProperties.externalMemoryFeatures &
                 VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT_KHR) == 0 ||
                (properties.externalMemoryProperties.externalMemoryFeatures &
                 VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT_KHR) == 0)
                return false;

            return true;
        }
    }

    SynchronizationType m_type;
    SharedPtr<OperationSupport> m_writeOpSupport;
    SharedPtr<OperationSupport> m_readOpSupport;
    const ResourceDescription &m_resourceDesc;
    VkExternalMemoryHandleTypeFlagBits m_memoryHandleType;
    VkSemaphoreType m_semaphoreType;
    VkExternalSemaphoreHandleTypeFlagBits m_semaphoreHandleType;
    PipelineCacheData &m_pipelineCacheData;
    de::Random m_rng;
};

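// Test case wrapper: checks the required extensions/features in checkSupport() and
// instantiates QueueSubmitSignalOrderSharedTestInstance with the selected write/read
// operations, resource, memory handle type and semaphore handle type.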
class QueueSubmitSignalOrderSharedTestCase : public TestCase
{
public:
    QueueSubmitSignalOrderSharedTestCase(tcu::TestContext &testCtx, SynchronizationType type, const std::string &name,
                                         OperationName writeOp, OperationName readOp,
                                         const ResourceDescription &resourceDesc,
                                         VkExternalMemoryHandleTypeFlagBits memoryHandleType,
                                         VkSemaphoreType semaphoreType,
                                         VkExternalSemaphoreHandleTypeFlagBits semaphoreHandleType,
                                         PipelineCacheData &pipelineCacheData)
        : TestCase(testCtx, name.c_str())
        , m_type(type)
        , m_writeOpSupport(makeOperationSupport(writeOp, resourceDesc).release())
        , m_readOpSupport(makeOperationSupport(readOp, resourceDesc).release())
        , m_resourceDesc(resourceDesc)
        , m_memoryHandleType(memoryHandleType)
        , m_semaphoreType(semaphoreType)
        , m_semaphoreHandleType(semaphoreHandleType)
        , m_pipelineCacheData(pipelineCacheData)
    {
    }

    virtual void checkSupport(Context &context) const
    {
        if (m_semaphoreType == VK_SEMAPHORE_TYPE_TIMELINE_KHR &&
            !context.getTimelineSemaphoreFeatures().timelineSemaphore)
            TCU_THROW(NotSupportedError, "Timeline semaphore not supported");

        if ((m_semaphoreHandleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT ||
             m_semaphoreHandleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) &&
            !context.isDeviceFunctionalitySupported("VK_KHR_external_semaphore_fd"))
            TCU_THROW(NotSupportedError, "VK_KHR_external_semaphore_fd not supported");

        if ((m_semaphoreHandleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT ||
             m_semaphoreHandleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT) &&
            !context.isDeviceFunctionalitySupported("VK_KHR_external_semaphore_win32"))
            TCU_THROW(NotSupportedError, "VK_KHR_external_semaphore_win32 not supported");

        if (m_type == SynchronizationType::SYNCHRONIZATION2)
            context.requireDeviceFunctionality("VK_KHR_synchronization2");
    }

    TestInstance *createInstance(Context &context) const
    {
        return new QueueSubmitSignalOrderSharedTestInstance(context, m_type, m_writeOpSupport, m_readOpSupport,
                                                            m_resourceDesc, m_memoryHandleType, m_semaphoreType,
                                                            m_semaphoreHandleType, m_pipelineCacheData);
    }

    void initPrograms(SourceCollections &programCollection) const
    {
        m_writeOpSupport->initPrograms(programCollection);
        m_readOpSupport->initPrograms(programCollection);
    }

private:
    SynchronizationType m_type;
    SharedPtr<OperationSupport> m_writeOpSupport;
    SharedPtr<OperationSupport> m_readOpSupport;
    const ResourceDescription &m_resourceDesc;
    VkExternalMemoryHandleTypeFlagBits m_memoryHandleType;
    VkSemaphoreType m_semaphoreType;
    VkExternalSemaphoreHandleTypeFlagBits m_semaphoreHandleType;
    PipelineCacheData &m_pipelineCacheData;
};

class QueueSubmitSignalOrderSharedTests : public tcu::TestCaseGroup
{
public:
    QueueSubmitSignalOrderSharedTests(tcu::TestContext &testCtx, SynchronizationType type,
                                      VkSemaphoreType semaphoreType, const char *name)
        : tcu::TestCaseGroup(testCtx, name)
        , m_type(type)
        , m_semaphoreType(semaphoreType)
    {
    }

    void init(void)
    {
        static const OperationName writeOps[] = {
            OPERATION_NAME_WRITE_COPY_BUFFER,
            OPERATION_NAME_WRITE_COPY_BUFFER_TO_IMAGE,
            OPERATION_NAME_WRITE_COPY_IMAGE_TO_BUFFER,
            OPERATION_NAME_WRITE_COPY_IMAGE,
            OPERATION_NAME_WRITE_BLIT_IMAGE,
            OPERATION_NAME_WRITE_SSBO_VERTEX,
            OPERATION_NAME_WRITE_SSBO_TESSELLATION_CONTROL,
            OPERATION_NAME_WRITE_SSBO_TESSELLATION_EVALUATION,
            OPERATION_NAME_WRITE_SSBO_GEOMETRY,
            OPERATION_NAME_WRITE_SSBO_FRAGMENT,
            OPERATION_NAME_WRITE_SSBO_COMPUTE,
            OPERATION_NAME_WRITE_SSBO_COMPUTE_INDIRECT,
            OPERATION_NAME_WRITE_IMAGE_VERTEX,
            OPERATION_NAME_WRITE_IMAGE_TESSELLATION_CONTROL,
            OPERATION_NAME_WRITE_IMAGE_TESSELLATION_EVALUATION,
            OPERATION_NAME_WRITE_IMAGE_GEOMETRY,
            OPERATION_NAME_WRITE_IMAGE_FRAGMENT,
            OPERATION_NAME_WRITE_IMAGE_COMPUTE,
            OPERATION_NAME_WRITE_IMAGE_COMPUTE_INDIRECT,
        };
        static const OperationName readOps[] = {
            OPERATION_NAME_READ_COPY_BUFFER,
            OPERATION_NAME_READ_COPY_BUFFER_TO_IMAGE,
            OPERATION_NAME_READ_COPY_IMAGE_TO_BUFFER,
            OPERATION_NAME_READ_COPY_IMAGE,
            OPERATION_NAME_READ_BLIT_IMAGE,
            OPERATION_NAME_READ_UBO_VERTEX,
            OPERATION_NAME_READ_UBO_TESSELLATION_CONTROL,
            OPERATION_NAME_READ_UBO_TESSELLATION_EVALUATION,
            OPERATION_NAME_READ_UBO_GEOMETRY,
            OPERATION_NAME_READ_UBO_FRAGMENT,
            OPERATION_NAME_READ_UBO_COMPUTE,
            OPERATION_NAME_READ_UBO_COMPUTE_INDIRECT,
            OPERATION_NAME_READ_SSBO_VERTEX,
            OPERATION_NAME_READ_SSBO_TESSELLATION_CONTROL,
            OPERATION_NAME_READ_SSBO_TESSELLATION_EVALUATION,
            OPERATION_NAME_READ_SSBO_GEOMETRY,
            OPERATION_NAME_READ_SSBO_FRAGMENT,
            OPERATION_NAME_READ_SSBO_COMPUTE,
            OPERATION_NAME_READ_SSBO_COMPUTE_INDIRECT,
            OPERATION_NAME_READ_IMAGE_VERTEX,
            OPERATION_NAME_READ_IMAGE_TESSELLATION_CONTROL,
            OPERATION_NAME_READ_IMAGE_TESSELLATION_EVALUATION,
            OPERATION_NAME_READ_IMAGE_GEOMETRY,
            OPERATION_NAME_READ_IMAGE_FRAGMENT,
            OPERATION_NAME_READ_IMAGE_COMPUTE,
            OPERATION_NAME_READ_IMAGE_COMPUTE_INDIRECT,
            OPERATION_NAME_READ_INDIRECT_BUFFER_DRAW,
            OPERATION_NAME_READ_INDIRECT_BUFFER_DRAW_INDEXED,
            OPERATION_NAME_READ_INDIRECT_BUFFER_DISPATCH,
            OPERATION_NAME_READ_VERTEX_INPUT,
        };
        static const struct
        {
            VkExternalMemoryHandleTypeFlagBits memoryType;
            VkExternalSemaphoreHandleTypeFlagBits semaphoreType;
        } exportCases[] = {
1067             // Only semaphore handle types having reference semantic
1068             // are valid for this test.
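            // (Sync-FD handles, for example, have copy transference and
            // are therefore not listed here.)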
            {
                VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
                VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
            },
            {
                VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
                VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
            },
            {
                VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT,
                VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT,
            },
        };

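        // Build one sub-group per (write op, read op) pair; each group gets one
        // case per supported resource and exportable handle-type combination.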
        for (uint32_t writeOpIdx = 0; writeOpIdx < DE_LENGTH_OF_ARRAY(writeOps); writeOpIdx++)
            for (uint32_t readOpIdx = 0; readOpIdx < DE_LENGTH_OF_ARRAY(readOps); readOpIdx++)
            {
                const OperationName writeOp   = writeOps[writeOpIdx];
                const OperationName readOp    = readOps[readOpIdx];
                const std::string opGroupName = getOperationName(writeOp) + "_" + getOperationName(readOp);
                bool empty                    = true;

                de::MovePtr<tcu::TestCaseGroup> opGroup(new tcu::TestCaseGroup(m_testCtx, opGroupName.c_str()));

                for (int resourceNdx = 0; resourceNdx < DE_LENGTH_OF_ARRAY(s_resources); ++resourceNdx)
                {
                    const ResourceDescription &resource = s_resources[resourceNdx];

                    if (isResourceSupported(writeOp, resource) && isResourceSupported(readOp, resource))
                    {
                        for (uint32_t exportIdx = 0; exportIdx < DE_LENGTH_OF_ARRAY(exportCases); exportIdx++)
                        {
                            std::string caseName = getResourceName(resource) + "_" +
                                                   externalSemaphoreTypeToName(exportCases[exportIdx].semaphoreType);

                            opGroup->addChild(new QueueSubmitSignalOrderSharedTestCase(
                                m_testCtx, m_type, caseName, writeOp, readOp, resource,
                                exportCases[exportIdx].memoryType, m_semaphoreType,
                                exportCases[exportIdx].semaphoreType, m_pipelineCacheData));
                            empty = false;
                        }
                    }
                }
                if (!empty)
                    addChild(opGroup.release());
            }
    }

    void deinit(void)
    {
        cleanupGroup();
    }

private:
    SynchronizationType m_type;
    VkSemaphoreType m_semaphoreType;
    // synchronization.op tests share pipeline cache data to speed up test
    // execution.
    PipelineCacheData m_pipelineCacheData;
};

struct QueueSubmitOrderIteration
{
    QueueSubmitOrderIteration()
    {
    }
    ~QueueSubmitOrderIteration()
    {
    }

    SharedPtr<Resource> resource;

    SharedPtr<Operation> writeOp;
    SharedPtr<Operation> readOp;
};

// Verifies the signaling order of the semaphores in multiple
// VkSubmitInfos given to vkQueueSubmit() with queueA & queueB from
// the same VkDevice.
//
// vkQueueSubmit(queueA, [write0, write1, write2, ..., writeN])
// vkQueueSubmit(queueB, [read0, read1, ..., readN])
//
// With all reads waiting only on writeN's semaphore, all the data
// should be available for reading, since semaphore signal operations
// execute in submission order.
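//
// With binary semaphores, one semaphore is created per write submission
// and the read submission waits on the last one. With timeline
// semaphores, a single semaphore is reused: each write signals a
// strictly increasing value and the reads wait on the final value.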
class QueueSubmitSignalOrderTestInstance : public TestInstance
{
public:
    QueueSubmitSignalOrderTestInstance(Context &context, SynchronizationType type,
                                       const SharedPtr<OperationSupport> writeOpSupport,
                                       const SharedPtr<OperationSupport> readOpSupport,
                                       const ResourceDescription &resourceDesc, VkSemaphoreType semaphoreType,
                                       PipelineCacheData &pipelineCacheData)
        : TestInstance(context)
        , m_type(type)
        , m_writeOpSupport(writeOpSupport)
        , m_readOpSupport(readOpSupport)
        , m_resourceDesc(resourceDesc)
        , m_semaphoreType(semaphoreType)
        , m_device(SingletonDevice::getDevice(context))
        , m_deviceInterface(context.getPlatformInterface(), context.getInstance(), *m_device,
                            context.getUsedApiVersion(), context.getTestContext().getCommandLine())
        , m_allocator(new SimpleAllocator(
              m_deviceInterface, *m_device,
              getPhysicalDeviceMemoryProperties(context.getInstanceInterface(), context.getPhysicalDevice())))
        , m_operationContext(
              new OperationContext(context, type, m_deviceInterface, *m_device, *m_allocator, pipelineCacheData))
        , m_queueA(DE_NULL)
        , m_queueB(DE_NULL)
        , m_rng(1234)
    {
        const std::vector<VkQueueFamilyProperties> queueFamilyProperties =
            getPhysicalDeviceQueueFamilyProperties(context.getInstanceInterface(), context.getPhysicalDevice());

        if (m_semaphoreType == VK_SEMAPHORE_TYPE_TIMELINE_KHR &&
            !context.getTimelineSemaphoreFeatures().timelineSemaphore)
            TCU_THROW(NotSupportedError, "Timeline semaphore not supported");

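        // Pick a queue family that can run the write operation. Families exposing
        // GRAPHICS or COMPUTE implicitly support transfer operations, so they are
        // also accepted when only VK_QUEUE_TRANSFER_BIT is required.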
        VkQueueFlags writeOpQueueFlags = m_writeOpSupport->getQueueFlags(*m_operationContext);
        for (uint32_t familyIdx = 0; familyIdx < queueFamilyProperties.size(); familyIdx++)
        {
            if (((queueFamilyProperties[familyIdx].queueFlags & writeOpQueueFlags) == writeOpQueueFlags) ||
                ((writeOpQueueFlags == VK_QUEUE_TRANSFER_BIT) &&
                 (((queueFamilyProperties[familyIdx].queueFlags & VK_QUEUE_GRAPHICS_BIT) == VK_QUEUE_GRAPHICS_BIT) ||
                  ((queueFamilyProperties[familyIdx].queueFlags & VK_QUEUE_COMPUTE_BIT) == VK_QUEUE_COMPUTE_BIT))))
            {
                m_queueA            = getDeviceQueue(m_deviceInterface, *m_device, familyIdx, 0);
                m_queueFamilyIndexA = familyIdx;
                break;
            }
        }
        if (m_queueA == DE_NULL)
            TCU_THROW(NotSupportedError, "No queue supporting write operation");

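        // Pick a second queue for the read operation that is distinct from m_queueA;
        // it may come from the same family if another queue index is available there.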
        VkQueueFlags readOpQueueFlags = m_readOpSupport->getQueueFlags(*m_operationContext);
        for (uint32_t familyIdx = 0; familyIdx < queueFamilyProperties.size(); familyIdx++)
        {
            if (((queueFamilyProperties[familyIdx].queueFlags & readOpQueueFlags) == readOpQueueFlags) ||
                ((readOpQueueFlags == VK_QUEUE_TRANSFER_BIT) &&
                 (((queueFamilyProperties[familyIdx].queueFlags & VK_QUEUE_GRAPHICS_BIT) == VK_QUEUE_GRAPHICS_BIT) ||
                  ((queueFamilyProperties[familyIdx].queueFlags & VK_QUEUE_COMPUTE_BIT) == VK_QUEUE_COMPUTE_BIT))))
            {
                for (uint32_t queueIdx = 0; queueIdx < queueFamilyProperties[familyIdx].queueCount; queueIdx++)
                {
                    VkQueue queue = getDeviceQueue(m_deviceInterface, *m_device, familyIdx, queueIdx);

                    if (queue == m_queueA)
                        continue;

                    m_queueB            = queue;
                    m_queueFamilyIndexB = familyIdx;
                    break;
                }

                if (m_queueB != DE_NULL)
                    break;
            }
        }
        if (m_queueB == DE_NULL)
            TCU_THROW(NotSupportedError, "No queue supporting read operation");
    }

    tcu::TestStatus iterate(void)
    {
        const bool isTimelineSemaphore = (m_semaphoreType == VK_SEMAPHORE_TYPE_TIMELINE_KHR);
        const VkDevice &device         = *m_device;
        const DeviceInterface &vk      = m_deviceInterface;
        Unique<VkFence> fence(createFence(vk, device));
        const Unique<VkCommandPool> cmdPoolA(
            createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, m_queueFamilyIndexA));
        const Unique<VkCommandPool> cmdPoolB(
            createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, m_queueFamilyIndexB));
        std::vector<SharedPtr<Move<VkCommandBuffer>>> ptrCmdBuffersA;
        SharedPtr<Move<VkCommandBuffer>> ptrCmdBufferB;
        std::vector<VkCommandBuffer> cmdBuffersA;
        VkCommandBuffer cmdBufferB;
        std::vector<Move<VkSemaphore>> semaphoresA;
        std::vector<Move<VkSemaphore>> semaphoresB;
        std::vector<VkSemaphore> semaphoreHandlesA;
        std::vector<VkSemaphore> semaphoreHandlesB;
        std::vector<uint64_t> timelineValuesA;
        std::vector<uint64_t> timelineValuesB;
        std::vector<QueueSubmitOrderIteration> iterations;
        std::vector<VkPipelineStageFlags2KHR> stageBits;
        std::vector<uint32_t> queueFamilies;
        SynchronizationWrapperPtr syncWrapper = getSynchronizationWrapper(m_type, vk, isTimelineSemaphore);

        // This guard will wait for the device to be idle before tearing down the resources above.
        const DeviceWaitIdleGuard idleGuard(vk, device);

        queueFamilies.push_back(m_queueFamilyIndexA);
        queueFamilies.push_back(m_queueFamilyIndexB);

        // Create a dozen sets of write/read operations.
        iterations.resize(12);
        for (uint32_t iterIdx = 0; iterIdx < iterations.size(); iterIdx++)
        {
            QueueSubmitOrderIteration &iter = iterations[iterIdx];

            iter.resource = makeSharedPtr(
                new Resource(*m_operationContext, m_resourceDesc,
                             m_writeOpSupport->getOutResourceUsageFlags() | m_readOpSupport->getInResourceUsageFlags(),
                             VK_SHARING_MODE_EXCLUSIVE, queueFamilies));

            iter.writeOp = makeSharedPtr(m_writeOpSupport->build(*m_operationContext, *iter.resource));
            iter.readOp  = makeSharedPtr(m_readOpSupport->build(*m_operationContext, *iter.resource));
        }

        // Record each write operation into its own command buffer.
        for (uint32_t iterIdx = 0; iterIdx < iterations.size(); iterIdx++)
        {
            QueueSubmitOrderIteration &iter = iterations[iterIdx];

            ptrCmdBuffersA.push_back(makeVkSharedPtr(makeCommandBuffer(vk, device, *cmdPoolA)));
            cmdBuffersA.push_back(**(ptrCmdBuffersA.back()));

            beginCommandBuffer(vk, cmdBuffersA.back());
            iter.writeOp->recordCommands(cmdBuffersA.back());

            {
                SynchronizationWrapperPtr synchronizationWrapper = getSynchronizationWrapper(m_type, vk, false);
                const SyncInfo writeSync                         = iter.writeOp->getOutSyncInfo();
                const SyncInfo readSync                          = iter.readOp->getInSyncInfo();
                const Resource &resource                         = *iter.resource;

                if (resource.getType() == RESOURCE_TYPE_IMAGE)
                {
                    DE_ASSERT(writeSync.imageLayout != VK_IMAGE_LAYOUT_UNDEFINED);
                    DE_ASSERT(readSync.imageLayout != VK_IMAGE_LAYOUT_UNDEFINED);

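                    // Transition the image from the writer's layout to the reader's
                    // layout and make the write accesses visible to the read stages.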
                    const VkImageMemoryBarrier2KHR imageMemoryBarrier2 = makeImageMemoryBarrier2(
                        writeSync.stageMask,                  // VkPipelineStageFlags2KHR srcStageMask
                        writeSync.accessMask,                 // VkAccessFlags2KHR        srcAccessMask
                        readSync.stageMask,                   // VkPipelineStageFlags2KHR dstStageMask
                        readSync.accessMask,                  // VkAccessFlags2KHR        dstAccessMask
                        writeSync.imageLayout,                // VkImageLayout            oldLayout
                        readSync.imageLayout,                 // VkImageLayout            newLayout
                        resource.getImage().handle,           // VkImage                  image
                        resource.getImage().subresourceRange  // VkImageSubresourceRange  subresourceRange
                    );
                    VkDependencyInfoKHR dependencyInfo =
                        makeCommonDependencyInfo(DE_NULL, DE_NULL, &imageMemoryBarrier2);
                    synchronizationWrapper->cmdPipelineBarrier(cmdBuffersA.back(), &dependencyInfo);
                }
                else
                {
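                    // Buffer resources only need the write accesses made available
                    // and visible to the read stages; there is no layout to transition.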
                    const VkBufferMemoryBarrier2KHR bufferMemoryBarrier2 = makeBufferMemoryBarrier2(
                        writeSync.stageMask,          // VkPipelineStageFlags2KHR srcStageMask
                        writeSync.accessMask,         // VkAccessFlags2KHR        srcAccessMask
                        readSync.stageMask,           // VkPipelineStageFlags2KHR dstStageMask
                        readSync.accessMask,          // VkAccessFlags2KHR        dstAccessMask
                        resource.getBuffer().handle,  // VkBuffer                 buffer
                        0,                            // VkDeviceSize             offset
                        VK_WHOLE_SIZE                 // VkDeviceSize             size
                    );
                    VkDependencyInfoKHR dependencyInfo = makeCommonDependencyInfo(DE_NULL, &bufferMemoryBarrier2);
                    synchronizationWrapper->cmdPipelineBarrier(cmdBuffersA.back(), &dependencyInfo);
                }

                stageBits.push_back(writeSync.stageMask);
            }

            endCommandBuffer(vk, cmdBuffersA.back());

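            // Create the semaphore this write submission will signal. For timeline
            // semaphores, the device-side signal values start above 2, so they never
            // collide with the host signal on value 1 that releases the write
            // submissions below.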
            addSemaphore(vk, device, semaphoresA, semaphoreHandlesA, timelineValuesA, 2u);
        }

        DE_ASSERT(stageBits.size() == iterations.size());
        DE_ASSERT(semaphoreHandlesA.size() == iterations.size());

        // Record all read operations into a single command buffer.
        ptrCmdBufferB = makeVkSharedPtr(makeCommandBuffer(vk, device, *cmdPoolB));
        cmdBufferB    = **(ptrCmdBufferB);
        beginCommandBuffer(vk, cmdBufferB);
        for (uint32_t iterIdx = 0; iterIdx < iterations.size(); iterIdx++)
        {
            QueueSubmitOrderIteration &iter = iterations[iterIdx];
            iter.readOp->recordCommands(cmdBufferB);
        }
        endCommandBuffer(vk, cmdBufferB);

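        // The read submission signals its own semaphore; for timeline semaphores the
        // signaled value continues past the last write value, so the host can wait on
        // it below to know the whole chain has completed.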
        addSemaphore(vk, device, semaphoresB, semaphoreHandlesB, timelineValuesB, timelineValuesA.back());

        // Submit the writes, each in its own VkSubmitInfo. With binary
        // semaphores the submissions don't wait on anything; with
        // timeline semaphores they wait on a host signal operation
        // performed below.
        {
            VkSemaphoreSubmitInfoKHR waitSemaphoreSubmitInfo =
                makeCommonSemaphoreSubmitInfo(semaphoreHandlesA.front(), 1u, VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR);
            std::vector<VkSemaphoreSubmitInfoKHR> signalSemaphoreSubmitInfo(
                iterations.size(), makeCommonSemaphoreSubmitInfo(0u, 0u, VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT_KHR));
            std::vector<VkCommandBufferSubmitInfoKHR> commandBufferSubmitInfos(iterations.size(),
                                                                               makeCommonCommandBufferSubmitInfo(0));
            SynchronizationWrapperPtr synchronizationWrapper =
                getSynchronizationWrapper(m_type, vk, isTimelineSemaphore, (uint32_t)iterations.size());

            for (uint32_t iterIdx = 0; iterIdx < iterations.size(); iterIdx++)
            {
                commandBufferSubmitInfos[iterIdx].commandBuffer = cmdBuffersA[iterIdx];
                signalSemaphoreSubmitInfo[iterIdx].semaphore    = semaphoreHandlesA[iterIdx];
                signalSemaphoreSubmitInfo[iterIdx].value        = timelineValuesA[iterIdx];

                synchronizationWrapper->addSubmitInfo(
                    isTimelineSemaphore, isTimelineSemaphore ? &waitSemaphoreSubmitInfo : DE_NULL, 1u,
                    &commandBufferSubmitInfos[iterIdx], 1u, &signalSemaphoreSubmitInfo[iterIdx], isTimelineSemaphore,
                    isTimelineSemaphore);
            }

            VK_CHECK(synchronizationWrapper->queueSubmit(m_queueA, DE_NULL));
        }

        // Submit the reads, waiting only on the last write operation's
        // semaphore; the ordering of signal operations should guarantee
        // that all writes have completed by the time the reads kick in.
        {
            VkCommandBufferSubmitInfoKHR commandBufferSubmitInfos = makeCommonCommandBufferSubmitInfo(cmdBufferB);
            VkSemaphoreSubmitInfoKHR waitSemaphoreSubmitInfo      = makeCommonSemaphoreSubmitInfo(
                semaphoreHandlesA.back(), timelineValuesA.back(), VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR);
            VkSemaphoreSubmitInfoKHR signalSemaphoreSubmitInfo = makeCommonSemaphoreSubmitInfo(
                semaphoreHandlesB.back(), timelineValuesB.back(), VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT_KHR);
            SynchronizationWrapperPtr synchronizationWrapper =
                getSynchronizationWrapper(m_type, vk, isTimelineSemaphore);

            synchronizationWrapper->addSubmitInfo(
                1u,                         // uint32_t                            waitSemaphoreInfoCount
                &waitSemaphoreSubmitInfo,   // const VkSemaphoreSubmitInfoKHR*     pWaitSemaphoreInfos
                1u,                         // uint32_t                            commandBufferInfoCount
                &commandBufferSubmitInfos,  // const VkCommandBufferSubmitInfoKHR* pCommandBufferInfos
                1u,                         // uint32_t                            signalSemaphoreInfoCount
                &signalSemaphoreSubmitInfo, // const VkSemaphoreSubmitInfoKHR*     pSignalSemaphoreInfos
                isTimelineSemaphore, isTimelineSemaphore);

            VK_CHECK(synchronizationWrapper->queueSubmit(m_queueB, *fence));

            if (m_semaphoreType == VK_SEMAPHORE_TYPE_TIMELINE_KHR)
            {
                const VkSemaphoreWaitInfo waitInfo = {
                    VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, // VkStructureType sType;
                    DE_NULL,                               // const void* pNext;
                    0u,                                    // VkSemaphoreWaitFlagsKHR flags;
                    1u,                                    // uint32_t semaphoreCount;
                    &semaphoreHandlesB.back(),             // const VkSemaphore* pSemaphores;
                    &timelineValuesB.back(),               // const uint64_t* pValues;
                };

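                // Every write submission waits on value 1 of the shared timeline
                // semaphore, so this single host signal releases all of them at once.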
                // Unblock the whole lot.
                hostSignal(vk, device, semaphoreHandlesA.front(), 1);

                VK_CHECK(vk.waitSemaphores(device, &waitInfo, ~0ull));
            }
            else
            {
                VK_CHECK(vk.waitForFences(device, 1, &fence.get(), VK_TRUE, ~0ull));
            }
        }

        // Verify the result of the operations.
        for (uint32_t iterIdx = 0; iterIdx < iterations.size(); iterIdx++)
        {
            QueueSubmitOrderIteration &iter = iterations[iterIdx];
            const Data expected             = iter.writeOp->getData();
            const Data actual               = iter.readOp->getData();

            if (isIndirectBuffer(iter.resource->getType()))
            {
                const uint32_t expectedValue = reinterpret_cast<const uint32_t *>(expected.data)[0];
                const uint32_t actualValue   = reinterpret_cast<const uint32_t *>(actual.data)[0];

                if (actualValue < expectedValue)
                    return tcu::TestStatus::fail("Counter value is smaller than expected");
            }
            else
            {
                if (0 != deMemCmp(expected.data, actual.data, expected.size))
                    return tcu::TestStatus::fail("Memory contents don't match");
            }
        }

        return tcu::TestStatus::pass("Success");
    }

private:
    void addSemaphore(const DeviceInterface &vk, VkDevice device, std::vector<Move<VkSemaphore>> &semaphores,
                      std::vector<VkSemaphore> &semaphoreHandles, std::vector<uint64_t> &timelineValues,
                      uint64_t firstTimelineValue)
    {
        Move<VkSemaphore> semaphore;

        if (m_semaphoreType == VK_SEMAPHORE_TYPE_TIMELINE_KHR)
        {
            // Only allocate a single semaphore; it is reused with increasing values.
            if (semaphores.empty())
            {
                semaphores.push_back(createSemaphoreType(vk, device, m_semaphoreType));
            }
        }
        else
        {
            semaphores.push_back(createSemaphoreType(vk, device, m_semaphoreType));
        }

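        // Timeline values grow monotonically: each new value is the previous one
        // (or firstTimelineValue for the first entry) plus a random step in [1, 100].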
        semaphoreHandles.push_back(*semaphores.back());
        timelineValues.push_back((timelineValues.empty() ? firstTimelineValue : timelineValues.back()) +
                                 m_rng.getInt(1, 100));
    }

    SynchronizationType m_type;
    SharedPtr<OperationSupport> m_writeOpSupport;
    SharedPtr<OperationSupport> m_readOpSupport;
    const ResourceDescription &m_resourceDesc;
    VkSemaphoreType m_semaphoreType;
    const Unique<VkDevice> &m_device;
    const DeviceDriver m_deviceInterface;
    UniquePtr<SimpleAllocator> m_allocator;
    UniquePtr<OperationContext> m_operationContext;
    VkQueue m_queueA;
    VkQueue m_queueB;
    uint32_t m_queueFamilyIndexA;
    uint32_t m_queueFamilyIndexB;
    de::Random m_rng;
};

class QueueSubmitSignalOrderTestCase : public TestCase
{
public:
    QueueSubmitSignalOrderTestCase(tcu::TestContext &testCtx, SynchronizationType type, const std::string &name,
                                   OperationName writeOp, OperationName readOp, const ResourceDescription &resourceDesc,
                                   VkSemaphoreType semaphoreType, PipelineCacheData &pipelineCacheData)
        : TestCase(testCtx, name.c_str())
        , m_type(type)
        , m_writeOpSupport(makeOperationSupport(writeOp, resourceDesc).release())
        , m_readOpSupport(makeOperationSupport(readOp, resourceDesc).release())
        , m_resourceDesc(resourceDesc)
        , m_semaphoreType(semaphoreType)
        , m_pipelineCacheData(pipelineCacheData)
    {
    }

    virtual void checkSupport(Context &context) const
    {
        if (m_semaphoreType == VK_SEMAPHORE_TYPE_TIMELINE_KHR &&
            !context.getTimelineSemaphoreFeatures().timelineSemaphore)
            TCU_THROW(NotSupportedError, "Timeline semaphore not supported");
        if (m_type == SynchronizationType::SYNCHRONIZATION2)
            context.requireDeviceFunctionality("VK_KHR_synchronization2");
    }

    TestInstance *createInstance(Context &context) const
    {
        return new QueueSubmitSignalOrderTestInstance(context, m_type, m_writeOpSupport, m_readOpSupport,
                                                      m_resourceDesc, m_semaphoreType, m_pipelineCacheData);
    }

    void initPrograms(SourceCollections &programCollection) const
    {
        m_writeOpSupport->initPrograms(programCollection);
        m_readOpSupport->initPrograms(programCollection);
    }

private:
    SynchronizationType m_type;
    SharedPtr<OperationSupport> m_writeOpSupport;
    SharedPtr<OperationSupport> m_readOpSupport;
    const ResourceDescription &m_resourceDesc;
    VkSemaphoreType m_semaphoreType;
    PipelineCacheData &m_pipelineCacheData;
};

class QueueSubmitSignalOrderTests : public tcu::TestCaseGroup
{
public:
    QueueSubmitSignalOrderTests(tcu::TestContext &testCtx, SynchronizationType type, VkSemaphoreType semaphoreType,
                                const char *name)
        : tcu::TestCaseGroup(testCtx, name)
        , m_type(type)
        , m_semaphoreType(semaphoreType)
    {
    }

    void init(void)
    {
        static const OperationName writeOps[] = {
            OPERATION_NAME_WRITE_COPY_BUFFER,
            OPERATION_NAME_WRITE_COPY_BUFFER_TO_IMAGE,
            OPERATION_NAME_WRITE_COPY_IMAGE_TO_BUFFER,
            OPERATION_NAME_WRITE_COPY_IMAGE,
            OPERATION_NAME_WRITE_BLIT_IMAGE,
            OPERATION_NAME_WRITE_SSBO_VERTEX,
            OPERATION_NAME_WRITE_SSBO_TESSELLATION_CONTROL,
            OPERATION_NAME_WRITE_SSBO_TESSELLATION_EVALUATION,
            OPERATION_NAME_WRITE_SSBO_GEOMETRY,
            OPERATION_NAME_WRITE_SSBO_FRAGMENT,
            OPERATION_NAME_WRITE_SSBO_COMPUTE,
            OPERATION_NAME_WRITE_SSBO_COMPUTE_INDIRECT,
            OPERATION_NAME_WRITE_IMAGE_VERTEX,
            OPERATION_NAME_WRITE_IMAGE_TESSELLATION_CONTROL,
            OPERATION_NAME_WRITE_IMAGE_TESSELLATION_EVALUATION,
            OPERATION_NAME_WRITE_IMAGE_GEOMETRY,
            OPERATION_NAME_WRITE_IMAGE_FRAGMENT,
            OPERATION_NAME_WRITE_IMAGE_COMPUTE,
            OPERATION_NAME_WRITE_IMAGE_COMPUTE_INDIRECT,
        };
        static const OperationName readOps[] = {
            OPERATION_NAME_READ_COPY_BUFFER,
            OPERATION_NAME_READ_COPY_BUFFER_TO_IMAGE,
            OPERATION_NAME_READ_COPY_IMAGE_TO_BUFFER,
            OPERATION_NAME_READ_COPY_IMAGE,
            OPERATION_NAME_READ_BLIT_IMAGE,
            OPERATION_NAME_READ_UBO_VERTEX,
            OPERATION_NAME_READ_UBO_TESSELLATION_CONTROL,
            OPERATION_NAME_READ_UBO_TESSELLATION_EVALUATION,
            OPERATION_NAME_READ_UBO_GEOMETRY,
            OPERATION_NAME_READ_UBO_FRAGMENT,
            OPERATION_NAME_READ_UBO_COMPUTE,
            OPERATION_NAME_READ_UBO_COMPUTE_INDIRECT,
            OPERATION_NAME_READ_SSBO_VERTEX,
            OPERATION_NAME_READ_SSBO_TESSELLATION_CONTROL,
            OPERATION_NAME_READ_SSBO_TESSELLATION_EVALUATION,
            OPERATION_NAME_READ_SSBO_GEOMETRY,
            OPERATION_NAME_READ_SSBO_FRAGMENT,
            OPERATION_NAME_READ_SSBO_COMPUTE,
            OPERATION_NAME_READ_SSBO_COMPUTE_INDIRECT,
            OPERATION_NAME_READ_IMAGE_VERTEX,
            OPERATION_NAME_READ_IMAGE_TESSELLATION_CONTROL,
            OPERATION_NAME_READ_IMAGE_TESSELLATION_EVALUATION,
            OPERATION_NAME_READ_IMAGE_GEOMETRY,
            OPERATION_NAME_READ_IMAGE_FRAGMENT,
            OPERATION_NAME_READ_IMAGE_COMPUTE,
            OPERATION_NAME_READ_IMAGE_COMPUTE_INDIRECT,
            OPERATION_NAME_READ_INDIRECT_BUFFER_DRAW,
            OPERATION_NAME_READ_INDIRECT_BUFFER_DRAW_INDEXED,
            OPERATION_NAME_READ_INDIRECT_BUFFER_DISPATCH,
            OPERATION_NAME_READ_VERTEX_INPUT,
        };

        for (uint32_t writeOpIdx = 0; writeOpIdx < DE_LENGTH_OF_ARRAY(writeOps); writeOpIdx++)
            for (uint32_t readOpIdx = 0; readOpIdx < DE_LENGTH_OF_ARRAY(readOps); readOpIdx++)
            {
                const OperationName writeOp   = writeOps[writeOpIdx];
                const OperationName readOp    = readOps[readOpIdx];
                const std::string opGroupName = getOperationName(writeOp) + "_" + getOperationName(readOp);
                bool empty                    = true;

                de::MovePtr<tcu::TestCaseGroup> opGroup(new tcu::TestCaseGroup(m_testCtx, opGroupName.c_str()));

                for (int resourceNdx = 0; resourceNdx < DE_LENGTH_OF_ARRAY(s_resources); ++resourceNdx)
                {
                    const ResourceDescription &resource = s_resources[resourceNdx];

                    if (isResourceSupported(writeOp, resource) && isResourceSupported(readOp, resource))
                    {
                        opGroup->addChild(
                            new QueueSubmitSignalOrderTestCase(m_testCtx, m_type, getResourceName(resource), writeOp,
                                                               readOp, resource, m_semaphoreType, m_pipelineCacheData));
                        empty = false;
                    }
                }
                if (!empty)
                    addChild(opGroup.release());
            }
    }

    void deinit(void)
    {
        cleanupGroup();
    }

private:
    SynchronizationType m_type;
    VkSemaphoreType m_semaphoreType;
    // synchronization.op tests share pipeline cache data to speed up test
    // execution.
    PipelineCacheData m_pipelineCacheData;
};

} // namespace

tcu::TestCaseGroup *createSignalOrderTests(tcu::TestContext &testCtx, SynchronizationType type)
{
    de::MovePtr<tcu::TestCaseGroup> orderingTests(new tcu::TestCaseGroup(testCtx, "signal_order"));

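    // Four variants are registered below: binary and timeline semaphores within a
    // single device, plus the shared variants that exercise exported/imported
    // semaphore handles.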
    orderingTests->addChild(
        new QueueSubmitSignalOrderTests(testCtx, type, VK_SEMAPHORE_TYPE_BINARY_KHR, "binary_semaphore"));
    orderingTests->addChild(
        new QueueSubmitSignalOrderTests(testCtx, type, VK_SEMAPHORE_TYPE_TIMELINE_KHR, "timeline_semaphore"));
    orderingTests->addChild(
        new QueueSubmitSignalOrderSharedTests(testCtx, type, VK_SEMAPHORE_TYPE_BINARY_KHR, "shared_binary_semaphore"));
    orderingTests->addChild(new QueueSubmitSignalOrderSharedTests(testCtx, type, VK_SEMAPHORE_TYPE_TIMELINE_KHR,
                                                                  "shared_timeline_semaphore"));

    return orderingTests.release();
}

} // namespace synchronization
} // namespace vkt