/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2019 The Khronos Group Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Synchronization timeline semaphore tests
 *//*--------------------------------------------------------------------*/

#include "vktSynchronizationBasicSemaphoreTests.hpp"
#include "vktSynchronizationOperation.hpp"
#include "vktSynchronizationOperationTestData.hpp"
#include "vktSynchronizationOperationResources.hpp"
#include "vktTestCaseUtil.hpp"
#include "vktSynchronizationUtil.hpp"
#include "vktExternalMemoryUtil.hpp"
#include "vktCustomInstancesDevices.hpp"
#include "vkBarrierUtil.hpp"

#include "vkDefs.hpp"
#include "vkPlatform.hpp"
#include "vkQueryUtil.hpp"
#include "vkDeviceUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkImageUtil.hpp"
#include "vkRef.hpp"
#include "vkTypeUtil.hpp"
#include "vkBufferWithMemory.hpp"
#include "vkSafetyCriticalUtil.hpp"

#include "tcuTestLog.hpp"
#include "tcuCommandLine.hpp"

#include "deClock.h"
#include "deRandom.hpp"
#include "deThread.hpp"
#include "deUniquePtr.hpp"

#include <limits>
#include <set>
#include <iterator>
#include <algorithm>
#include <sstream>

namespace vkt
{
namespace synchronization
{
namespace
{

using namespace vk;
using de::MovePtr;
using de::SharedPtr;
using tcu::TestLog;

template <typename T>
inline SharedPtr<Move<T>> makeVkSharedPtr(Move<T> move)
{
    return SharedPtr<Move<T>>(new Move<T>(move));
}

template <typename T>
inline SharedPtr<T> makeSharedPtr(de::MovePtr<T> move)
{
    return SharedPtr<T>(move.release());
}

template <typename T>
inline SharedPtr<T> makeSharedPtr(T *ptr)
{
    return SharedPtr<T>(ptr);
}

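// Returns the maxTimelineSemaphoreValueDifference limit reported in VkPhysicalDeviceTimelineSemaphoreProperties.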
uint64_t getMaxTimelineSemaphoreValueDifference(const InstanceInterface &vk, const VkPhysicalDevice physicalDevice)
{
    VkPhysicalDeviceTimelineSemaphoreProperties timelineSemaphoreProperties;
    VkPhysicalDeviceProperties2 properties;

    deMemset(&timelineSemaphoreProperties, 0, sizeof(timelineSemaphoreProperties));
    timelineSemaphoreProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_PROPERTIES;

    deMemset(&properties, 0, sizeof(properties));
    properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
    properties.pNext = &timelineSemaphoreProperties;

    vk.getPhysicalDeviceProperties2(physicalDevice, &properties);

    return timelineSemaphoreProperties.maxTimelineSemaphoreValueDifference;
}

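// Signals 'semaphore' to 'timelineValue' with an empty queue submission. When a fence is provided, an
// additional empty submission using that fence is made and waited upon, so the signal is known to have executed.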
void deviceSignal(const DeviceInterface &vk, const VkDevice device, const VkQueue queue, const VkFence fence,
                  const SynchronizationType type, const VkSemaphore semaphore, const uint64_t timelineValue)
{
    {
        VkSemaphoreSubmitInfoKHR signalSemaphoreSubmitInfo =
            makeCommonSemaphoreSubmitInfo(semaphore, timelineValue, VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT_KHR);
        SynchronizationWrapperPtr synchronizationWrapper = getSynchronizationWrapper(type, vk, true);
        synchronizationWrapper->addSubmitInfo(
            0u,                         // uint32_t                                waitSemaphoreInfoCount
            DE_NULL,                    // const VkSemaphoreSubmitInfoKHR*        pWaitSemaphoreInfos
            0u,                         // uint32_t                                commandBufferInfoCount
            DE_NULL,                    // const VkCommandBufferSubmitInfoKHR*    pCommandBufferInfos
            1u,                         // uint32_t                                signalSemaphoreInfoCount
            &signalSemaphoreSubmitInfo, // const VkSemaphoreSubmitInfoKHR*        pSignalSemaphoreInfos
            false, true);
        VK_CHECK(synchronizationWrapper->queueSubmit(queue, DE_NULL));
    }

    if (fence != DE_NULL)
    {
        SynchronizationWrapperPtr synchronizationWrapper = getSynchronizationWrapper(type, vk, 1u);
        synchronizationWrapper->addSubmitInfo(0u,      // uint32_t                                waitSemaphoreInfoCount
                                              DE_NULL, // const VkSemaphoreSubmitInfoKHR*        pWaitSemaphoreInfos
                                              0u,      // uint32_t                                commandBufferInfoCount
                                              DE_NULL, // const VkCommandBufferSubmitInfoKHR*    pCommandBufferInfos
                                              0u, // uint32_t                                signalSemaphoreInfoCount
                                              DE_NULL // const VkSemaphoreSubmitInfoKHR*        pSignalSemaphoreInfos
        );
        VK_CHECK(synchronizationWrapper->queueSubmit(queue, fence));
        VK_CHECK(vk.waitForFences(device, 1u, &fence, VK_TRUE, ~(0ull)));
    }
}

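// Signals 'semaphore' to 'timelineValue' from the host with vkSignalSemaphore.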
void hostSignal(const DeviceInterface &vk, const VkDevice &device, VkSemaphore semaphore, const uint64_t timelineValue)
{
    VkSemaphoreSignalInfo ssi = {
        VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO, // VkStructureType sType;
        DE_NULL,                                 // const void* pNext;
        semaphore,                               // VkSemaphore semaphore;
        timelineValue,                           // uint64_t value;
    };

    VK_CHECK(vk.signalSemaphore(device, &ssi));
}

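// Creates a set of timeline semaphores with random target values, signals them (all of them or a random one,
// from the device or from the host) and then waits on those values from the host with vkWaitSemaphores.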
class WaitTestInstance : public TestInstance
{
public:
    WaitTestInstance(Context &context, SynchronizationType type, bool waitAll, bool signalFromDevice)
        : TestInstance(context)
        , m_type(type)
        , m_waitAll(waitAll)
        , m_signalFromDevice(signalFromDevice)
    {
    }

    tcu::TestStatus iterate(void)
    {
        const DeviceInterface &vk = m_context.getDeviceInterface();
        const VkDevice &device    = m_context.getDevice();
        const VkQueue queue       = m_context.getUniversalQueue();
        Unique<VkFence> fence(createFence(vk, device));
        std::vector<SharedPtr<Move<VkSemaphore>>> semaphorePtrs(createTimelineSemaphores(vk, device, 100));
        de::Random rng(1234);
        std::vector<VkSemaphore> semaphores;
        std::vector<uint64_t> timelineValues;

        for (uint32_t i = 0; i < semaphorePtrs.size(); i++)
        {
            semaphores.push_back((*semaphorePtrs[i]).get());
            timelineValues.push_back(rng.getInt(1, 10000));
        }

        if (m_waitAll)
        {

            for (uint32_t semIdx = 0; semIdx < semaphores.size(); semIdx++)
            {
                if (m_signalFromDevice)
                {
                    deviceSignal(vk, device, queue, *fence, m_type, semaphores[semIdx], timelineValues[semIdx]);
                    VK_CHECK(vk.resetFences(device, 1, &fence.get()));
                }
                else
                    hostSignal(vk, device, semaphores[semIdx], timelineValues[semIdx]);
            }
        }
        else
        {
            uint32_t randomIdx = rng.getInt(0, (uint32_t)(semaphores.size() - 1));

            if (m_signalFromDevice)
                deviceSignal(vk, device, queue, *fence, m_type, semaphores[randomIdx], timelineValues[randomIdx]);
            else
                hostSignal(vk, device, semaphores[randomIdx], timelineValues[randomIdx]);
        }

        {
            const VkSemaphoreWaitInfo waitInfo = {
                VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,                            // VkStructureType sType;
                DE_NULL,                                                          // const void* pNext;
                m_waitAll ? 0u : (VkSemaphoreWaitFlags)VK_SEMAPHORE_WAIT_ANY_BIT, // VkSemaphoreWaitFlagsKHR flags;
                (uint32_t)semaphores.size(),                                      // uint32_t semaphoreCount;
                &semaphores[0],                                                   // const VkSemaphore* pSemaphores;
                &timelineValues[0],                                               // const uint64_t* pValues;
            };

            VkResult result = vk.waitSemaphores(device, &waitInfo, 0ull);
            if (result != VK_SUCCESS)
                return tcu::TestStatus::fail("Wait failed");
        }

        VK_CHECK(vk.deviceWaitIdle(device));

        return tcu::TestStatus::pass("Wait success");
    }

private:
    std::vector<SharedPtr<Move<VkSemaphore>>> createTimelineSemaphores(const DeviceInterface &vk,
                                                                       const VkDevice &device, uint32_t count)
    {
        std::vector<SharedPtr<Move<VkSemaphore>>> semaphores;

        for (uint32_t i = 0; i < count; i++)
            semaphores.push_back(makeVkSharedPtr(createSemaphoreType(vk, device, VK_SEMAPHORE_TYPE_TIMELINE)));

        return semaphores;
    }

    const SynchronizationType m_type;
    bool m_waitAll;
    bool m_signalFromDevice;
};

class WaitTestCase : public TestCase
{
public:
    WaitTestCase(tcu::TestContext &testCtx, const std::string &name, SynchronizationType type, bool waitAll,
                 bool signalFromDevice)
        : TestCase(testCtx, name.c_str())
        , m_type(type)
        , m_waitAll(waitAll)
        , m_signalFromDevice(signalFromDevice)
    {
    }

    void checkSupport(Context &context) const override
    {
        context.requireDeviceFunctionality("VK_KHR_timeline_semaphore");
        if (m_type == SynchronizationType::SYNCHRONIZATION2)
            context.requireDeviceFunctionality("VK_KHR_synchronization2");
    }

    TestInstance *createInstance(Context &context) const override
    {
        return new WaitTestInstance(context, m_type, m_waitAll, m_signalFromDevice);
    }

private:
    const SynchronizationType m_type;
    bool m_waitAll;
    bool m_signalFromDevice;
};

// This test verifies that waiting from the host on a timeline point
// that is itself waiting for signaling works properly.
class HostWaitBeforeSignalTestInstance : public TestInstance
{
public:
    HostWaitBeforeSignalTestInstance(Context &context, SynchronizationType type) : TestInstance(context), m_type(type)
    {
    }

    tcu::TestStatus iterate(void)
    {
        const DeviceInterface &vk = m_context.getDeviceInterface();
        const VkDevice &device    = m_context.getDevice();
        const VkQueue queue       = m_context.getUniversalQueue();
        Unique<VkSemaphore> semaphore(createSemaphoreType(vk, device, VK_SEMAPHORE_TYPE_TIMELINE));
        de::Random rng(1234);
        std::vector<uint64_t> timelineValues;

        // Host value we signal at the end.
        timelineValues.push_back(1 + rng.getInt(1, 10000));

        for (uint32_t i = 0; i < 12; i++)
        {
            const uint64_t newTimelineValue                  = (timelineValues.back() + rng.getInt(1, 10000));
            VkSemaphoreSubmitInfoKHR waitSemaphoreSubmitInfo = makeCommonSemaphoreSubmitInfo(
                *semaphore, timelineValues.back(), VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR);
            VkSemaphoreSubmitInfoKHR signalSemaphoreSubmitInfo =
                makeCommonSemaphoreSubmitInfo(*semaphore, newTimelineValue, VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT_KHR);
            SynchronizationWrapperPtr synchronizationWrapper = getSynchronizationWrapper(m_type, vk, true);

            synchronizationWrapper->addSubmitInfo(
                1u,                         // uint32_t                                waitSemaphoreInfoCount
                &waitSemaphoreSubmitInfo,   // const VkSemaphoreSubmitInfoKHR*        pWaitSemaphoreInfos
                0u,                         // uint32_t                                commandBufferInfoCount
                DE_NULL,                    // const VkCommandBufferSubmitInfoKHR*    pCommandBufferInfos
                1u,                         // uint32_t                                signalSemaphoreInfoCount
                &signalSemaphoreSubmitInfo, // const VkSemaphoreSubmitInfoKHR*        pSignalSemaphoreInfos
                true, true);

            VK_CHECK(synchronizationWrapper->queueSubmit(queue, DE_NULL));

            timelineValues.push_back(newTimelineValue);
        }

        {
            const VkSemaphoreWaitInfo waitInfo = {
                VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, // VkStructureType sType;
                DE_NULL,                               // const void* pNext;
                0u,                                    // VkSemaphoreWaitFlagsKHR flags;
                (uint32_t)1u,                          // uint32_t semaphoreCount;
                &semaphore.get(),                      // const VkSemaphore* pSemaphores;
                &timelineValues[rng.getInt(0, static_cast<int>(timelineValues.size() - 1))], // const uint64_t* pValues;
            };

            VkResult result = vk.waitSemaphores(device, &waitInfo, 0ull);
            if (result != VK_TIMEOUT)
                return tcu::TestStatus::fail("Expected vkWaitSemaphores to time out");
        }

        hostSignal(vk, device, *semaphore, timelineValues.front());

        {
            const VkSemaphoreWaitInfo waitInfo = {
                VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, // VkStructureType sType;
                DE_NULL,                               // const void* pNext;
                0u,                                    // VkSemaphoreWaitFlagsKHR flags;
                (uint32_t)1u,                          // uint32_t semaphoreCount;
                &semaphore.get(),                      // const VkSemaphore* pSemaphores;
                &timelineValues.back(),                // const uint64_t* pValues;
            };

            VkResult result = vk.waitSemaphores(device, &waitInfo, ~(0ull));
            if (result != VK_SUCCESS)
                return tcu::TestStatus::fail("Wait failed");
        }

        VK_CHECK(vk.deviceWaitIdle(device));

        return tcu::TestStatus::pass("Wait success");
    }

private:
    std::vector<SharedPtr<Move<VkSemaphore>>> createTimelineSemaphores(const DeviceInterface &vk,
                                                                       const VkDevice &device, uint32_t count)
    {
        std::vector<SharedPtr<Move<VkSemaphore>>> semaphores;

        for (uint32_t i = 0; i < count; i++)
            semaphores.push_back(makeVkSharedPtr(createSemaphoreType(vk, device, VK_SEMAPHORE_TYPE_TIMELINE)));

        return semaphores;
    }

protected:
    const SynchronizationType m_type;
};

class HostWaitBeforeSignalTestCase : public TestCase
{
public:
    HostWaitBeforeSignalTestCase(tcu::TestContext &testCtx, const std::string &name, SynchronizationType type)
        : TestCase(testCtx, name.c_str())
        , m_type(type)
    {
    }

    void checkSupport(Context &context) const override
    {
        context.requireDeviceFunctionality("VK_KHR_timeline_semaphore");
        if (m_type == SynchronizationType::SYNCHRONIZATION2)
            context.requireDeviceFunctionality("VK_KHR_synchronization2");
    }

    TestInstance *createInstance(Context &context) const override
    {
        return new HostWaitBeforeSignalTestInstance(context, m_type);
    }

protected:
    const SynchronizationType m_type;
};

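// Signals a set of timeline semaphores from the device or the host, then polls vkGetSemaphoreCounterValue
// on the last one until it reaches its target value or the poll budget is exhausted.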
class PollTestInstance : public TestInstance
{
public:
    PollTestInstance(Context &context, bool signalFromDevice)
        : TestInstance(context)
        , m_signalFromDevice(signalFromDevice)
    {
    }

    tcu::TestStatus iterate(void)
    {
        const DeviceInterface &vk = m_context.getDeviceInterface();
        const VkDevice &device    = m_context.getDevice();
        const VkQueue queue       = m_context.getUniversalQueue();
        Unique<VkFence> fence(createFence(vk, device));
        std::vector<SharedPtr<Move<VkSemaphore>>> semaphorePtrs(createTimelineSemaphores(vk, device, 100));
        de::Random rng(1234);
        std::vector<VkSemaphore> semaphores;
        std::vector<uint64_t> timelineValues;
        const uint64_t pollTimeoutInMicroseconds = 1000ull * 1000ull * 1000ull; // 1000 seconds of poll budget
        uint64_t startTime;
        VkResult result = VK_SUCCESS;

        for (uint32_t i = 0; i < semaphorePtrs.size(); i++)
        {
            semaphores.push_back((*semaphorePtrs[i]).get());
            timelineValues.push_back(rng.getInt(1, 10000));
        }

        for (uint32_t semIdx = 0; semIdx < semaphores.size(); semIdx++)
        {
            if (m_signalFromDevice)
            {
                deviceSignal(vk, device, queue, semIdx == (semaphores.size() - 1) ? *fence : DE_NULL,
                             SynchronizationType::LEGACY, semaphores[semIdx], timelineValues[semIdx]);
            }
            else
                hostSignal(vk, device, semaphores[semIdx], timelineValues[semIdx]);
        }

        startTime = deGetMicroseconds();

        do
        {
            uint64_t value;

            result = vk.getSemaphoreCounterValue(device, semaphores.back(), &value);

            if (result != VK_SUCCESS)
                break;

            if (value == timelineValues.back())
            {
                if (m_signalFromDevice)
                    VK_CHECK(vk.waitForFences(device, 1u, &fence.get(), VK_TRUE, ~(0ull)));
                VK_CHECK(vk.deviceWaitIdle(device));
                return tcu::TestStatus::pass("Poll on timeline value succeeded");
            }

            if (value > timelineValues.back())
            {
                result = VK_ERROR_UNKNOWN;
                break;
            }
        } while ((deGetMicroseconds() - startTime) < pollTimeoutInMicroseconds);

        VK_CHECK(vk.deviceWaitIdle(device));

        if (result != VK_SUCCESS)
            return tcu::TestStatus::fail("Error while polling semaphore counter value");
        return tcu::TestStatus::fail("Timeout");
    }

private:
    std::vector<SharedPtr<Move<VkSemaphore>>> createTimelineSemaphores(const DeviceInterface &vk,
                                                                       const VkDevice &device, uint32_t count)
    {
        std::vector<SharedPtr<Move<VkSemaphore>>> semaphores;

        for (uint32_t i = 0; i < count; i++)
            semaphores.push_back(makeVkSharedPtr(createSemaphoreType(vk, device, VK_SEMAPHORE_TYPE_TIMELINE)));

        return semaphores;
    }

    bool m_signalFromDevice;
};

class PollTestCase : public TestCase
{
public:
    PollTestCase(tcu::TestContext &testCtx, const std::string &name, bool signalFromDevice)
        : TestCase(testCtx, name.c_str())
        , m_signalFromDevice(signalFromDevice)
    {
    }

    void checkSupport(Context &context) const override
    {
        context.requireDeviceFunctionality("VK_KHR_timeline_semaphore");
    }

    TestInstance *createInstance(Context &context) const override
    {
        return new PollTestInstance(context, m_signalFromDevice);
    }

private:
    bool m_signalFromDevice;
};

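// Helper thread that repeatedly reads the semaphore counter value and fails if it ever decreases.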
class MonotonicallyIncrementChecker : public de::Thread
{
public:
    MonotonicallyIncrementChecker(const DeviceInterface &vkd, VkDevice device, VkSemaphore semaphore)
        : de::Thread()
        , m_vkd(vkd)
        , m_device(device)
        , m_semaphore(semaphore)
        , m_running(true)
        , m_status(tcu::TestStatus::incomplete())
    {
    }

    virtual ~MonotonicallyIncrementChecker(void)
    {
    }

    tcu::TestStatus getStatus()
    {
        return m_status;
    }
    void stop()
    {
        m_running = false;
    }
    virtual void run()
    {
        uint64_t lastValue = 0;

        while (m_running)
        {
            uint64_t value;

            VK_CHECK(m_vkd.getSemaphoreCounterValue(m_device, m_semaphore, &value));

            if (value < lastValue)
            {
                m_status = tcu::TestStatus::fail("Value not monotonically increasing");
                return;
            }

            lastValue = value;
            deYield();
        }

        m_status = tcu::TestStatus::pass("Value monotonically increasing");
    }

private:
    const DeviceInterface &m_vkd;
    VkDevice m_device;
    VkSemaphore m_semaphore;
    bool m_running;
    tcu::TestStatus m_status;
};

void checkSupport(Context &context, SynchronizationType type)
{
    context.requireDeviceFunctionality("VK_KHR_timeline_semaphore");
    if (type == SynchronizationType::SYNCHRONIZATION2)
        context.requireDeviceFunctionality("VK_KHR_synchronization2");
}

// Queue device signaling close to the edges of the
// maxTimelineSemaphoreValueDifference value and verify that the value
// of the semaphore never goes backwards.
tcu::TestStatus maxDifferenceValueCase(Context &context, SynchronizationType type)
{
    const DeviceInterface &vk                 = context.getDeviceInterface();
    const VkDevice &device                    = context.getDevice();
    const VkQueue queue                       = context.getUniversalQueue();
    const uint64_t requiredMinValueDifference = deIntMaxValue32(32);
    const uint64_t maxTimelineValueDifference =
        getMaxTimelineSemaphoreValueDifference(context.getInstanceInterface(), context.getPhysicalDevice());
    const Unique<VkSemaphore> semaphore(createSemaphoreType(vk, device, VK_SEMAPHORE_TYPE_TIMELINE));
    const Unique<VkFence> fence(createFence(vk, device));
    tcu::TestLog &log = context.getTestContext().getLog();
    MonotonicallyIncrementChecker checkerThread(vk, device, *semaphore);
    uint64_t iterations;
    uint64_t timelineBackValue;
    uint64_t timelineFrontValue;

    if (maxTimelineValueDifference < requiredMinValueDifference)
        return tcu::TestStatus::fail("maxTimelineSemaphoreValueDifference is below the required minimum");

    iterations = std::min<uint64_t>(std::numeric_limits<uint64_t>::max() / maxTimelineValueDifference, 100ull);

    log << TestLog::Message << " maxTimelineSemaphoreValueDifference=" << maxTimelineValueDifference
        << " requiredMinimum=" << requiredMinValueDifference << " iterations=" << iterations << TestLog::EndMessage;

    checkerThread.start();

    timelineBackValue = timelineFrontValue = 1;
    hostSignal(vk, device, *semaphore, timelineFrontValue);

    for (uint64_t i = 0; i < iterations; i++)
    {
        uint64_t fenceValue;

        for (uint32_t j = 1; j <= 10; j++)
            deviceSignal(vk, device, queue, DE_NULL, type, *semaphore, ++timelineFrontValue);

        timelineFrontValue = timelineBackValue + maxTimelineValueDifference - 10;
        fenceValue         = timelineFrontValue;
        deviceSignal(vk, device, queue, *fence, type, *semaphore, fenceValue);
        for (uint32_t j = 1; j < 10; j++)
            deviceSignal(vk, device, queue, DE_NULL, type, *semaphore, ++timelineFrontValue);

        uint64_t value;
        VK_CHECK(vk.getSemaphoreCounterValue(device, *semaphore, &value));

        VK_CHECK(vk.waitForFences(device, 1, &fence.get(), VK_TRUE, ~(0ull)));
        VK_CHECK(vk.resetFences(device, 1, &fence.get()));

        timelineBackValue = fenceValue;
    }

    VK_CHECK(vk.deviceWaitIdle(device));

    checkerThread.stop();
    checkerThread.join();

    return checkerThread.getStatus();
}

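// Checks that a timeline semaphore reports the expected counter value right after creation: zero by default,
// otherwise the value passed at creation time, including a value larger than maxTimelineSemaphoreValueDifference.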
tcu::TestStatus initialValueCase(Context &context, SynchronizationType type)
{
    DE_UNREF(type);

    const DeviceInterface &vk = context.getDeviceInterface();
    const VkDevice &device    = context.getDevice();
    const VkQueue queue       = context.getUniversalQueue();
    const uint64_t maxTimelineValueDifference =
        getMaxTimelineSemaphoreValueDifference(context.getInstanceInterface(), context.getPhysicalDevice());
    de::Random rng(1234);
    const uint64_t nonZeroValue = 1 + rng.getUint64() % (maxTimelineValueDifference - 1);
    const Unique<VkSemaphore> semaphoreDefaultValue(createSemaphoreType(vk, device, VK_SEMAPHORE_TYPE_TIMELINE));
    const Unique<VkSemaphore> semaphoreInitialValue(
        createSemaphoreType(vk, device, VK_SEMAPHORE_TYPE_TIMELINE, 0, nonZeroValue));
    uint64_t initialValue;
    VkSemaphoreWaitInfo waitInfo = {
        VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, // VkStructureType sType;
        DE_NULL,                               // const void* pNext;
        0u,                                    // VkSemaphoreWaitFlagsKHR flags;
        1u,                                    // uint32_t semaphoreCount;
        DE_NULL,                               // const VkSemaphore* pSemaphores;
        &initialValue,                         // const uint64_t* pValues;
    };
    uint64_t value;
    VkResult result;

    waitInfo.pSemaphores = &semaphoreDefaultValue.get();
    initialValue         = 0;
    result               = vk.waitSemaphores(device, &waitInfo, 0ull);
    if (result != VK_SUCCESS)
        return tcu::TestStatus::fail("Wait zero initial value failed");

    {
        VkSemaphoreSubmitInfoKHR waitSemaphoreSubmitInfo = makeCommonSemaphoreSubmitInfo(
            *semaphoreDefaultValue, initialValue, VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR);
        SynchronizationWrapperPtr synchronizationWrapper = getSynchronizationWrapper(type, vk, true);

        synchronizationWrapper->addSubmitInfo(
            1u,                       // uint32_t                                waitSemaphoreInfoCount
            &waitSemaphoreSubmitInfo, // const VkSemaphoreSubmitInfoKHR*        pWaitSemaphoreInfos
            0u,                       // uint32_t                                commandBufferInfoCount
            DE_NULL,                  // const VkCommandBufferSubmitInfoKHR*    pCommandBufferInfos
            0u,                       // uint32_t                                signalSemaphoreInfoCount
            DE_NULL,                  // const VkSemaphoreSubmitInfoKHR*        pSignalSemaphoreInfos
            true, false);

        VK_CHECK(synchronizationWrapper->queueSubmit(queue, DE_NULL));

        VK_CHECK(vk.deviceWaitIdle(device));
    }

    VK_CHECK(vk.getSemaphoreCounterValue(device, *semaphoreDefaultValue, &value));
#ifdef CTS_USES_VULKANSC
    if (context.getTestContext().getCommandLine().isSubProcess())
#endif // CTS_USES_VULKANSC
    {
        if (value != initialValue)
            return tcu::TestStatus::fail("Invalid zero initial value");
    }

    waitInfo.pSemaphores = &semaphoreInitialValue.get();
    initialValue         = nonZeroValue;
    result               = vk.waitSemaphores(device, &waitInfo, 0ull);
    if (result != VK_SUCCESS)
        return tcu::TestStatus::fail("Wait non zero initial value failed");

    VK_CHECK(vk.getSemaphoreCounterValue(device, *semaphoreInitialValue, &value));
#ifdef CTS_USES_VULKANSC
    if (context.getTestContext().getCommandLine().isSubProcess())
#endif // CTS_USES_VULKANSC
    {
        if (value != nonZeroValue)
            return tcu::TestStatus::fail("Invalid non zero initial value");
    }

    if (maxTimelineValueDifference != std::numeric_limits<uint64_t>::max())
    {
        const uint64_t nonZeroMaxValue = maxTimelineValueDifference + 1;
        const Unique<VkSemaphore> semaphoreMaxValue(
            createSemaphoreType(vk, device, VK_SEMAPHORE_TYPE_TIMELINE, 0, nonZeroMaxValue));

        waitInfo.pSemaphores = &semaphoreMaxValue.get();
        initialValue         = nonZeroMaxValue;
        result               = vk.waitSemaphores(device, &waitInfo, 0ull);
        if (result != VK_SUCCESS)
            return tcu::TestStatus::fail("Wait max value failed");

        VK_CHECK(vk.getSemaphoreCounterValue(device, *semaphoreMaxValue, &value));
#ifdef CTS_USES_VULKANSC
        if (context.getTestContext().getCommandLine().isSubProcess())
#endif // CTS_USES_VULKANSC
        {
            if (value != nonZeroMaxValue)
                return tcu::TestStatus::fail("Invalid max value initial value");
        }
    }

    return tcu::TestStatus::pass("Initial value correct");
}

class WaitTests : public tcu::TestCaseGroup
{
public:
    // Various wait cases of timeline semaphores
    WaitTests(tcu::TestContext &testCtx, SynchronizationType type) : tcu::TestCaseGroup(testCtx, "wait"), m_type(type)
    {
    }

    void init(void)
    {
        static const struct
        {
            std::string name;
            bool waitAll;
            bool signalFromDevice;
        } waitCases[] = {
            {"all_signal_from_device", true, true},
            {"one_signal_from_device", false, true},
            {"all_signal_from_host", true, false},
            {"one_signal_from_host", false, false},
        };

        for (uint32_t caseIdx = 0; caseIdx < DE_LENGTH_OF_ARRAY(waitCases); caseIdx++)
            addChild(new WaitTestCase(m_testCtx, waitCases[caseIdx].name, m_type, waitCases[caseIdx].waitAll,
                                      waitCases[caseIdx].signalFromDevice));
        addChild(new HostWaitBeforeSignalTestCase(m_testCtx, "host_wait_before_signal", m_type));
        addChild(new PollTestCase(m_testCtx, "poll_signal_from_device", true));
        addChild(new PollTestCase(m_testCtx, "poll_signal_from_host", false));
    }

protected:
    SynchronizationType m_type;
};

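// One link of the device/host copy chain: a resource, the write/read operations acting on it and the
// timeline values used to order them (writeValue < readValue < cpuValue).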
struct TimelineIteration
{
    TimelineIteration(OperationContext &opContext, const ResourceDescription &resourceDesc,
                      const SharedPtr<OperationSupport> &writeOpSupport,
                      const SharedPtr<OperationSupport> &readOpSupport, uint64_t lastValue, de::Random &rng)
        : resource(makeSharedPtr(
              new Resource(opContext, resourceDesc,
                           writeOpSupport->getOutResourceUsageFlags() | readOpSupport->getInResourceUsageFlags())))
        , writeOp(makeSharedPtr(writeOpSupport->build(opContext, *resource)))
        , readOp(makeSharedPtr(readOpSupport->build(opContext, *resource)))
    {
        writeValue = lastValue + rng.getInt(1, 100);
        readValue  = writeValue + rng.getInt(1, 100);
        cpuValue   = readValue + rng.getInt(1, 100);
    }
    ~TimelineIteration()
    {
    }

    SharedPtr<Resource> resource;

    SharedPtr<Operation> writeOp;
    SharedPtr<Operation> readOp;

    uint64_t writeValue;
    uint64_t readValue;
    uint64_t cpuValue;
};

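// Host thread that, for each iteration, waits for the GPU read to complete, copies the read-back data into
// the next iteration's write operation and signals the timeline point that unblocks the next GPU write.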
class HostCopyThread : public de::Thread
{
public:
    HostCopyThread(const DeviceInterface &vkd, VkDevice device, VkSemaphore semaphore,
                   const std::vector<SharedPtr<TimelineIteration>> &iterations)
        : de::Thread()
        , m_vkd(vkd)
        , m_device(device)
        , m_semaphore(semaphore)
        , m_iterations(iterations)
    {
    }
    virtual ~HostCopyThread(void)
    {
    }

    virtual void run()
    {
        for (uint32_t iterIdx = 0; iterIdx < m_iterations.size(); iterIdx++)
        {
            // Wait on the GPU read operation.
            {
                const VkSemaphoreWaitInfo waitInfo = {
                    VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, // VkStructureType sType;
                    DE_NULL,                               // const void* pNext;
                    0u,                                    // VkSemaphoreWaitFlagsKHR flags;
                    1u,                                    // uint32_t                    semaphoreCount
                    &m_semaphore,                          // VkSemaphore* pSemaphores;
                    &m_iterations[iterIdx]->readValue,     // uint64_t* pValues;
                };
                VkResult result;

                result = m_vkd.waitSemaphores(m_device, &waitInfo, ~(uint64_t)0u);
                if (result != VK_SUCCESS)
                    return;
            }

            // Copy the data read on the GPU into the next GPU write operation.
            if (iterIdx < (m_iterations.size() - 1))
                m_iterations[iterIdx + 1]->writeOp->setData(m_iterations[iterIdx]->readOp->getData());

            // Signal the next GPU write operation.
            {
                const VkSemaphoreSignalInfo signalInfo = {
                    VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO, // VkStructureType sType;
                    DE_NULL,                                 // const void* pNext;
                    m_semaphore,                             // VkSemaphore semaphore;
                    m_iterations[iterIdx]->cpuValue,         // uint64_t value;
                };
                VkResult result;

                result = m_vkd.signalSemaphore(m_device, &signalInfo);
                if (result != VK_SUCCESS)
                    return;
            }
        }
    }

private:
    const DeviceInterface &m_vkd;
    VkDevice m_device;
    VkSemaphore m_semaphore;
    const std::vector<SharedPtr<TimelineIteration>> &m_iterations;
};

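// Fills the output data with random bytes for buffers, or with random per-channel pixel values for images.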
void randomizeData(std::vector<uint8_t> &outData, const ResourceDescription &desc)
{
    de::Random rng(1234);

    if (desc.type == RESOURCE_TYPE_BUFFER)
    {
        for (uint32_t i = 0; i < outData.size(); i++)
            outData[i] = rng.getUint8();
    }
    else
    {
        const PlanarFormatDescription planeDesc = getPlanarFormatDescription(desc.imageFormat);
        tcu::PixelBufferAccess access(mapVkFormat(desc.imageFormat), desc.size.x(), desc.size.y(), desc.size.z(),
                                      static_cast<void *>(&outData[0]));

        DE_ASSERT(desc.type == RESOURCE_TYPE_IMAGE);

        for (int z = 0; z < access.getDepth(); z++)
        {
            for (int y = 0; y < access.getHeight(); y++)
            {
                for (int x = 0; x < access.getWidth(); x++)
                {
                    if (isFloatFormat(desc.imageFormat))
                    {
                        tcu::Vec4 value(rng.getFloat(), rng.getFloat(), rng.getFloat(), 1.0f);
                        access.setPixel(value, x, y, z);
                    }
                    else
                    {
                        tcu::IVec4 value(rng.getInt(0, deIntMaxValue32(planeDesc.channels[0].sizeBits)),
                                         rng.getInt(0, deIntMaxValue32(planeDesc.channels[1].sizeBits)),
                                         rng.getInt(0, deIntMaxValue32(planeDesc.channels[2].sizeBits)),
                                         rng.getInt(0, deIntMaxValue32(planeDesc.channels[3].sizeBits)));
                        access.setPixel(value, x, y, z);
                    }
                }
            }
        }
    }
}

// Creates a chain of operations that copies data back and forth between the device and the host, with
// each operation depending on the previous one, and verifies that the data at the beginning and the end
// of the chain match.
class DeviceHostTestInstance : public TestInstance
{
public:
    DeviceHostTestInstance(Context &context, SynchronizationType type, const ResourceDescription &resourceDesc,
                           const SharedPtr<OperationSupport> &writeOp, const SharedPtr<OperationSupport> &readOp,
                           PipelineCacheData &pipelineCacheData)
        : TestInstance(context)
        , m_type(type)
        , m_opContext(context, type, pipelineCacheData)
        , m_resourceDesc(resourceDesc)
    {
        de::Random rng(1234);

        // Create a dozen write/read operation pairs and their associated resources.
        for (uint32_t i = 0; i < 12; i++)
        {
            m_iterations.push_back(makeSharedPtr(new TimelineIteration(
                m_opContext, resourceDesc, writeOp, readOp, i == 0 ? 0 : m_iterations.back()->cpuValue, rng)));
        }
    }

    tcu::TestStatus iterate(void)
    {
        const DeviceInterface &vk       = m_context.getDeviceInterface();
        const VkDevice device           = m_context.getDevice();
        const VkQueue queue             = m_context.getUniversalQueue();
        const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
        const Unique<VkSemaphore> semaphore(createSemaphoreType(vk, device, VK_SEMAPHORE_TYPE_TIMELINE));
        const Unique<VkCommandPool> cmdPool(
            createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex));
        HostCopyThread hostCopyThread(vk, device, *semaphore, m_iterations);
        std::vector<SharedPtr<Move<VkCommandBuffer>>> ptrCmdBuffers;
        std::vector<VkCommandBufferSubmitInfoKHR> commandBufferSubmitInfos(m_iterations.size() * 2,
                                                                           makeCommonCommandBufferSubmitInfo(0));

        hostCopyThread.start();

        for (uint32_t opNdx = 0; opNdx < (m_iterations.size() * 2); opNdx++)
        {
            ptrCmdBuffers.push_back(makeVkSharedPtr(makeCommandBuffer(vk, device, *cmdPool)));
            commandBufferSubmitInfos[opNdx].commandBuffer = **(ptrCmdBuffers.back());
        }

        // Randomize the data copied over.
        {
            const Data startData = m_iterations.front()->writeOp->getData();
            Data randomizedData;
            std::vector<uint8_t> dataArray;

            dataArray.resize(startData.size);
            randomizeData(dataArray, m_resourceDesc);
            randomizedData.size = dataArray.size();
            randomizedData.data = &dataArray[0];
            m_iterations.front()->writeOp->setData(randomizedData);
        }

        SynchronizationWrapperPtr synchronizationWrapper =
            getSynchronizationWrapper(m_type, vk, true, (uint32_t)m_iterations.size() * 2u);
        std::vector<VkSemaphoreSubmitInfoKHR> waitSemaphoreSubmitInfos(
            m_iterations.size() * 2,
            makeCommonSemaphoreSubmitInfo(*semaphore, 0u, VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR));
        std::vector<VkSemaphoreSubmitInfoKHR> signalSemaphoreSubmitInfos(
            m_iterations.size() * 2,
            makeCommonSemaphoreSubmitInfo(*semaphore, 0u, VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT_KHR));

        for (uint32_t iterIdx = 0; iterIdx < m_iterations.size(); iterIdx++)
        {
            // Write operation
            {
                uint32_t wIdx = 2 * iterIdx;

                waitSemaphoreSubmitInfos[wIdx].value   = wIdx == 0 ? 0u : m_iterations[iterIdx - 1]->cpuValue;
                signalSemaphoreSubmitInfos[wIdx].value = m_iterations[iterIdx]->writeValue;

                synchronizationWrapper->addSubmitInfo(
                    wIdx == 0 ? 0u : 1u,             // uint32_t                                waitSemaphoreInfoCount
                    &waitSemaphoreSubmitInfos[wIdx], // const VkSemaphoreSubmitInfoKHR*        pWaitSemaphoreInfos
                    1u,                              // uint32_t                                commandBufferInfoCount
                    &commandBufferSubmitInfos[wIdx], // const VkCommandBufferSubmitInfoKHR*    pCommandBufferInfos
                    1u,                              // uint32_t                                signalSemaphoreInfoCount
                    &signalSemaphoreSubmitInfos[wIdx], // const VkSemaphoreSubmitInfoKHR*        pSignalSemaphoreInfos
                    wIdx == 0 ? false : true, true);

                VkCommandBuffer cmdBuffer = commandBufferSubmitInfos[wIdx].commandBuffer;
                beginCommandBuffer(vk, cmdBuffer);
                m_iterations[iterIdx]->writeOp->recordCommands(cmdBuffer);

                {
                    const SyncInfo writeSync = m_iterations[iterIdx]->writeOp->getOutSyncInfo();
                    const SyncInfo readSync  = m_iterations[iterIdx]->readOp->getInSyncInfo();
                    const Resource &resource = *(m_iterations[iterIdx]->resource);

                    if (resource.getType() == RESOURCE_TYPE_IMAGE)
                    {
                        DE_ASSERT(writeSync.imageLayout != VK_IMAGE_LAYOUT_UNDEFINED);
                        DE_ASSERT(readSync.imageLayout != VK_IMAGE_LAYOUT_UNDEFINED);

                        const VkImageMemoryBarrier2KHR imageMemoryBarrier2 = makeImageMemoryBarrier2(
                            writeSync.stageMask,                 // VkPipelineStageFlags2KHR            srcStageMask
                            writeSync.accessMask,                // VkAccessFlags2KHR                srcAccessMask
                            readSync.stageMask,                  // VkPipelineStageFlags2KHR            dstStageMask
                            readSync.accessMask,                 // VkAccessFlags2KHR                dstAccessMask
                            writeSync.imageLayout,               // VkImageLayout                    oldLayout
                            readSync.imageLayout,                // VkImageLayout                    newLayout
                            resource.getImage().handle,          // VkImage                            image
                            resource.getImage().subresourceRange // VkImageSubresourceRange            subresourceRange
                        );
                        VkDependencyInfoKHR dependencyInfo =
                            makeCommonDependencyInfo(DE_NULL, DE_NULL, &imageMemoryBarrier2);
                        synchronizationWrapper->cmdPipelineBarrier(cmdBuffer, &dependencyInfo);
                    }
                    else
                    {
                        const VkBufferMemoryBarrier2KHR bufferMemoryBarrier2 = makeBufferMemoryBarrier2(
                            writeSync.stageMask,         // VkPipelineStageFlags2KHR            srcStageMask
                            writeSync.accessMask,        // VkAccessFlags2KHR                srcAccessMask
                            readSync.stageMask,          // VkPipelineStageFlags2KHR            dstStageMask
                            readSync.accessMask,         // VkAccessFlags2KHR                dstAccessMask
                            resource.getBuffer().handle, // VkBuffer                            buffer
                            0,                           // VkDeviceSize                        offset
                            VK_WHOLE_SIZE                // VkDeviceSize                        size
                        );
                        VkDependencyInfoKHR dependencyInfo = makeCommonDependencyInfo(DE_NULL, &bufferMemoryBarrier2);
                        synchronizationWrapper->cmdPipelineBarrier(cmdBuffer, &dependencyInfo);
                    }
                }

                endCommandBuffer(vk, cmdBuffer);
            }

            // Read operation
            {
                uint32_t rIdx = 2 * iterIdx + 1;

                waitSemaphoreSubmitInfos[rIdx].value   = m_iterations[iterIdx]->writeValue;
                signalSemaphoreSubmitInfos[rIdx].value = m_iterations[iterIdx]->readValue;

                synchronizationWrapper->addSubmitInfo(
                    1u,                              // uint32_t                                waitSemaphoreInfoCount
                    &waitSemaphoreSubmitInfos[rIdx], // const VkSemaphoreSubmitInfoKHR*        pWaitSemaphoreInfos
                    1u,                              // uint32_t                                commandBufferInfoCount
                    &commandBufferSubmitInfos[rIdx], // const VkCommandBufferSubmitInfoKHR*    pCommandBufferInfos
                    1u,                              // uint32_t                                signalSemaphoreInfoCount
                    &signalSemaphoreSubmitInfos[rIdx], // const VkSemaphoreSubmitInfoKHR*        pSignalSemaphoreInfos
                    rIdx == 0 ? false : true, true);

                VkCommandBuffer cmdBuffer = commandBufferSubmitInfos[rIdx].commandBuffer;
                beginCommandBuffer(vk, cmdBuffer);
                m_iterations[iterIdx]->readOp->recordCommands(cmdBuffer);
                endCommandBuffer(vk, cmdBuffer);
            }
        }

        VK_CHECK(synchronizationWrapper->queueSubmit(queue, DE_NULL));

        VK_CHECK(vk.deviceWaitIdle(device));

        hostCopyThread.join();

        {
            const Data expected = m_iterations.front()->writeOp->getData();
            const Data actual   = m_iterations.back()->readOp->getData();

            if (0 != deMemCmp(expected.data, actual.data, expected.size))
                return tcu::TestStatus::fail("Memory contents don't match");
        }

        return tcu::TestStatus::pass("OK");
    }

protected:
    const SynchronizationType m_type;
    OperationContext m_opContext;
    const ResourceDescription m_resourceDesc;
    std::vector<SharedPtr<TimelineIteration>> m_iterations;
};

1081 class DeviceHostSyncTestCase : public TestCase
1082 {
1083 public:
DeviceHostSyncTestCase(tcu::TestContext & testCtx,const std::string & name,SynchronizationType type,const ResourceDescription resourceDesc,const OperationName writeOp,const OperationName readOp,PipelineCacheData & pipelineCacheData)1084     DeviceHostSyncTestCase(tcu::TestContext &testCtx, const std::string &name, SynchronizationType type,
1085                            const ResourceDescription resourceDesc, const OperationName writeOp,
1086                            const OperationName readOp, PipelineCacheData &pipelineCacheData)
1087         : TestCase(testCtx, name)
1088         , m_type(type)
1089         , m_resourceDesc(resourceDesc)
1090         , m_writeOp(makeOperationSupport(writeOp, resourceDesc).release())
1091         , m_readOp(makeOperationSupport(readOp, resourceDesc).release())
1092         , m_pipelineCacheData(pipelineCacheData)
1093     {
1094     }
1095 
checkSupport(Context & context) const1096     void checkSupport(Context &context) const override
1097     {
1098         context.requireDeviceFunctionality("VK_KHR_timeline_semaphore");
1099         if (m_type == SynchronizationType::SYNCHRONIZATION2)
1100             context.requireDeviceFunctionality("VK_KHR_synchronization2");
1101     }
1102 
initPrograms(SourceCollections & programCollection) const1103     void initPrograms(SourceCollections &programCollection) const override
1104     {
1105         m_writeOp->initPrograms(programCollection);
1106         m_readOp->initPrograms(programCollection);
1107     }
1108 
createInstance(Context & context) const1109     TestInstance *createInstance(Context &context) const override
1110     {
1111         return new DeviceHostTestInstance(context, m_type, m_resourceDesc, m_writeOp, m_readOp, m_pipelineCacheData);
1112     }
1113 
1114 private:
1115     const SynchronizationType m_type;
1116     const ResourceDescription m_resourceDesc;
1117     const SharedPtr<OperationSupport> m_writeOp;
1118     const SharedPtr<OperationSupport> m_readOp;
1119     PipelineCacheData &m_pipelineCacheData;
1120 };
1121 
1122 class DeviceHostTestsBase : public tcu::TestCaseGroup
1123 {
1124 public:
1125     // Synchronization of serialized device/host operations
DeviceHostTestsBase(tcu::TestContext & testCtx,SynchronizationType type)1126     DeviceHostTestsBase(tcu::TestContext &testCtx, SynchronizationType type)
1127         : tcu::TestCaseGroup(testCtx, "device_host")
1128         , m_type(type)
1129     {
1130     }
1131 
initCommonTests(void)1132     void initCommonTests(void)
1133     {
1134         static const OperationName writeOps[] = {
1135             OPERATION_NAME_WRITE_COPY_BUFFER,
1136             OPERATION_NAME_WRITE_COPY_BUFFER_TO_IMAGE,
1137             OPERATION_NAME_WRITE_COPY_IMAGE_TO_BUFFER,
1138             OPERATION_NAME_WRITE_COPY_IMAGE,
1139             OPERATION_NAME_WRITE_BLIT_IMAGE,
1140             OPERATION_NAME_WRITE_SSBO_VERTEX,
1141             OPERATION_NAME_WRITE_SSBO_TESSELLATION_CONTROL,
1142             OPERATION_NAME_WRITE_SSBO_TESSELLATION_EVALUATION,
1143             OPERATION_NAME_WRITE_SSBO_GEOMETRY,
1144             OPERATION_NAME_WRITE_SSBO_FRAGMENT,
1145             OPERATION_NAME_WRITE_SSBO_COMPUTE,
1146             OPERATION_NAME_WRITE_SSBO_COMPUTE_INDIRECT,
1147             OPERATION_NAME_WRITE_IMAGE_VERTEX,
1148             OPERATION_NAME_WRITE_IMAGE_TESSELLATION_CONTROL,
1149             OPERATION_NAME_WRITE_IMAGE_TESSELLATION_EVALUATION,
1150             OPERATION_NAME_WRITE_IMAGE_GEOMETRY,
1151             OPERATION_NAME_WRITE_IMAGE_FRAGMENT,
1152             OPERATION_NAME_WRITE_IMAGE_COMPUTE,
1153             OPERATION_NAME_WRITE_IMAGE_COMPUTE_INDIRECT,
1154         };
1155         static const OperationName readOps[] = {
1156             OPERATION_NAME_READ_COPY_BUFFER,
1157             OPERATION_NAME_READ_COPY_BUFFER_TO_IMAGE,
1158             OPERATION_NAME_READ_COPY_IMAGE_TO_BUFFER,
1159             OPERATION_NAME_READ_COPY_IMAGE,
1160             OPERATION_NAME_READ_BLIT_IMAGE,
1161             OPERATION_NAME_READ_UBO_VERTEX,
1162             OPERATION_NAME_READ_UBO_TESSELLATION_CONTROL,
1163             OPERATION_NAME_READ_UBO_TESSELLATION_EVALUATION,
1164             OPERATION_NAME_READ_UBO_GEOMETRY,
1165             OPERATION_NAME_READ_UBO_FRAGMENT,
1166             OPERATION_NAME_READ_UBO_COMPUTE,
1167             OPERATION_NAME_READ_UBO_COMPUTE_INDIRECT,
1168             OPERATION_NAME_READ_SSBO_VERTEX,
1169             OPERATION_NAME_READ_SSBO_TESSELLATION_CONTROL,
1170             OPERATION_NAME_READ_SSBO_TESSELLATION_EVALUATION,
1171             OPERATION_NAME_READ_SSBO_GEOMETRY,
1172             OPERATION_NAME_READ_SSBO_FRAGMENT,
1173             OPERATION_NAME_READ_SSBO_COMPUTE,
1174             OPERATION_NAME_READ_SSBO_COMPUTE_INDIRECT,
1175             OPERATION_NAME_READ_IMAGE_VERTEX,
1176             OPERATION_NAME_READ_IMAGE_TESSELLATION_CONTROL,
1177             OPERATION_NAME_READ_IMAGE_TESSELLATION_EVALUATION,
1178             OPERATION_NAME_READ_IMAGE_GEOMETRY,
1179             OPERATION_NAME_READ_IMAGE_FRAGMENT,
1180             OPERATION_NAME_READ_IMAGE_COMPUTE,
1181             OPERATION_NAME_READ_IMAGE_COMPUTE_INDIRECT,
1182             OPERATION_NAME_READ_INDIRECT_BUFFER_DRAW,
1183             OPERATION_NAME_READ_INDIRECT_BUFFER_DRAW_INDEXED,
1184             OPERATION_NAME_READ_INDIRECT_BUFFER_DISPATCH,
1185             OPERATION_NAME_READ_VERTEX_INPUT,
1186         };
1187 
1188         for (int writeOpNdx = 0; writeOpNdx < DE_LENGTH_OF_ARRAY(writeOps); ++writeOpNdx)
1189             for (int readOpNdx = 0; readOpNdx < DE_LENGTH_OF_ARRAY(readOps); ++readOpNdx)
1190             {
1191                 const OperationName writeOp   = writeOps[writeOpNdx];
1192                 const OperationName readOp    = readOps[readOpNdx];
1193                 const std::string opGroupName = getOperationName(writeOp) + "_" + getOperationName(readOp);
1194                 bool empty                    = true;
1195 
1196                 de::MovePtr<tcu::TestCaseGroup> opGroup(new tcu::TestCaseGroup(m_testCtx, opGroupName.c_str()));
1197 
1198                 for (int resourceNdx = 0; resourceNdx < DE_LENGTH_OF_ARRAY(s_resources); ++resourceNdx)
1199                 {
1200                     const ResourceDescription &resource = s_resources[resourceNdx];
1201                     std::string name                    = getResourceName(resource);
1202 
1203                     if (isResourceSupported(writeOp, resource) && isResourceSupported(readOp, resource))
1204                     {
1205                         opGroup->addChild(new DeviceHostSyncTestCase(m_testCtx, name, m_type, resource, writeOp, readOp,
1206                                                                      m_pipelineCacheData));
1207                         empty = false;
1208                     }
1209                 }
1210                 if (!empty)
1211                     addChild(opGroup.release());
1212             }
1213     }
1214 
1215 protected:
1216     SynchronizationType m_type;
1217 
1218 private:
1219     // Tests in this group share pipeline cache data to speed up test
1220     // execution.
1221     PipelineCacheData m_pipelineCacheData;
1222 };
1223 
1224 class LegacyDeviceHostTests : public DeviceHostTestsBase
1225 {
1226 public:
1227     LegacyDeviceHostTests(tcu::TestContext &testCtx) : DeviceHostTestsBase(testCtx, SynchronizationType::LEGACY)
1228     {
1229     }
1230 
1231     void init(void)
1232     {
1233         initCommonTests();
1234 
1235         de::MovePtr<tcu::TestCaseGroup> miscGroup(new tcu::TestCaseGroup(m_testCtx, "misc"));
1236         // Timeline semaphore properties test
1237         addFunctionCase(miscGroup.get(), "max_difference_value", checkSupport, maxDifferenceValueCase, m_type);
1238         // Timeline semaphore initial value test
1239         addFunctionCase(miscGroup.get(), "initial_value", checkSupport, initialValueCase, m_type);
1240         addChild(miscGroup.release());
1241     }
1242 };
1243 
1244 class Sytnchronization2DeviceHostTests : public DeviceHostTestsBase
1245 {
1246 public:
1247     Sytnchronization2DeviceHostTests(tcu::TestContext &testCtx)
1248         : DeviceHostTestsBase(testCtx, SynchronizationType::SYNCHRONIZATION2)
1249     {
1250     }
1251 
1252     void init(void)
1253     {
1254         initCommonTests();
1255 
1256         de::MovePtr<tcu::TestCaseGroup> miscGroup(new tcu::TestCaseGroup(m_testCtx, "misc"));
1257         // Timeline semaphore properties test
1258         addFunctionCase(miscGroup.get(), "max_difference_value", checkSupport, maxDifferenceValueCase, m_type);
1259         addChild(miscGroup.release());
1260     }
1261 };
1262 
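// One link of the timeline chain: the operation to execute, the queue it runs on,
// and the timeline value it signals. Each iteration's value is the previous value
// plus a random increment in [1, 100], so values increase strictly along the chain.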
1263 struct QueueTimelineIteration
1264 {
1265     QueueTimelineIteration(const SharedPtr<OperationSupport> &_opSupport, uint64_t lastValue, VkQueue _queue,
1266                            uint32_t _queueFamilyIdx, de::Random &rng)
1267         : opSupport(_opSupport)
1268         , queue(_queue)
1269         , queueFamilyIdx(_queueFamilyIdx)
1270     {
1271         timelineValue = lastValue + rng.getInt(1, 100);
1272     }
1273     ~QueueTimelineIteration()
1274     {
1275     }
1276 
1277     SharedPtr<OperationSupport> opSupport;
1278     VkQueue queue;
1279     uint32_t queueFamilyIdx;
1280     uint64_t timelineValue;
1281     SharedPtr<Operation> op;
1282 };
1283 
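// Build one VkDeviceQueueCreateInfo per queue family, requesting every queue the
// family exposes. pQueuePriorities is left as DE_NULL here and is filled in by
// createTestDevice() once the per-family priority arrays have been allocated.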
1284 std::vector<VkDeviceQueueCreateInfo> getQueueCreateInfo(
1285     const std::vector<VkQueueFamilyProperties> queueFamilyProperties)
1286 {
1287     std::vector<VkDeviceQueueCreateInfo> infos;
1288 
1289     for (uint32_t i = 0; i < queueFamilyProperties.size(); i++)
1290     {
1291         VkDeviceQueueCreateInfo info = {VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, DE_NULL, 0, i,
1292                                         queueFamilyProperties[i].queueCount,        DE_NULL};
1293         infos.push_back(info);
1294     }
1295 
1296     return infos;
1297 }
1298 
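// Create a custom device exposing all queues of all families so the tests can spread
// the operation chain across them. VK_KHR_timeline_semaphore is enabled as an
// extension when it is not already part of the core API version, and
// VK_KHR_synchronization2 is added for SYNCHRONIZATION2 tests.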
1299 Move<VkDevice> createTestDevice(Context &context, const VkInstance &instance, const InstanceInterface &vki,
1300                                 SynchronizationType type)
1301 {
1302     const VkPhysicalDevice physicalDevice = chooseDevice(vki, instance, context.getTestContext().getCommandLine());
1303     const std::vector<VkQueueFamilyProperties> queueFamilyProperties =
1304         getPhysicalDeviceQueueFamilyProperties(vki, physicalDevice);
1305     std::vector<VkDeviceQueueCreateInfo> queueCreateInfos = getQueueCreateInfo(queueFamilyProperties);
1306     VkPhysicalDeviceSynchronization2FeaturesKHR synchronization2Features{
1307         VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES_KHR, DE_NULL, true};
1308     VkPhysicalDeviceTimelineSemaphoreFeatures timelineSemaphoreFeatures{
1309         VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES, DE_NULL, true};
1310     VkPhysicalDeviceFeatures2 createPhysicalFeatures{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
1311                                                      &timelineSemaphoreFeatures, context.getDeviceFeatures()};
1312     void **nextPtr = &timelineSemaphoreFeatures.pNext;
1313 
1314     std::vector<const char *> deviceExtensions;
1315 
1316     if (!isCoreDeviceExtension(context.getUsedApiVersion(), "VK_KHR_timeline_semaphore"))
1317         deviceExtensions.push_back("VK_KHR_timeline_semaphore");
1318     if (type == SynchronizationType::SYNCHRONIZATION2)
1319     {
1320         deviceExtensions.push_back("VK_KHR_synchronization2");
1321         addToChainVulkanStructure(&nextPtr, synchronization2Features);
1322     }
1323 
1324     void *pNext = &createPhysicalFeatures;
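    // For Vulkan SC, chain the object reservation and SC 1.0 feature structures required
    // at device creation; pipeline cache and pipeline pool reservations are only filled in
    // when running as a subprocess.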
1325 #ifdef CTS_USES_VULKANSC
1326     VkDeviceObjectReservationCreateInfo memReservationInfo = context.getTestContext().getCommandLine().isSubProcess() ?
1327                                                                  context.getResourceInterface()->getStatMax() :
1328                                                                  resetDeviceObjectReservationCreateInfo();
1329     memReservationInfo.pNext                               = pNext;
1330     pNext                                                  = &memReservationInfo;
1331 
1332     VkPhysicalDeviceVulkanSC10Features sc10Features = createDefaultSC10Features();
1333     sc10Features.pNext                              = pNext;
1334     pNext                                           = &sc10Features;
1335 
1336     VkPipelineCacheCreateInfo pcCI;
1337     std::vector<VkPipelinePoolSize> poolSizes;
1338     if (context.getTestContext().getCommandLine().isSubProcess())
1339     {
1340         if (context.getResourceInterface()->getCacheDataSize() > 0)
1341         {
1342             pcCI = {
1343                 VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, // VkStructureType sType;
1344                 DE_NULL,                                      // const void* pNext;
1345                 VK_PIPELINE_CACHE_CREATE_READ_ONLY_BIT |
1346                     VK_PIPELINE_CACHE_CREATE_USE_APPLICATION_STORAGE_BIT, // VkPipelineCacheCreateFlags flags;
1347                 context.getResourceInterface()->getCacheDataSize(),       // uintptr_t initialDataSize;
1348                 context.getResourceInterface()->getCacheData()            // const void* pInitialData;
1349             };
1350             memReservationInfo.pipelineCacheCreateInfoCount = 1;
1351             memReservationInfo.pPipelineCacheCreateInfos    = &pcCI;
1352         }
1353 
1354         poolSizes = context.getResourceInterface()->getPipelinePoolSizes();
1355         if (!poolSizes.empty())
1356         {
1357             memReservationInfo.pipelinePoolSizeCount = uint32_t(poolSizes.size());
1358             memReservationInfo.pPipelinePoolSizes    = poolSizes.data();
1359         }
1360     }
1361 #endif // CTS_USES_VULKANSC
1362 
1363     const VkDeviceCreateInfo deviceInfo = {
1364         VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,           //VkStructureType sType;
1365         pNext,                                          //const void* pNext;
1366         0u,                                             //VkDeviceCreateFlags flags;
1367         static_cast<uint32_t>(queueCreateInfos.size()), //uint32_t queueCreateInfoCount;
1368         &queueCreateInfos[0],                           //const VkDeviceQueueCreateInfo* pQueueCreateInfos;
1369         0u,                                             //uint32_t enabledLayerCount;
1370         DE_NULL,                                        //const char* const* ppEnabledLayerNames;
1371         static_cast<uint32_t>(deviceExtensions.size()), //uint32_t enabledExtensionCount;
1372         deviceExtensions.data(),                        //const char* const* ppEnabledExtensionNames;
1373         0u                                              //const VkPhysicalDeviceFeatures* pEnabledFeatures;
1374     };
1375     std::vector<SharedPtr<std::vector<float>>> queuePriorities;
1376 
1377     for (auto &queueCreateInfo : queueCreateInfos)
1378     {
1379         MovePtr<std::vector<float>> priorities(new std::vector<float>);
1380 
1381         for (uint32_t i = 0; i < queueCreateInfo.queueCount; i++)
1382             priorities->push_back(1.0f);
1383 
1384         queuePriorities.push_back(makeSharedPtr(priorities));
1385 
1386         queueCreateInfo.pQueuePriorities = &(*queuePriorities.back().get())[0];
1387     }
1388 
1389     const auto validation = context.getTestContext().getCommandLine().isValidationEnabled();
1390 
1391     return createCustomDevice(validation, context.getPlatformInterface(), instance, vki, physicalDevice, &deviceInfo);
1392 }
1393 
1394 // Class to wrap a singleton logical device
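// The device is created on first use and shared by all test instances in this file to
// avoid recreating it per test case; cleanupGroup() releases it when a test group is
// torn down.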
1395 class SingletonDevice
1396 {
1397     SingletonDevice(Context &context, SynchronizationType type)
1398         : m_logicalDevice(createTestDevice(context, context.getInstance(), context.getInstanceInterface(), type))
1399     {
1400     }
1401 
1402 public:
1403     static const Unique<vk::VkDevice> &getDevice(Context &context, SynchronizationType type)
1404     {
1405         if (!m_singletonDevice)
1406             m_singletonDevice = SharedPtr<SingletonDevice>(new SingletonDevice(context, type));
1407 
1408         DE_ASSERT(m_singletonDevice);
1409         return m_singletonDevice->m_logicalDevice;
1410     }
1411 
1412     static void destroy()
1413     {
1414         m_singletonDevice.clear();
1415     }
1416 
1417 private:
1418     const Unique<vk::VkDevice> m_logicalDevice;
1419 
1420     static SharedPtr<SingletonDevice> m_singletonDevice;
1421 };
1422 SharedPtr<SingletonDevice> SingletonDevice::m_singletonDevice;
1423 
1424 static void cleanupGroup()
1425 {
1426     // Destroy singleton object
1427     SingletonDevice::destroy();
1428 }
1429 
1430 // Create a chain of operations with data copied across queues & host
1431 // and submit the operations out of order to verify that the queues
1432 // are properly unblocked as the work progresses.
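// Each iteration waits on the timeline value signalled by the previous iteration and
// signals its own, strictly greater, value. Batches are submitted in reverse dependency
// order, so every submission enters its queue while the value it waits on is still
// unsignalled; the host then releases the whole chain with a single host-side signal of
// m_hostTimelineValue (see hostSignal() in iterate()).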
1433 class WaitBeforeSignalTestInstance : public TestInstance
1434 {
1435 public:
1436     WaitBeforeSignalTestInstance(Context &context, SynchronizationType type, const ResourceDescription &resourceDesc,
1437                                  const SharedPtr<OperationSupport> &writeOp, const SharedPtr<OperationSupport> &readOp,
1438                                  PipelineCacheData &pipelineCacheData)
1439         : TestInstance(context)
1440         , m_type(type)
1441         , m_resourceDesc(resourceDesc)
1442         , m_device(SingletonDevice::getDevice(context, type))
1443         , m_context(context)
1444 #ifndef CTS_USES_VULKANSC
1445         , m_deviceDriver(de::MovePtr<DeviceDriver>(
1446               new DeviceDriver(context.getPlatformInterface(), context.getInstance(), *m_device,
1447                                context.getUsedApiVersion(), context.getTestContext().getCommandLine())))
1448 #else
1449         , m_deviceDriver(de::MovePtr<DeviceDriverSC, DeinitDeviceDeleter>(
1450               new DeviceDriverSC(context.getPlatformInterface(), context.getInstance(), *m_device,
1451                                  context.getTestContext().getCommandLine(), context.getResourceInterface(),
1452                                  m_context.getDeviceVulkanSC10Properties(), m_context.getDeviceProperties(),
1453                                  context.getUsedApiVersion()),
1454               vk::DeinitDeviceDeleter(context.getResourceInterface().get(), *m_device)))
1455 #endif // CTS_USES_VULKANSC
1456         , m_allocator(new SimpleAllocator(
1457               *m_deviceDriver, *m_device,
1458               getPhysicalDeviceMemoryProperties(context.getInstanceInterface(),
1459                                                 chooseDevice(context.getInstanceInterface(), context.getInstance(),
1460                                                              context.getTestContext().getCommandLine()))))
1461         , m_opContext(context, type, *m_deviceDriver, *m_device, *m_allocator, pipelineCacheData)
1462     {
1463         const auto &vki                       = m_context.getInstanceInterface();
1464         const auto instance                   = m_context.getInstance();
1465         const DeviceInterface &vk             = *m_deviceDriver;
1466         const VkDevice device                 = *m_device;
1467         const VkPhysicalDevice physicalDevice = chooseDevice(vki, instance, context.getTestContext().getCommandLine());
1468         const std::vector<VkQueueFamilyProperties> queueFamilyProperties =
1469             getPhysicalDeviceQueueFamilyProperties(vki, physicalDevice);
1470         const uint32_t universalQueueFamilyIndex = context.getUniversalQueueFamilyIndex();
1471         de::Random rng(1234);
1472         uint32_t lastCopyOpIdx = 0;
1473         std::set<std::pair<uint32_t, uint32_t>> used_queues;
1474 
1475         m_hostTimelineValue = rng.getInt(0, 1000);
1476 
1477         m_iterations.push_back(makeSharedPtr(new QueueTimelineIteration(
1478             writeOp, m_hostTimelineValue, getDeviceQueue(vk, device, universalQueueFamilyIndex, 0),
1479             universalQueueFamilyIndex, rng)));
1480         used_queues.insert(std::make_pair(universalQueueFamilyIndex, 0));
1481 
1482         // Go through all the queues and try to use all the ones that
1483         // support the type of resource we're dealing with.
1484         for (uint32_t familyIdx = 0; familyIdx < queueFamilyProperties.size(); familyIdx++)
1485         {
1486             for (uint32_t instanceIdx = 0; instanceIdx < queueFamilyProperties[familyIdx].queueCount; instanceIdx++)
1487             {
1488                 // Only add each queue once.
1489                 if (used_queues.find(std::make_pair(familyIdx, instanceIdx)) != used_queues.end())
1490                     continue;
1491 
1492                 // Find an operation compatible with the queue
1493                 for (uint32_t copyOpIdx = 0; copyOpIdx < DE_LENGTH_OF_ARRAY(s_copyOps); copyOpIdx++)
1494                 {
1495                     OperationName copyOpName = s_copyOps[(lastCopyOpIdx + copyOpIdx) % DE_LENGTH_OF_ARRAY(s_copyOps)];
1496 
1497                     if (isResourceSupported(copyOpName, resourceDesc))
1498                     {
1499                         SharedPtr<OperationSupport> copyOpSupport(
1500                             makeOperationSupport(copyOpName, resourceDesc).release());
1501                         VkQueueFlags copyOpQueueFlags = copyOpSupport->getQueueFlags(m_opContext);
1502 
1503                         if ((copyOpQueueFlags & queueFamilyProperties[familyIdx].queueFlags) != copyOpQueueFlags)
1504                             continue;
1505 
1506                         // Barriers use the VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT pipeline stage, so the selected queue must support VK_QUEUE_GRAPHICS_BIT
1507                         if ((copyOpQueueFlags & VK_QUEUE_GRAPHICS_BIT) == 0u)
1508                             continue;
1509 
1510                         m_iterations.push_back(makeSharedPtr(new QueueTimelineIteration(
1511                             copyOpSupport, m_iterations.back()->timelineValue,
1512                             getDeviceQueue(vk, device, familyIdx, instanceIdx), familyIdx, rng)));
1513                         used_queues.insert(std::make_pair(familyIdx, instanceIdx));
1514                         break;
1515                     }
1516                 }
1517             }
1518         }
1519 
1520         // Add the read operation on the universal queue; it should be
1521         // submitted in order with respect to the write operation.
1522         m_iterations.push_back(makeSharedPtr(new QueueTimelineIteration(
1523             readOp, m_iterations.back()->timelineValue, getDeviceQueue(vk, device, universalQueueFamilyIndex, 0),
1524             universalQueueFamilyIndex, rng)));
1525 
1526         // Now create the resources with the usage flags associated with the
1527         // operations performed on each resource.
1528         for (uint32_t opIdx = 0; opIdx < (m_iterations.size() - 1); opIdx++)
1529         {
1530             uint32_t usage = m_iterations[opIdx]->opSupport->getOutResourceUsageFlags() |
1531                              m_iterations[opIdx + 1]->opSupport->getInResourceUsageFlags();
1532 
1533             m_resources.push_back(makeSharedPtr(new Resource(m_opContext, resourceDesc, usage)));
1534         }
1535 
1536         m_iterations.front()->op =
1537             makeSharedPtr(m_iterations.front()->opSupport->build(m_opContext, *m_resources.front()).release());
1538         for (uint32_t opIdx = 1; opIdx < (m_iterations.size() - 1); opIdx++)
1539         {
1540             m_iterations[opIdx]->op =
1541                 makeSharedPtr(m_iterations[opIdx]
1542                                   ->opSupport->build(m_opContext, *m_resources[opIdx - 1], *m_resources[opIdx])
1543                                   .release());
1544         }
1545         m_iterations.back()->op =
1546             makeSharedPtr(m_iterations.back()->opSupport->build(m_opContext, *m_resources.back()).release());
1547     }
1548 
1549     ~WaitBeforeSignalTestInstance()
1550     {
1551     }
1552 
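    // Records one command buffer per iteration, submits all of them before anything can
    // execute, then kicks the chain off from the host and checks that the data written at
    // the front of the chain arrives unchanged at the final read.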
1553     tcu::TestStatus iterate(void)
1554     {
1555         const DeviceInterface &vk = *m_deviceDriver;
1556         const VkDevice device     = *m_device;
1557         const Unique<VkSemaphore> semaphore(createSemaphoreType(vk, device, VK_SEMAPHORE_TYPE_TIMELINE));
1558         std::vector<SharedPtr<Move<VkCommandPool>>> cmdPools;
1559         std::vector<SharedPtr<Move<VkCommandBuffer>>> ptrCmdBuffers;
1560         std::vector<VkCommandBufferSubmitInfoKHR> commandBufferSubmitInfos(m_iterations.size(),
1561                                                                            makeCommonCommandBufferSubmitInfo(0));
1562         VkSemaphoreSubmitInfoKHR waitSemaphoreSubmitInfo =
1563             makeCommonSemaphoreSubmitInfo(*semaphore, 0u, VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR);
1564         VkSemaphoreSubmitInfoKHR signalSemaphoreSubmitInfo =
1565             makeCommonSemaphoreSubmitInfo(*semaphore, 0u, VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT_KHR);
1566 
1567         for (uint32_t opNdx = 0; opNdx < m_iterations.size(); opNdx++)
1568         {
1569             cmdPools.push_back(makeVkSharedPtr(createCommandPool(
1570                 vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, m_iterations[opNdx]->queueFamilyIdx)));
1571             ptrCmdBuffers.push_back(makeVkSharedPtr(makeCommandBuffer(vk, device, **cmdPools.back())));
1572             commandBufferSubmitInfos[opNdx].commandBuffer = **(ptrCmdBuffers.back());
1573         }
1574 
1575         // Randomize the data copied over.
1576         {
1577             const Data startData = m_iterations.front()->op->getData();
1578             Data randomizedData;
1579             std::vector<uint8_t> dataArray;
1580 
1581             dataArray.resize(startData.size);
1582             randomizeData(dataArray, m_resourceDesc);
1583             randomizedData.size = dataArray.size();
1584             randomizedData.data = &dataArray[0];
1585             m_iterations.front()->op->setData(randomizedData);
1586         }
1587 
1588         for (uint32_t _iterIdx = 0; _iterIdx < (m_iterations.size() - 1); _iterIdx++)
1589         {
1590             // Submit in reverse dependency order to exercise the
1591             // wait-before-signal behavior.
1592             uint32_t iterIdx                                 = (uint32_t)(m_iterations.size() - 2 - _iterIdx);
1593             VkCommandBuffer cmdBuffer                        = commandBufferSubmitInfos[iterIdx].commandBuffer;
1594             SynchronizationWrapperPtr synchronizationWrapper = getSynchronizationWrapper(m_type, vk, true);
1595 
1596             waitSemaphoreSubmitInfo.value =
1597                 iterIdx == 0 ? m_hostTimelineValue : m_iterations[iterIdx - 1]->timelineValue;
1598             signalSemaphoreSubmitInfo.value = m_iterations[iterIdx]->timelineValue;
1599 
1600             synchronizationWrapper->addSubmitInfo(
1601                 1u,                                 // uint32_t                                waitSemaphoreInfoCount
1602                 &waitSemaphoreSubmitInfo,           // const VkSemaphoreSubmitInfoKHR*        pWaitSemaphoreInfos
1603                 1u,                                 // uint32_t                                commandBufferInfoCount
1604                 &commandBufferSubmitInfos[iterIdx], // const VkCommandBufferSubmitInfoKHR*    pCommandBufferInfos
1605                 1u,                                 // uint32_t                                signalSemaphoreInfoCount
1606                 &signalSemaphoreSubmitInfo,         // const VkSemaphoreSubmitInfoKHR*        pSignalSemaphoreInfos
1607                 true, true);
1608 
1609             beginCommandBuffer(vk, cmdBuffer);
1610             m_iterations[iterIdx]->op->recordCommands(cmdBuffer);
1611 
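            // Hand the resource over from the queue family that produced it to the family
            // that consumes it in the next iteration (and, for images, perform the required
            // layout transition).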
1612             {
1613                 const SyncInfo writeSync = m_iterations[iterIdx]->op->getOutSyncInfo();
1614                 const SyncInfo readSync  = m_iterations[iterIdx + 1]->op->getInSyncInfo();
1615                 const Resource &resource = *m_resources[iterIdx];
1616 
1617                 if (resource.getType() == RESOURCE_TYPE_IMAGE)
1618                 {
1619                     DE_ASSERT(writeSync.imageLayout != VK_IMAGE_LAYOUT_UNDEFINED);
1620                     DE_ASSERT(readSync.imageLayout != VK_IMAGE_LAYOUT_UNDEFINED);
1621 
1622                     const VkImageMemoryBarrier2KHR imageMemoryBarrier2 = makeImageMemoryBarrier2(
1623                         writeSync.stageMask,                  // VkPipelineStageFlags2KHR            srcStageMask
1624                         writeSync.accessMask,                 // VkAccessFlags2KHR                srcAccessMask
1625                         readSync.stageMask,                   // VkPipelineStageFlags2KHR            dstStageMask
1626                         readSync.accessMask,                  // VkAccessFlags2KHR                dstAccessMask
1627                         writeSync.imageLayout,                // VkImageLayout                    oldLayout
1628                         readSync.imageLayout,                 // VkImageLayout                    newLayout
1629                         resource.getImage().handle,           // VkImage                            image
1630                         resource.getImage().subresourceRange, // VkImageSubresourceRange            subresourceRange
1631                         m_iterations[iterIdx]
1632                             ->queueFamilyIdx, // uint32_t                            srcQueueFamilyIndex
1633                         m_iterations[iterIdx + 1]
1634                             ->queueFamilyIdx // uint32_t                            destQueueFamilyIndex
1635                     );
1636                     VkDependencyInfoKHR dependencyInfo =
1637                         makeCommonDependencyInfo(DE_NULL, DE_NULL, &imageMemoryBarrier2);
1638                     synchronizationWrapper->cmdPipelineBarrier(cmdBuffer, &dependencyInfo);
1639                 }
1640                 else
1641                 {
1642                     const VkBufferMemoryBarrier2KHR bufferMemoryBarrier2 = makeBufferMemoryBarrier2(
1643                         writeSync.stageMask,         // VkPipelineStageFlags2KHR            srcStageMask
1644                         writeSync.accessMask,        // VkAccessFlags2KHR                srcAccessMask
1645                         readSync.stageMask,          // VkPipelineStageFlags2KHR            dstStageMask
1646                         readSync.accessMask,         // VkAccessFlags2KHR                dstAccessMask
1647                         resource.getBuffer().handle, // VkBuffer                            buffer
1648                         0,                           // VkDeviceSize                        offset
1649                         VK_WHOLE_SIZE,               // VkDeviceSize                        size
1650                         m_iterations[iterIdx]
1651                             ->queueFamilyIdx, // uint32_t                            srcQueueFamilyIndex
1652                         m_iterations[iterIdx + 1]
1653                             ->queueFamilyIdx // uint32_t                            dstQueueFamilyIndex
1654                     );
1655                     VkDependencyInfoKHR dependencyInfo = makeCommonDependencyInfo(DE_NULL, &bufferMemoryBarrier2);
1656                     synchronizationWrapper->cmdPipelineBarrier(cmdBuffer, &dependencyInfo);
1657                 }
1658             }
1659 
1660             endCommandBuffer(vk, cmdBuffer);
1661 
1662             VK_CHECK(synchronizationWrapper->queueSubmit(m_iterations[iterIdx]->queue, DE_NULL));
1663         }
1664 
1665         // Submit the last read operation in order.
1666         {
1667             const uint32_t iterIdx                           = (uint32_t)(m_iterations.size() - 1);
1668             SynchronizationWrapperPtr synchronizationWrapper = getSynchronizationWrapper(m_type, vk, true);
1669 
1670             waitSemaphoreSubmitInfo.value   = m_iterations[iterIdx - 1]->timelineValue;
1671             signalSemaphoreSubmitInfo.value = m_iterations[iterIdx]->timelineValue;
1672 
1673             synchronizationWrapper->addSubmitInfo(
1674                 1u,                                 // uint32_t                                waitSemaphoreInfoCount
1675                 &waitSemaphoreSubmitInfo,           // const VkSemaphoreSubmitInfoKHR*        pWaitSemaphoreInfos
1676                 1u,                                 // uint32_t                                commandBufferInfoCount
1677                 &commandBufferSubmitInfos[iterIdx], // const VkCommandBufferSubmitInfoKHR*    pCommandBufferInfos
1678                 1u,                                 // uint32_t                                signalSemaphoreInfoCount
1679                 &signalSemaphoreSubmitInfo,         // const VkSemaphoreSubmitInfoKHR*        pSignalSemaphoreInfos
1680                 true, true);
1681 
1682             VkCommandBuffer cmdBuffer = commandBufferSubmitInfos[iterIdx].commandBuffer;
1683             beginCommandBuffer(vk, cmdBuffer);
1684             m_iterations[iterIdx]->op->recordCommands(cmdBuffer);
1685             endCommandBuffer(vk, cmdBuffer);
1686 
1687             VK_CHECK(synchronizationWrapper->queueSubmit(m_iterations[iterIdx]->queue, DE_NULL));
1688         }
1689 
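        // Up to this point nothing has executed: every submitted batch is waiting on a
        // timeline value that has not been signalled yet.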
1690         {
1691             // Kick off the whole chain from the host.
1692             hostSignal(vk, device, *semaphore, m_hostTimelineValue);
1693             VK_CHECK(vk.deviceWaitIdle(device));
1694         }
1695 
1696         {
1697             const Data expected = m_iterations.front()->op->getData();
1698             const Data actual   = m_iterations.back()->op->getData();
1699 
1700             if (0 != deMemCmp(expected.data, actual.data, expected.size))
1701                 return tcu::TestStatus::fail("Memory contents don't match");
1702         }
1703 
1704         return tcu::TestStatus::pass("OK");
1705     }
1706 
1707 protected:
1708     const SynchronizationType m_type;
1709     const ResourceDescription m_resourceDesc;
1710     const Unique<VkDevice> &m_device;
1711     const Context &m_context;
1712 #ifndef CTS_USES_VULKANSC
1713     de::MovePtr<vk::DeviceDriver> m_deviceDriver;
1714 #else
1715     de::MovePtr<DeviceDriverSC, DeinitDeviceDeleter> m_deviceDriver;
1716 #endif // CTS_USES_VULKANSC
1717     MovePtr<Allocator> m_allocator;
1718     OperationContext m_opContext;
1719     std::vector<SharedPtr<QueueTimelineIteration>> m_iterations;
1720     std::vector<SharedPtr<Resource>> m_resources;
1721     uint64_t m_hostTimelineValue;
1722 };
1723 
1724 class WaitBeforeSignalTestCase : public TestCase
1725 {
1726 public:
1727     WaitBeforeSignalTestCase(tcu::TestContext &testCtx, const std::string &name, SynchronizationType type,
1728                              const ResourceDescription resourceDesc, const OperationName writeOp,
1729                              const OperationName readOp, PipelineCacheData &pipelineCacheData)
1730         : TestCase(testCtx, name)
1731         , m_type(type)
1732         , m_resourceDesc(resourceDesc)
1733         , m_writeOp(makeOperationSupport(writeOp, resourceDesc).release())
1734         , m_readOp(makeOperationSupport(readOp, resourceDesc).release())
1735         , m_pipelineCacheData(pipelineCacheData)
1736     {
1737     }
1738 
1739     void checkSupport(Context &context) const override
1740     {
1741         context.requireDeviceFunctionality("VK_KHR_timeline_semaphore");
1742         if (m_type == SynchronizationType::SYNCHRONIZATION2)
1743             context.requireDeviceFunctionality("VK_KHR_synchronization2");
1744     }
1745 
1746     void initPrograms(SourceCollections &programCollection) const override
1747     {
1748         m_writeOp->initPrograms(programCollection);
1749         m_readOp->initPrograms(programCollection);
1750 
1751         for (uint32_t copyOpNdx = 0; copyOpNdx < DE_LENGTH_OF_ARRAY(s_copyOps); copyOpNdx++)
1752         {
1753             if (isResourceSupported(s_copyOps[copyOpNdx], m_resourceDesc))
1754                 makeOperationSupport(s_copyOps[copyOpNdx], m_resourceDesc)->initPrograms(programCollection);
1755         }
1756     }
1757 
1758     TestInstance *createInstance(Context &context) const override
1759     {
1760         return new WaitBeforeSignalTestInstance(context, m_type, m_resourceDesc, m_writeOp, m_readOp,
1761                                                 m_pipelineCacheData);
1762     }
1763 
1764 private:
1765     SynchronizationType m_type;
1766     const ResourceDescription m_resourceDesc;
1767     const SharedPtr<OperationSupport> m_writeOp;
1768     const SharedPtr<OperationSupport> m_readOp;
1769     PipelineCacheData &m_pipelineCacheData;
1770 };
1771 
1772 class WaitBeforeSignalTests : public tcu::TestCaseGroup
1773 {
1774 public:
1775     // Synchronization of out-of-order submissions to queues
1776     WaitBeforeSignalTests(tcu::TestContext &testCtx, SynchronizationType type)
1777         : tcu::TestCaseGroup(testCtx, "wait_before_signal")
1778         , m_type(type)
1779     {
1780     }
1781 
1782     void init(void)
1783     {
1784         static const OperationName writeOps[] = {
1785             OPERATION_NAME_WRITE_COPY_BUFFER,
1786             OPERATION_NAME_WRITE_COPY_BUFFER_TO_IMAGE,
1787             OPERATION_NAME_WRITE_COPY_IMAGE_TO_BUFFER,
1788             OPERATION_NAME_WRITE_COPY_IMAGE,
1789             OPERATION_NAME_WRITE_BLIT_IMAGE,
1790             OPERATION_NAME_WRITE_SSBO_VERTEX,
1791             OPERATION_NAME_WRITE_SSBO_TESSELLATION_CONTROL,
1792             OPERATION_NAME_WRITE_SSBO_TESSELLATION_EVALUATION,
1793             OPERATION_NAME_WRITE_SSBO_GEOMETRY,
1794             OPERATION_NAME_WRITE_SSBO_FRAGMENT,
1795             OPERATION_NAME_WRITE_SSBO_COMPUTE,
1796             OPERATION_NAME_WRITE_SSBO_COMPUTE_INDIRECT,
1797             OPERATION_NAME_WRITE_IMAGE_VERTEX,
1798             OPERATION_NAME_WRITE_IMAGE_TESSELLATION_CONTROL,
1799             OPERATION_NAME_WRITE_IMAGE_TESSELLATION_EVALUATION,
1800             OPERATION_NAME_WRITE_IMAGE_GEOMETRY,
1801             OPERATION_NAME_WRITE_IMAGE_FRAGMENT,
1802             OPERATION_NAME_WRITE_IMAGE_COMPUTE,
1803             OPERATION_NAME_WRITE_IMAGE_COMPUTE_INDIRECT,
1804         };
1805         static const OperationName readOps[] = {
1806             OPERATION_NAME_READ_COPY_BUFFER,
1807             OPERATION_NAME_READ_COPY_BUFFER_TO_IMAGE,
1808             OPERATION_NAME_READ_COPY_IMAGE_TO_BUFFER,
1809             OPERATION_NAME_READ_COPY_IMAGE,
1810             OPERATION_NAME_READ_BLIT_IMAGE,
1811             OPERATION_NAME_READ_UBO_VERTEX,
1812             OPERATION_NAME_READ_UBO_TESSELLATION_CONTROL,
1813             OPERATION_NAME_READ_UBO_TESSELLATION_EVALUATION,
1814             OPERATION_NAME_READ_UBO_GEOMETRY,
1815             OPERATION_NAME_READ_UBO_FRAGMENT,
1816             OPERATION_NAME_READ_UBO_COMPUTE,
1817             OPERATION_NAME_READ_UBO_COMPUTE_INDIRECT,
1818             OPERATION_NAME_READ_SSBO_VERTEX,
1819             OPERATION_NAME_READ_SSBO_TESSELLATION_CONTROL,
1820             OPERATION_NAME_READ_SSBO_TESSELLATION_EVALUATION,
1821             OPERATION_NAME_READ_SSBO_GEOMETRY,
1822             OPERATION_NAME_READ_SSBO_FRAGMENT,
1823             OPERATION_NAME_READ_SSBO_COMPUTE,
1824             OPERATION_NAME_READ_SSBO_COMPUTE_INDIRECT,
1825             OPERATION_NAME_READ_IMAGE_VERTEX,
1826             OPERATION_NAME_READ_IMAGE_TESSELLATION_CONTROL,
1827             OPERATION_NAME_READ_IMAGE_TESSELLATION_EVALUATION,
1828             OPERATION_NAME_READ_IMAGE_GEOMETRY,
1829             OPERATION_NAME_READ_IMAGE_FRAGMENT,
1830             OPERATION_NAME_READ_IMAGE_COMPUTE,
1831             OPERATION_NAME_READ_IMAGE_COMPUTE_INDIRECT,
1832             OPERATION_NAME_READ_INDIRECT_BUFFER_DRAW,
1833             OPERATION_NAME_READ_INDIRECT_BUFFER_DRAW_INDEXED,
1834             OPERATION_NAME_READ_INDIRECT_BUFFER_DISPATCH,
1835             OPERATION_NAME_READ_VERTEX_INPUT,
1836         };
1837 
1838         for (int writeOpNdx = 0; writeOpNdx < DE_LENGTH_OF_ARRAY(writeOps); ++writeOpNdx)
1839             for (int readOpNdx = 0; readOpNdx < DE_LENGTH_OF_ARRAY(readOps); ++readOpNdx)
1840             {
1841                 const OperationName writeOp   = writeOps[writeOpNdx];
1842                 const OperationName readOp    = readOps[readOpNdx];
1843                 const std::string opGroupName = getOperationName(writeOp) + "_" + getOperationName(readOp);
1844                 bool empty                    = true;
1845 
1846                 de::MovePtr<tcu::TestCaseGroup> opGroup(new tcu::TestCaseGroup(m_testCtx, opGroupName.c_str()));
1847 
1848                 for (int resourceNdx = 0; resourceNdx < DE_LENGTH_OF_ARRAY(s_resources); ++resourceNdx)
1849                 {
1850                     const ResourceDescription &resource = s_resources[resourceNdx];
1851                     std::string name                    = getResourceName(resource);
1852 
1853                     if (isResourceSupported(writeOp, resource) && isResourceSupported(readOp, resource))
1854                     {
1855                         opGroup->addChild(new WaitBeforeSignalTestCase(m_testCtx, name, m_type, resource, writeOp,
1856                                                                        readOp, m_pipelineCacheData));
1857                         empty = false;
1858                     }
1859                 }
1860                 if (!empty)
1861                     addChild(opGroup.release());
1862             }
1863     }
1864 
1865     void deinit(void)
1866     {
1867         cleanupGroup();
1868     }
1869 
1870 private:
1871     SynchronizationType m_type;
1872 
1873     // Tests in this group share pipeline cache data to speed up test
1874     // execution.
1875     PipelineCacheData m_pipelineCacheData;
1876 };
1877 
1878 // Creates a tree of operations like this :
1879 //
1880 // WriteOp1-Queue0 --> CopyOp2-Queue1 --> ReadOp-Queue4
1881 //                 |
1882 //                 --> CopyOp3-Queue3 --> ReadOp-Queue5
1883 //
1884 // Verifies that we get the data propagated properly.
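// The write is submitted first, each copy waits on the write's timeline value (and on
// the previous copy, to keep per-queue ordering), and each read waits on its copy. As in
// the wait-before-signal case, all submissions are queued up front and the host signal
// of m_hostTimelineValue releases the whole tree.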
1885 class OneToNTestInstance : public TestInstance
1886 {
1887 public:
1888     OneToNTestInstance(Context &context, SynchronizationType type, const ResourceDescription &resourceDesc,
1889                        const SharedPtr<OperationSupport> &writeOp, const SharedPtr<OperationSupport> &readOp,
1890                        PipelineCacheData &pipelineCacheData)
1891         : TestInstance(context)
1892         , m_type(type)
1893         , m_resourceDesc(resourceDesc)
1894         , m_device(SingletonDevice::getDevice(context, type))
1895         , m_context(context)
1896 #ifndef CTS_USES_VULKANSC
1897         , m_deviceDriver(de::MovePtr<DeviceDriver>(
1898               new DeviceDriver(context.getPlatformInterface(), context.getInstance(), *m_device,
1899                                context.getUsedApiVersion(), context.getTestContext().getCommandLine())))
1900 #else
1901         , m_deviceDriver(de::MovePtr<DeviceDriverSC, DeinitDeviceDeleter>(
1902               new DeviceDriverSC(context.getPlatformInterface(), context.getInstance(), *m_device,
1903                                  context.getTestContext().getCommandLine(), context.getResourceInterface(),
1904                                  m_context.getDeviceVulkanSC10Properties(), m_context.getDeviceProperties(),
1905                                  context.getUsedApiVersion()),
1906               vk::DeinitDeviceDeleter(context.getResourceInterface().get(), *m_device)))
1907 #endif // CTS_USES_VULKANSC
1908         , m_allocator(new SimpleAllocator(
1909               *m_deviceDriver, *m_device,
1910               getPhysicalDeviceMemoryProperties(context.getInstanceInterface(),
1911                                                 chooseDevice(context.getInstanceInterface(), context.getInstance(),
1912                                                              context.getTestContext().getCommandLine()))))
1913         , m_opContext(context, type, *m_deviceDriver, *m_device, *m_allocator, pipelineCacheData)
1914     {
1915         const auto &vki                       = m_context.getInstanceInterface();
1916         const auto instance                   = m_context.getInstance();
1917         const DeviceInterface &vk             = *m_deviceDriver;
1918         const VkDevice device                 = *m_device;
1919         const VkPhysicalDevice physicalDevice = chooseDevice(vki, instance, context.getTestContext().getCommandLine());
1920         const std::vector<VkQueueFamilyProperties> queueFamilyProperties =
1921             getPhysicalDeviceQueueFamilyProperties(vki, physicalDevice);
1922         const uint32_t universalQueueFamilyIndex = context.getUniversalQueueFamilyIndex();
1923         de::Random rng(1234);
1924         uint32_t lastCopyOpIdx = 0;
1925         uint64_t lastSubmitValue;
1926 
1927         m_hostTimelineValue = rng.getInt(0, 1000);
1928 
1929         m_writeIteration = makeSharedPtr(new QueueTimelineIteration(
1930             writeOp, m_hostTimelineValue, getDeviceQueue(vk, device, universalQueueFamilyIndex, 0),
1931             universalQueueFamilyIndex, rng));
1932         lastSubmitValue  = m_writeIteration->timelineValue;
1933 
1934         // Go through all the queues and try to use all the ones that
1935         // support the type of resource we're dealing with.
1936         for (uint32_t familyIdx = 0; familyIdx < queueFamilyProperties.size(); familyIdx++)
1937         {
1938             for (uint32_t instanceIdx = 0; instanceIdx < queueFamilyProperties[familyIdx].queueCount; instanceIdx++)
1939             {
1940                 // Find an operation compatible with the queue
1941                 for (uint32_t copyOpIdx = 0; copyOpIdx < DE_LENGTH_OF_ARRAY(s_copyOps); copyOpIdx++)
1942                 {
1943                     OperationName copyOpName = s_copyOps[(lastCopyOpIdx + copyOpIdx) % DE_LENGTH_OF_ARRAY(s_copyOps)];
1944 
1945                     if (isResourceSupported(copyOpName, resourceDesc))
1946                     {
1947                         SharedPtr<OperationSupport> copyOpSupport(
1948                             makeOperationSupport(copyOpName, resourceDesc).release());
1949                         VkQueueFlags copyOpQueueFlags = copyOpSupport->getQueueFlags(m_opContext);
1950 
1951                         if ((copyOpQueueFlags & queueFamilyProperties[familyIdx].queueFlags) != copyOpQueueFlags)
1952                             continue;
1953 
1954                         VkShaderStageFlagBits writeStage = writeOp->getShaderStage();
1955                         if (writeStage != VK_SHADER_STAGE_FLAG_BITS_MAX_ENUM &&
1956                             !isStageSupported(writeStage, copyOpQueueFlags))
1957                         {
1958                             continue;
1959                         }
1960                         VkShaderStageFlagBits readStage = readOp->getShaderStage();
1961                         if (readStage != VK_SHADER_STAGE_FLAG_BITS_MAX_ENUM &&
1962                             !isStageSupported(readStage, copyOpQueueFlags))
1963                         {
1964                             continue;
1965                         }
1966 
1967                         m_copyIterations.push_back(makeSharedPtr(new QueueTimelineIteration(
1968                             copyOpSupport, lastSubmitValue, getDeviceQueue(vk, device, familyIdx, instanceIdx),
1969                             familyIdx, rng)));
1970                         lastSubmitValue = m_copyIterations.back()->timelineValue;
1971                         break;
1972                     }
1973                 }
1974             }
1975         }
1976 
1977         for (uint32_t copyOpIdx = 0; copyOpIdx < m_copyIterations.size(); copyOpIdx++)
1978         {
1979             bool added = false;
1980 
1981             for (uint32_t familyIdx = 0; familyIdx < queueFamilyProperties.size() && !added; familyIdx++)
1982             {
1983                 for (uint32_t instanceIdx = 0; instanceIdx < queueFamilyProperties[familyIdx].queueCount && !added;
1984                      instanceIdx++)
1985                 {
1986                     VkQueueFlags readOpQueueFlags = readOp->getQueueFlags(m_opContext);
1987                     // Explicitly check if the readOp requires a graphics queue
1988                     if ((readOpQueueFlags & VK_QUEUE_GRAPHICS_BIT) != 0)
1989                     {
1990                         // If none of the queue families support graphics, report unsupported
1991                         bool graphicsSupported = false;
1992                         for (const auto &prop : queueFamilyProperties)
1993                         {
1994                             if ((prop.queueFlags & VK_QUEUE_GRAPHICS_BIT) != 0)
1995                             {
1996                                 graphicsSupported = true;
1997                                 break;
1998                             }
1999                         }
2000                         if (!graphicsSupported)
2001                         {
2002                             TCU_THROW(NotSupportedError, "Graphics queue required but not supported by the driver");
2003                         }
2004                     }
2005                     // If readOpQueueFlags contains the transfer bit, also accept queue families that support graphics or compute
2006                     // operations, since such queues implicitly support transfer even when they do not report VK_QUEUE_TRANSFER_BIT.
2007                     if (((readOpQueueFlags & queueFamilyProperties[familyIdx].queueFlags) != readOpQueueFlags) &&
2008                         (((readOpQueueFlags & VK_QUEUE_TRANSFER_BIT) == 0) ||
2009                          ((queueFamilyProperties[familyIdx].queueFlags &
2010                            (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT)) == 0)))
2011                         continue;
2012 
2013                     // Add the read operation on the universal queue; it should be
2014                     // submitted in order with respect to the write operation.
2015                     m_readIterations.push_back(makeSharedPtr(new QueueTimelineIteration(
2016                         readOp, lastSubmitValue, getDeviceQueue(vk, device, universalQueueFamilyIndex, 0),
2017                         universalQueueFamilyIndex, rng)));
2018                     lastSubmitValue = m_readIterations.back()->timelineValue;
2019 
2020                     added = true;
2021                 }
2022             }
2023 
2024             DE_ASSERT(added);
2025         }
2026 
2027         DE_ASSERT(m_copyIterations.size() == m_readIterations.size());
2028 
2029         // Now create the resources with the usage flags associated with the
2030         // operations performed on each resource.
2031         {
2032             uint32_t writeUsage = writeOp->getOutResourceUsageFlags();
2033 
2034             for (uint32_t copyOpIdx = 0; copyOpIdx < m_copyIterations.size(); copyOpIdx++)
2035             {
2036                 writeUsage |= m_copyIterations[copyOpIdx]->opSupport->getInResourceUsageFlags();
2037             }
2038             m_writeResource      = makeSharedPtr(new Resource(m_opContext, resourceDesc, writeUsage));
2039             m_writeIteration->op = makeSharedPtr(writeOp->build(m_opContext, *m_writeResource).release());
2040 
2041             for (uint32_t copyOpIdx = 0; copyOpIdx < m_copyIterations.size(); copyOpIdx++)
2042             {
2043                 uint32_t usage = m_copyIterations[copyOpIdx]->opSupport->getOutResourceUsageFlags() |
2044                                  m_readIterations[copyOpIdx]->opSupport->getInResourceUsageFlags();
2045 
2046                 m_copyResources.push_back(makeSharedPtr(new Resource(m_opContext, resourceDesc, usage)));
2047 
2048                 m_copyIterations[copyOpIdx]->op =
2049                     makeSharedPtr(m_copyIterations[copyOpIdx]
2050                                       ->opSupport->build(m_opContext, *m_writeResource, *m_copyResources[copyOpIdx])
2051                                       .release());
2052                 m_readIterations[copyOpIdx]->op =
2053                     makeSharedPtr(readOp->build(m_opContext, *m_copyResources[copyOpIdx]).release());
2054             }
2055         }
2056     }
2057 
2058     ~OneToNTestInstance()
2059     {
2060     }
2061 
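    // Record the queue family ownership transfer from the iteration that wrote the resource
    // to the iteration that reads it. For images, originalLayout selects the oldLayout: the
    // first barrier on a resource transitions away from the writer's layout, while later
    // barriers start from the reader's layout because the image has already been transitioned
    // by an earlier consumer.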
2062     void recordBarrier(const DeviceInterface &vk, VkCommandBuffer cmdBuffer, const QueueTimelineIteration &inIter,
2063                        const QueueTimelineIteration &outIter, const Resource &resource, bool originalLayout)
2064     {
2065         const SyncInfo writeSync                         = inIter.op->getOutSyncInfo();
2066         const SyncInfo readSync                          = outIter.op->getInSyncInfo();
2067         SynchronizationWrapperPtr synchronizationWrapper = getSynchronizationWrapper(m_type, vk, true);
2068 
2069         if (resource.getType() == RESOURCE_TYPE_IMAGE)
2070         {
2071             DE_ASSERT(writeSync.imageLayout != VK_IMAGE_LAYOUT_UNDEFINED);
2072             DE_ASSERT(readSync.imageLayout != VK_IMAGE_LAYOUT_UNDEFINED);
2073 
2074             const VkImageMemoryBarrier2KHR imageMemoryBarrier2 = makeImageMemoryBarrier2(
2075                 writeSync.stageMask,  // VkPipelineStageFlags2KHR            srcStageMask
2076                 writeSync.accessMask, // VkAccessFlags2KHR                srcAccessMask
2077                 readSync.stageMask,   // VkPipelineStageFlags2KHR            dstStageMask
2078                 readSync.accessMask,  // VkAccessFlags2KHR                dstAccessMask
2079                 originalLayout ? writeSync.imageLayout :
2080                                  readSync.imageLayout, // VkImageLayout                    oldLayout
2081                 readSync.imageLayout,                  // VkImageLayout                    newLayout
2082                 resource.getImage().handle,            // VkImage                            image
2083                 resource.getImage().subresourceRange,  // VkImageSubresourceRange            subresourceRange
2084                 inIter.queueFamilyIdx,                 // uint32_t                            srcQueueFamilyIndex
2085                 outIter.queueFamilyIdx                 // uint32_t                            destQueueFamilyIndex
2086             );
2087             VkDependencyInfoKHR dependencyInfo = makeCommonDependencyInfo(DE_NULL, DE_NULL, &imageMemoryBarrier2);
2088             synchronizationWrapper->cmdPipelineBarrier(cmdBuffer, &dependencyInfo);
2089         }
2090         else
2091         {
2092             const VkBufferMemoryBarrier2KHR bufferMemoryBarrier2 = makeBufferMemoryBarrier2(
2093                 writeSync.stageMask,         // VkPipelineStageFlags2KHR            srcStageMask
2094                 writeSync.accessMask,        // VkAccessFlags2KHR                srcAccessMask
2095                 readSync.stageMask,          // VkPipelineStageFlags2KHR            dstStageMask
2096                 readSync.accessMask,         // VkAccessFlags2KHR                dstAccessMask
2097                 resource.getBuffer().handle, // VkBuffer                            buffer
2098                 0,                           // VkDeviceSize                        offset
2099                 VK_WHOLE_SIZE,               // VkDeviceSize                        size
2100                 inIter.queueFamilyIdx,       // uint32_t                            srcQueueFamilyIndex
2101                 outIter.queueFamilyIdx       // uint32_t                            dstQueueFamilyIndex
2102             );
2103             VkDependencyInfoKHR dependencyInfo = makeCommonDependencyInfo(DE_NULL, &bufferMemoryBarrier2);
2104             synchronizationWrapper->cmdPipelineBarrier(cmdBuffer, &dependencyInfo);
2105         }
2106     }
2107 
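    // Submit a single command buffer that waits on up to two timeline values (the producing
    // operation and, where needed, the previously submitted sibling) and signals this
    // iteration's own timeline value.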
2108     void submit(const DeviceInterface &vk, VkCommandBuffer cmdBuffer, const QueueTimelineIteration &iter,
2109                 VkSemaphore semaphore, const uint64_t *waitValues, const uint32_t waitValuesCount)
2110     {
2111         VkSemaphoreSubmitInfoKHR waitSemaphoreSubmitInfo[] = {
2112             makeCommonSemaphoreSubmitInfo(semaphore, waitValues[0], VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR),
2113             makeCommonSemaphoreSubmitInfo(semaphore, waitValues[1], VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR)};
2114         VkSemaphoreSubmitInfoKHR signalSemaphoreSubmitInfo =
2115             makeCommonSemaphoreSubmitInfo(semaphore, iter.timelineValue, VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT_KHR);
2116 
2117         VkCommandBufferSubmitInfoKHR commandBufferSubmitInfo = makeCommonCommandBufferSubmitInfo(cmdBuffer);
2118         SynchronizationWrapperPtr synchronizationWrapper     = getSynchronizationWrapper(m_type, vk, true);
2119 
2120         synchronizationWrapper->addSubmitInfo(
2121             waitValuesCount,            // uint32_t                                waitSemaphoreInfoCount
2122             waitSemaphoreSubmitInfo,    // const VkSemaphoreSubmitInfoKHR*        pWaitSemaphoreInfos
2123             1u,                         // uint32_t                                commandBufferInfoCount
2124             &commandBufferSubmitInfo,   // const VkCommandBufferSubmitInfoKHR*    pCommandBufferInfos
2125             1u,                         // uint32_t                                signalSemaphoreInfoCount
2126             &signalSemaphoreSubmitInfo, // const VkSemaphoreSubmitInfoKHR*        pSignalSemaphoreInfos
2127             true, true);
2128 
2129         VK_CHECK(synchronizationWrapper->queueSubmit(iter.queue, DE_NULL));
2130     }
2131 
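    // Records and submits the write, all copies and all reads up front, then releases
    // everything with one host signal and compares each read result against the randomized
    // input data.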
2132     tcu::TestStatus iterate(void)
2133     {
2134         const DeviceInterface &vk = *m_deviceDriver;
2135         const VkDevice device     = *m_device;
2136         const Unique<VkSemaphore> semaphore(createSemaphoreType(vk, device, VK_SEMAPHORE_TYPE_TIMELINE));
2137         Unique<VkCommandPool> writeCmdPool(createCommandPool(
2138             vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, m_context.getUniversalQueueFamilyIndex()));
2139         Unique<VkCommandBuffer> writeCmdBuffer(makeCommandBuffer(vk, device, *writeCmdPool));
2140         std::vector<SharedPtr<Move<VkCommandPool>>> copyCmdPools;
2141         std::vector<SharedPtr<Move<VkCommandBuffer>>> copyPtrCmdBuffers;
2142         std::vector<SharedPtr<Move<VkCommandPool>>> readCmdPools;
2143         std::vector<SharedPtr<Move<VkCommandBuffer>>> readPtrCmdBuffers;
2144 
2145         for (uint32_t copyOpNdx = 0; copyOpNdx < m_copyIterations.size(); copyOpNdx++)
2146         {
2147             copyCmdPools.push_back(
2148                 makeVkSharedPtr(createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
2149                                                   m_copyIterations[copyOpNdx]->queueFamilyIdx)));
2150             copyPtrCmdBuffers.push_back(makeVkSharedPtr(makeCommandBuffer(vk, device, **copyCmdPools.back())));
2151 
2152             readCmdPools.push_back(
2153                 makeVkSharedPtr(createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
2154                                                   m_readIterations[copyOpNdx]->queueFamilyIdx)));
2155             readPtrCmdBuffers.push_back(makeVkSharedPtr(makeCommandBuffer(vk, device, **readCmdPools.back())));
2156         }
2157 
2158         // Randomize the data copied over.
2159         {
2160             const Data startData = m_writeIteration->op->getData();
2161             Data randomizedData;
2162             std::vector<uint8_t> dataArray;
2163 
2164             dataArray.resize(startData.size);
2165             randomizeData(dataArray, m_resourceDesc);
2166             randomizedData.size = dataArray.size();
2167             randomizedData.data = &dataArray[0];
2168             m_writeIteration->op->setData(randomizedData);
2169         }
2170 
2171         // Record command buffers
2172         {
2173             beginCommandBuffer(vk, *writeCmdBuffer);
2174             m_writeIteration->op->recordCommands(*writeCmdBuffer);
2175             endCommandBuffer(vk, *writeCmdBuffer);
2176 
2177             for (uint32_t copyOpIdx = 0; copyOpIdx < m_copyIterations.size(); copyOpIdx++)
2178             {
2179                 beginCommandBuffer(vk, **copyPtrCmdBuffers[copyOpIdx]);
2180                 recordBarrier(vk, **copyPtrCmdBuffers[copyOpIdx], *m_writeIteration, *m_copyIterations[copyOpIdx],
2181                               *m_writeResource, copyOpIdx == 0);
2182                 m_copyIterations[copyOpIdx]->op->recordCommands(**copyPtrCmdBuffers[copyOpIdx]);
2183                 endCommandBuffer(vk, **copyPtrCmdBuffers[copyOpIdx]);
2184             }
2185 
2186             for (uint32_t readOpIdx = 0; readOpIdx < m_readIterations.size(); readOpIdx++)
2187             {
2188                 beginCommandBuffer(vk, **readPtrCmdBuffers[readOpIdx]);
2189                 recordBarrier(vk, **readPtrCmdBuffers[readOpIdx], *m_copyIterations[readOpIdx],
2190                               *m_readIterations[readOpIdx], *m_copyResources[readOpIdx], true);
2191                 m_readIterations[readOpIdx]->op->recordCommands(**readPtrCmdBuffers[readOpIdx]);
2192                 endCommandBuffer(vk, **readPtrCmdBuffers[readOpIdx]);
2193             }
2194         }
2195 
2196         // Submit
2197         {
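            // Chain the submissions on the timeline semaphore: each copy waits on the write (and on
            // the previous copy, if any), while each read waits on its own copy and on the previous
            // read (the first read waits on the last copy instead).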
2198             submit(vk, *writeCmdBuffer, *m_writeIteration, *semaphore, &m_hostTimelineValue, 1);
2199             for (uint32_t copyOpIdx = 0; copyOpIdx < m_copyIterations.size(); copyOpIdx++)
2200             {
2201                 uint64_t waitValues[2] = {
2202                     m_writeIteration->timelineValue,
2203                     copyOpIdx > 0 ? m_copyIterations[copyOpIdx - 1]->timelineValue : 0,
2204                 };
2205 
2206                 submit(vk, **copyPtrCmdBuffers[copyOpIdx], *m_copyIterations[copyOpIdx], *semaphore, waitValues,
2207                        copyOpIdx > 0 ? 2 : 1);
2208             }
2209             for (uint32_t readOpIdx = 0; readOpIdx < m_readIterations.size(); readOpIdx++)
2210             {
2211                 uint64_t waitValues[2] = {
2212                     m_copyIterations[readOpIdx]->timelineValue,
2213                     readOpIdx > 0 ? m_readIterations[readOpIdx - 1]->timelineValue :
2214                                     m_copyIterations.back()->timelineValue,
2215                 };
2216 
2217                 submit(vk, **readPtrCmdBuffers[readOpIdx], *m_readIterations[readOpIdx], *semaphore, waitValues, 2);
2218             }
2219 
2220             // Kick off the whole chain from the host.
2221             hostSignal(vk, device, *semaphore, m_hostTimelineValue);
2222             VK_CHECK(vk.deviceWaitIdle(device));
2223         }
2224 
2225         {
2226             const Data expected = m_writeIteration->op->getData();
2227 
2228             for (uint32_t readOpIdx = 0; readOpIdx < m_readIterations.size(); readOpIdx++)
2229             {
2230                 const Data actual = m_readIterations[readOpIdx]->op->getData();
2231 
2232                 if (0 != deMemCmp(expected.data, actual.data, expected.size))
2233                     return tcu::TestStatus::fail("Memory contents don't match");
2234             }
2235         }
2236 
2237         return tcu::TestStatus::pass("OK");
2238     }
2239 
2240 protected:
2241     SynchronizationType m_type;
2242     ResourceDescription m_resourceDesc;
2243     const Unique<VkDevice> &m_device;
2244     const Context &m_context;
2245 #ifndef CTS_USES_VULKANSC
2246     de::MovePtr<vk::DeviceDriver> m_deviceDriver;
2247 #else
2248     de::MovePtr<vk::DeviceDriverSC, vk::DeinitDeviceDeleter> m_deviceDriver;
2249 #endif // CTS_USES_VULKANSC
2250     MovePtr<Allocator> m_allocator;
2251     OperationContext m_opContext;
2252     SharedPtr<QueueTimelineIteration> m_writeIteration;
2253     std::vector<SharedPtr<QueueTimelineIteration>> m_copyIterations;
2254     std::vector<SharedPtr<QueueTimelineIteration>> m_readIterations;
2255     SharedPtr<Resource> m_writeResource;
2256     std::vector<SharedPtr<Resource>> m_copyResources;
2257     uint64_t m_hostTimelineValue;
2258 };
2259 
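// Test case wrapper for the one-to-N timeline semaphore tests: checks the required extensions,
// builds the programs for the write, read and copy operations, and creates a OneToNTestInstance
// for the selected resource description.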
2260 class OneToNTestCase : public TestCase
2261 {
2262 public:
2263     OneToNTestCase(tcu::TestContext &testCtx, const std::string &name, SynchronizationType type,
2264                    const ResourceDescription resourceDesc, const OperationName writeOp, const OperationName readOp,
2265                    PipelineCacheData &pipelineCacheData)
2266         : TestCase(testCtx, name)
2267         , m_type(type)
2268         , m_resourceDesc(resourceDesc)
2269         , m_writeOp(makeOperationSupport(writeOp, resourceDesc).release())
2270         , m_readOp(makeOperationSupport(readOp, resourceDesc).release())
2271         , m_pipelineCacheData(pipelineCacheData)
2272     {
2273     }
2274 
2275     void checkSupport(Context &context) const override
2276     {
2277         context.requireDeviceFunctionality("VK_KHR_timeline_semaphore");
2278         if (m_type == SynchronizationType::SYNCHRONIZATION2)
2279             context.requireDeviceFunctionality("VK_KHR_synchronization2");
2280     }
2281 
2282     void initPrograms(SourceCollections &programCollection) const override
2283     {
2284         m_writeOp->initPrograms(programCollection);
2285         m_readOp->initPrograms(programCollection);
2286 
2287         for (uint32_t copyOpNdx = 0; copyOpNdx < DE_LENGTH_OF_ARRAY(s_copyOps); copyOpNdx++)
2288         {
2289             if (isResourceSupported(s_copyOps[copyOpNdx], m_resourceDesc))
2290                 makeOperationSupport(s_copyOps[copyOpNdx], m_resourceDesc)->initPrograms(programCollection);
2291         }
2292     }
2293 
2294     TestInstance *createInstance(Context &context) const override
2295     {
2296         return new OneToNTestInstance(context, m_type, m_resourceDesc, m_writeOp, m_readOp, m_pipelineCacheData);
2297     }
2298 
2299 private:
2300     SynchronizationType m_type;
2301     const ResourceDescription m_resourceDesc;
2302     const SharedPtr<OperationSupport> m_writeOp;
2303     const SharedPtr<OperationSupport> m_readOp;
2304     PipelineCacheData &m_pipelineCacheData;
2305 };
2306 
2307 class OneToNTests : public tcu::TestCaseGroup
2308 {
2309 public:
2310     OneToNTests(tcu::TestContext &testCtx, SynchronizationType type)
2311         : tcu::TestCaseGroup(testCtx, "one_to_n")
2312         , m_type(type)
2313     {
2314     }
2315 
2316     void init(void)
2317     {
2318         static const OperationName writeOps[] = {
2319             OPERATION_NAME_WRITE_COPY_BUFFER,
2320             OPERATION_NAME_WRITE_COPY_BUFFER_TO_IMAGE,
2321             OPERATION_NAME_WRITE_COPY_IMAGE_TO_BUFFER,
2322             OPERATION_NAME_WRITE_COPY_IMAGE,
2323             OPERATION_NAME_WRITE_BLIT_IMAGE,
2324             OPERATION_NAME_WRITE_SSBO_VERTEX,
2325             OPERATION_NAME_WRITE_SSBO_TESSELLATION_CONTROL,
2326             OPERATION_NAME_WRITE_SSBO_TESSELLATION_EVALUATION,
2327             OPERATION_NAME_WRITE_SSBO_GEOMETRY,
2328             OPERATION_NAME_WRITE_SSBO_FRAGMENT,
2329             OPERATION_NAME_WRITE_SSBO_COMPUTE,
2330             OPERATION_NAME_WRITE_SSBO_COMPUTE_INDIRECT,
2331             OPERATION_NAME_WRITE_IMAGE_VERTEX,
2332             OPERATION_NAME_WRITE_IMAGE_TESSELLATION_CONTROL,
2333             OPERATION_NAME_WRITE_IMAGE_TESSELLATION_EVALUATION,
2334             OPERATION_NAME_WRITE_IMAGE_GEOMETRY,
2335             OPERATION_NAME_WRITE_IMAGE_FRAGMENT,
2336             OPERATION_NAME_WRITE_IMAGE_COMPUTE,
2337             OPERATION_NAME_WRITE_IMAGE_COMPUTE_INDIRECT,
2338         };
2339         static const OperationName readOps[] = {
2340             OPERATION_NAME_READ_COPY_BUFFER,
2341             OPERATION_NAME_READ_COPY_BUFFER_TO_IMAGE,
2342             OPERATION_NAME_READ_COPY_IMAGE_TO_BUFFER,
2343             OPERATION_NAME_READ_COPY_IMAGE,
2344             OPERATION_NAME_READ_BLIT_IMAGE,
2345             OPERATION_NAME_READ_UBO_VERTEX,
2346             OPERATION_NAME_READ_UBO_TESSELLATION_CONTROL,
2347             OPERATION_NAME_READ_UBO_TESSELLATION_EVALUATION,
2348             OPERATION_NAME_READ_UBO_GEOMETRY,
2349             OPERATION_NAME_READ_UBO_FRAGMENT,
2350             OPERATION_NAME_READ_UBO_COMPUTE,
2351             OPERATION_NAME_READ_UBO_COMPUTE_INDIRECT,
2352             OPERATION_NAME_READ_SSBO_VERTEX,
2353             OPERATION_NAME_READ_SSBO_TESSELLATION_CONTROL,
2354             OPERATION_NAME_READ_SSBO_TESSELLATION_EVALUATION,
2355             OPERATION_NAME_READ_SSBO_GEOMETRY,
2356             OPERATION_NAME_READ_SSBO_FRAGMENT,
2357             OPERATION_NAME_READ_SSBO_COMPUTE,
2358             OPERATION_NAME_READ_SSBO_COMPUTE_INDIRECT,
2359             OPERATION_NAME_READ_IMAGE_VERTEX,
2360             OPERATION_NAME_READ_IMAGE_TESSELLATION_CONTROL,
2361             OPERATION_NAME_READ_IMAGE_TESSELLATION_EVALUATION,
2362             OPERATION_NAME_READ_IMAGE_GEOMETRY,
2363             OPERATION_NAME_READ_IMAGE_FRAGMENT,
2364             OPERATION_NAME_READ_IMAGE_COMPUTE,
2365             OPERATION_NAME_READ_IMAGE_COMPUTE_INDIRECT,
2366             OPERATION_NAME_READ_INDIRECT_BUFFER_DRAW,
2367             OPERATION_NAME_READ_INDIRECT_BUFFER_DRAW_INDEXED,
2368             OPERATION_NAME_READ_INDIRECT_BUFFER_DISPATCH,
2369             OPERATION_NAME_READ_VERTEX_INPUT,
2370         };
2371 
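        // Build one sub-group per (write, read) operation pair and add a test for every
        // resource description supported by both operations.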
2372         for (int writeOpNdx = 0; writeOpNdx < DE_LENGTH_OF_ARRAY(writeOps); ++writeOpNdx)
2373             for (int readOpNdx = 0; readOpNdx < DE_LENGTH_OF_ARRAY(readOps); ++readOpNdx)
2374             {
2375                 const OperationName writeOp   = writeOps[writeOpNdx];
2376                 const OperationName readOp    = readOps[readOpNdx];
2377                 const std::string opGroupName = getOperationName(writeOp) + "_" + getOperationName(readOp);
2378                 bool empty                    = true;
2379 
2380                 de::MovePtr<tcu::TestCaseGroup> opGroup(new tcu::TestCaseGroup(m_testCtx, opGroupName.c_str()));
2381 
2382                 for (int resourceNdx = 0; resourceNdx < DE_LENGTH_OF_ARRAY(s_resources); ++resourceNdx)
2383                 {
2384                     const ResourceDescription &resource = s_resources[resourceNdx];
2385                     std::string name                    = getResourceName(resource);
2386 
2387                     if (isResourceSupported(writeOp, resource) && isResourceSupported(readOp, resource))
2388                     {
2389                         opGroup->addChild(new OneToNTestCase(m_testCtx, name, m_type, resource, writeOp, readOp,
2390                                                              m_pipelineCacheData));
2391                         empty = false;
2392                     }
2393                 }
2394                 if (!empty)
2395                     addChild(opGroup.release());
2396             }
2397     }
2398 
2399     void deinit(void)
2400     {
2401         cleanupGroup();
2402     }
2403 
2404 private:
2405     SynchronizationType m_type;
2406 
2407     // synchronization.op tests share pipeline cache data to speed up test
2408     // execution.
2409     PipelineCacheData m_pipelineCacheData;
2410 };
2411 
2412 #ifndef CTS_USES_VULKANSC
2413 
2414 // Returns a nonzero initial value for a semaphore; semId is an index assigned to each semaphore by callers.
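// For example, semId 0 yields an initial value of 1000 and semId 1 yields 2000.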
2415 uint64_t getInitialValue(uint32_t semId)
2416 {
2417     return (semId + 1ull) * 1000ull;
2418 }
2419 
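// Number of timeline semaphores the sparse bind operation waits on and signals, respectively.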
2420 struct SparseBindParams
2421 {
2422     uint32_t numWaitSems;
2423     uint32_t numSignalSems;
2424 };
2425 
2426 class SparseBindCase : public vkt::TestCase
2427 {
2428 public:
2429     SparseBindCase(tcu::TestContext &testCtx, const std::string &name, const SparseBindParams &params);
2430     virtual ~SparseBindCase(void)
2431     {
2432     }
2433 
2434     virtual TestInstance *createInstance(Context &context) const;
2435     virtual void checkSupport(Context &context) const;
2436 
2437 private:
2438     SparseBindParams m_params;
2439 };
2440 
2441 class SparseBindInstance : public vkt::TestInstance
2442 {
2443 public:
2444     SparseBindInstance(Context &context, const SparseBindParams &params);
2445     virtual ~SparseBindInstance(void)
2446     {
2447     }
2448 
2449     virtual tcu::TestStatus iterate(void);
2450 
2451 private:
2452     SparseBindParams m_params;
2453 };
2454 
2455 SparseBindCase::SparseBindCase(tcu::TestContext &testCtx, const std::string &name, const SparseBindParams &params)
2456     : vkt::TestCase(testCtx, name)
2457     , m_params(params)
2458 {
2459 }
2460 
2461 TestInstance *SparseBindCase::createInstance(Context &context) const
2462 {
2463     return new SparseBindInstance(context, m_params);
2464 }
2465 
2466 void SparseBindCase::checkSupport(Context &context) const
2467 {
2468     // Check support for sparse binding and timeline semaphores.
2469     context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_BINDING);
2470     context.requireDeviceFunctionality("VK_KHR_timeline_semaphore");
2471 }
2472 
2473 SparseBindInstance::SparseBindInstance(Context &context, const SparseBindParams &params)
2474     : vkt::TestInstance(context)
2475     , m_params(params)
2476 {
2477 }
2478 
2479 void queueBindSparse(const vk::DeviceInterface &vkd, vk::VkQueue queue, uint32_t bindInfoCount,
2480                      const vk::VkBindSparseInfo *pBindInfo)
2481 {
2482     VK_CHECK(vkd.queueBindSparse(queue, bindInfoCount, pBindInfo, DE_NULL));
2483 }
2484 
2485 #endif // CTS_USES_VULKANSC
2486 
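// Pairs a timeline semaphore with the initial counter value it was created with.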
2487 struct SemaphoreWithInitial
2488 {
2489     vk::Move<vk::VkSemaphore> semaphore;
2490     uint64_t initialValue;
2491 
2492     SemaphoreWithInitial(vk::Move<vk::VkSemaphore> &&sem, uint64_t initVal) : semaphore(sem), initialValue(initVal)
2493     {
2494     }
2495 
2496     SemaphoreWithInitial(SemaphoreWithInitial &&other) : semaphore(other.semaphore), initialValue(other.initialValue)
2497     {
2498     }
2499 };
2500 
2501 using SemaphoreVec = std::vector<SemaphoreWithInitial>;
2502 using PlainSemVec  = std::vector<vk::VkSemaphore>;
2503 using ValuesVec    = std::vector<uint64_t>;
2504 
2505 #ifndef CTS_USES_VULKANSC
2506 
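// Helpers to flatten a SemaphoreVec into the raw handle and value arrays consumed by
// VkBindSparseInfo and VkSemaphoreWaitInfo below.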
2507 PlainSemVec getHandles(const SemaphoreVec &semVec)
2508 {
2509     PlainSemVec handlesVec;
2510     handlesVec.reserve(semVec.size());
2511 
2512     const auto conversion = [](const SemaphoreWithInitial &s) { return s.semaphore.get(); };
2513     std::transform(begin(semVec), end(semVec), std::back_inserter(handlesVec), conversion);
2514 
2515     return handlesVec;
2516 }
2517 
2518 ValuesVec getInitialValues(const SemaphoreVec &semVec)
2519 {
2520     ValuesVec initialValues;
2521     initialValues.reserve(semVec.size());
2522 
2523     const auto conversion = [](const SemaphoreWithInitial &s) { return s.initialValue; };
2524     std::transform(begin(semVec), end(semVec), std::back_inserter(initialValues), conversion);
2525 
2526     return initialValues;
2527 }
2528 
2529 // Returns a copy of the given values, each increased by one.
2530 ValuesVec getNextValues(const ValuesVec &values)
2531 {
2532     ValuesVec nextValues;
2533     nextValues.reserve(values.size());
2534 
2535     std::transform(begin(values), end(values), std::back_inserter(nextValues), [](uint64_t v) { return v + 1ull; });
2536     return nextValues;
2537 }
2538 
2539 SemaphoreWithInitial createTimelineSemaphore(const vk::DeviceInterface &vkd, vk::VkDevice device, uint32_t semId)
2540 {
2541     const auto initialValue = getInitialValue(semId);
2542     return SemaphoreWithInitial(createSemaphoreType(vkd, device, vk::VK_SEMAPHORE_TYPE_TIMELINE, 0u, initialValue),
2543                                 initialValue);
2544 }
2545 
2546 // Signal the given semaphores with the corresponding values.
2547 void hostSignal(const vk::DeviceInterface &vkd, vk::VkDevice device, const PlainSemVec &semaphores,
2548                 const ValuesVec &signalValues)
2549 {
2550     DE_ASSERT(semaphores.size() == signalValues.size());
2551 
2552     for (size_t i = 0; i < semaphores.size(); ++i)
2553         hostSignal(vkd, device, semaphores[i], signalValues[i]);
2554 }
2555 
2556 // Wait for the given semaphores and their corresponding values.
2557 void hostWait(const vk::DeviceInterface &vkd, vk::VkDevice device, const PlainSemVec &semaphores,
2558               const ValuesVec &waitValues)
2559 {
2560     DE_ASSERT(semaphores.size() == waitValues.size() && !semaphores.empty());
2561 
2562     constexpr uint64_t kTimeout = 10000000000ull; // 10 seconds in nanoseconds.
2563 
2564     const vk::VkSemaphoreWaitInfo waitInfo = {
2565         vk::VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, // VkStructureType sType;
2566         nullptr,                                   // const void* pNext;
2567         0u,                                        // VkSemaphoreWaitFlags flags;
2568         static_cast<uint32_t>(semaphores.size()),  // uint32_t semaphoreCount;
2569         semaphores.data(),                         // const VkSemaphore* pSemaphores;
2570         waitValues.data(),                         // const uint64_t* pValues;
2571     };
2572     VK_CHECK(vkd.waitSemaphores(device, &waitInfo, kTimeout));
2573 }
2574 
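// Submits a sparse bind with no actual binds that waits on and signals the requested timeline
// semaphores. When both kinds are present, it first checks that the signal semaphores still hold
// their initial values, then releases the bind by host-signaling the wait semaphores and waits
// for the incremented signal values on the host.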
2575 tcu::TestStatus SparseBindInstance::iterate(void)
2576 {
2577     const auto &vkd   = m_context.getDeviceInterface();
2578     const auto device = m_context.getDevice();
2579     const auto queue  = m_context.getSparseQueue();
2580 
2581     SemaphoreVec waitSemaphores;
2582     SemaphoreVec signalSemaphores;
2583 
2584     // Create as many semaphores as needed to wait and signal.
2585     for (uint32_t i = 0; i < m_params.numWaitSems; ++i)
2586         waitSemaphores.emplace_back(createTimelineSemaphore(vkd, device, i));
2587     for (uint32_t i = 0; i < m_params.numSignalSems; ++i)
2588         signalSemaphores.emplace_back(createTimelineSemaphore(vkd, device, i + m_params.numWaitSems));
2589 
2590     // Get handles for all semaphores.
2591     const auto waitSemHandles   = getHandles(waitSemaphores);
2592     const auto signalSemHandles = getHandles(signalSemaphores);
2593 
2594     // Get initial values for all semaphores.
2595     const auto waitSemValues   = getInitialValues(waitSemaphores);
2596     const auto signalSemValues = getInitialValues(signalSemaphores);
2597 
2598     // Get next expected values for all semaphores.
2599     const auto waitNextValues   = getNextValues(waitSemValues);
2600     const auto signalNextValues = getNextValues(signalSemValues);
2601 
2602     const vk::VkTimelineSemaphoreSubmitInfo timeLineSubmitInfo = {
2603         vk::VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,           // VkStructureType sType;
2604         nullptr,                                                        // const void* pNext;
2605         static_cast<uint32_t>(waitNextValues.size()),                   // uint32_t waitSemaphoreValueCount;
2606         (waitNextValues.empty() ? nullptr : waitNextValues.data()),     // const uint64_t* pWaitSemaphoreValues;
2607         static_cast<uint32_t>(signalNextValues.size()),                 // uint32_t signalSemaphoreValueCount;
2608         (signalNextValues.empty() ? nullptr : signalNextValues.data()), // const uint64_t* pSignalSemaphoreValues;
2609     };
2610 
2611     const vk::VkBindSparseInfo bindInfo = {
2612         vk::VK_STRUCTURE_TYPE_BIND_SPARSE_INFO,                     // VkStructureType sType;
2613         &timeLineSubmitInfo,                                        // const void* pNext;
2614         static_cast<uint32_t>(waitSemHandles.size()),               // uint32_t waitSemaphoreCount;
2615         (waitSemHandles.empty() ? nullptr : waitSemHandles.data()), // const VkSemaphore* pWaitSemaphores;
2616         0u,                                                         // uint32_t bufferBindCount;
2617         nullptr,                                                    // const VkSparseBufferMemoryBindInfo* pBufferBinds;
2618         0u,                                                         // uint32_t imageOpaqueBindCount;
2619         nullptr,                                        // const VkSparseImageOpaqueMemoryBindInfo* pImageOpaqueBinds;
2620         0u,                                             // uint32_t imageBindCount;
2621         nullptr,                                        // const VkSparseImageMemoryBindInfo* pImageBinds;
2622         static_cast<uint32_t>(signalSemHandles.size()), // uint32_t signalSemaphoreCount;
2623         (signalSemHandles.empty() ? nullptr : signalSemHandles.data()), // const VkSemaphore* pSignalSemaphores;
2624     };
2625     queueBindSparse(vkd, queue, 1u, &bindInfo);
2626 
2627     // If the device needs to wait and signal, check the signal semaphores have not been signaled yet.
2628     if (!waitSemaphores.empty() && !signalSemaphores.empty())
2629     {
2630         uint64_t value;
2631         for (size_t i = 0; i < signalSemaphores.size(); ++i)
2632         {
2633             value = 0;
2634             VK_CHECK(vkd.getSemaphoreCounterValue(device, signalSemHandles[i], &value));
2635 
2636             if (!value)
2637                 TCU_FAIL("Invalid value obtained from vkGetSemaphoreCounterValue()");
2638 
2639             if (value != signalSemValues[i])
2640             {
2641                 std::ostringstream msg;
2642                 msg << "vkQueueBindSparse() may not have waited before signaling semaphore " << i << " (expected value "
2643                     << signalSemValues[i] << " but obtained " << value << ")";
2644                 TCU_FAIL(msg.str());
2645             }
2646         }
2647     }
2648 
2649     // Signal semaphores the sparse bind command is waiting on.
2650     hostSignal(vkd, device, waitSemHandles, waitNextValues);
2651 
2652     // Wait for semaphores the sparse bind command is supposed to signal.
2653     if (!signalSemaphores.empty())
2654         hostWait(vkd, device, signalSemHandles, signalNextValues);
2655 
2656     VK_CHECK(vkd.deviceWaitIdle(device));
2657     return tcu::TestStatus::pass("Pass");
2658 }
2659 
2660 class SparseBindGroup : public tcu::TestCaseGroup
2661 {
2662 public:
2663     // vkQueueBindSparse combined with timeline semaphores
2664     SparseBindGroup(tcu::TestContext &testCtx) : tcu::TestCaseGroup(testCtx, "sparse_bind")
2665     {
2666     }
2667 
2668     virtual void init(void)
2669     {
2670         static const struct
2671         {
2672             uint32_t waitSems;
2673             uint32_t sigSems;
2674             std::string name;
2675         } kSparseBindCases[] = {
2676             // No semaphores to wait for or signal
2677             {0u, 0u, "no_sems"},
2678             // Signal semaphore without waiting for any other
2679             {0u, 1u, "no_wait_sig"},
2680             // Wait for semaphore but do not signal any other
2681             {1u, 0u, "wait_no_sig"},
2682             // Wait for semaphore and signal a second one
2683             {1u, 1u, "wait_and_sig"},
2684             // Wait for two semaphores and signal two other ones
2685             {2u, 2u, "wait_and_sig_2"},
2686         };
2687 
2688         for (int i = 0; i < DE_LENGTH_OF_ARRAY(kSparseBindCases); ++i)
2689             addChild(new SparseBindCase(m_testCtx, kSparseBindCases[i].name,
2690                                         SparseBindParams{kSparseBindCases[i].waitSems, kSparseBindCases[i].sigSems}));
2691     }
2692 };
2693 
2694 #endif // CTS_USES_VULKANSC
2695 
2696 } // namespace
2697 
2698 tcu::TestCaseGroup *createTimelineSemaphoreTests(tcu::TestContext &testCtx)
2699 {
2700     const SynchronizationType type(SynchronizationType::LEGACY);
2701     de::MovePtr<tcu::TestCaseGroup> basicTests(new tcu::TestCaseGroup(testCtx, "timeline_semaphore"));
2702 
2703     basicTests->addChild(new LegacyDeviceHostTests(testCtx));
2704     basicTests->addChild(new OneToNTests(testCtx, type));
2705     basicTests->addChild(new WaitBeforeSignalTests(testCtx, type));
2706     basicTests->addChild(new WaitTests(testCtx, type));
2707 #ifndef CTS_USES_VULKANSC
2708     basicTests->addChild(new SparseBindGroup(testCtx));
2709 #endif // CTS_USES_VULKANSC
2710 
2711     return basicTests.release();
2712 }
2713 
2714 tcu::TestCaseGroup *createSynchronization2TimelineSemaphoreTests(tcu::TestContext &testCtx)
2715 {
2716     const SynchronizationType type(SynchronizationType::SYNCHRONIZATION2);
2717     de::MovePtr<tcu::TestCaseGroup> basicTests(new tcu::TestCaseGroup(testCtx, "timeline_semaphore"));
2718 
2719     basicTests->addChild(new Synchronization2DeviceHostTests(testCtx));
2720     basicTests->addChild(new OneToNTests(testCtx, type));
2721     basicTests->addChild(new WaitBeforeSignalTests(testCtx, type));
2722     basicTests->addChild(new WaitTests(testCtx, type));
2723 
2724     return basicTests.release();
2725 }
2726 
2727 } // namespace synchronization
2728 } // namespace vkt
2729