1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2016 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Synchronization internally synchronized objects tests
22  *//*--------------------------------------------------------------------*/
23 
24 #include "vktSynchronizationInternallySynchronizedObjectsTests.hpp"
25 #include "vktTestCaseUtil.hpp"
26 #include "vktSynchronizationUtil.hpp"
27 #include "vktCustomInstancesDevices.hpp"
28 
29 #include "vkRef.hpp"
30 #include "tcuDefs.hpp"
31 #include "vkTypeUtil.hpp"
32 #include "vkBarrierUtil.hpp"
33 #include "vkPlatform.hpp"
34 #include "vkBuilderUtil.hpp"
35 #include "vkImageUtil.hpp"
36 #include "vkCmdUtil.hpp"
37 #include "vkObjUtil.hpp"
38 #include "vkDeviceUtil.hpp"
39 #include "vkSafetyCriticalUtil.hpp"
40 
41 #include "tcuResultCollector.hpp"
42 #include "tcuCommandLine.hpp"
43 
44 #include "deThread.hpp"
45 #include "deMutex.hpp"
46 #include "deSharedPtr.hpp"
47 #include "deSpinBarrier.hpp"
48 
49 #include <limits>
50 #include <iterator>
51 
52 namespace vkt
53 {
54 namespace synchronization
55 {
56 namespace
57 {
58 using namespace vk;
59 
60 using std::exception;
61 using std::map;
62 using std::ostringstream;
63 using std::string;
64 using std::vector;
65 
66 using tcu::ResultCollector;
67 using tcu::TestContext;
68 using tcu::TestException;
69 using tcu::TestStatus;
70 
71 using de::clamp;
72 using de::MovePtr;
73 using de::Mutex;
74 using de::SharedPtr;
75 using de::Thread;
76 using de::UniquePtr;
77 
78 template <typename T>
makeVkSharedPtr(Move<T> move)79 inline SharedPtr<Move<T>> makeVkSharedPtr(Move<T> move)
80 {
81     return SharedPtr<Move<T>>(new Move<T>(move));
82 }
83 
#ifndef CTS_USES_VULKANSC
enum
{
    EXECUTION_PER_THREAD = 100,                     // Pipeline create+execute iterations per worker thread.
    BUFFER_ELEMENT_COUNT = 16,                      // Number of int32 elements written/verified per execution.
    BUFFER_SIZE          = BUFFER_ELEMENT_COUNT * 4 // Buffer size in bytes (4 bytes per int32 element).
};
#else
// Vulkan SC builds run far fewer iterations per thread to keep test runtime bounded.
enum
{
    EXECUTION_PER_THREAD = 10,
    BUFFER_ELEMENT_COUNT = 16,
    BUFFER_SIZE          = BUFFER_ELEMENT_COUNT * 4
};
#endif // CTS_USES_VULKANSC
99 
// Owns a custom logical device together with every queue it was created with,
// grouped by queue family. Worker threads borrow queues one at a time through
// getFreeQueue()/releaseQueue(); a mutex guards the availability bookkeeping so
// the object can be shared between threads.
class MultiQueues
{
    typedef struct QueueType
    {
        vector<VkQueue> queues;                              // Queue handles of one family.
        vector<bool> available;                              // available[i] is true while queues[i] is not checked out.
        vector<SharedPtr<Move<VkCommandPool>>> commandPools; // One command pool per queue.
    } Queues;

public:
    // Registers a queue family with 'count' queue slots. All slots start out
    // unavailable; handles are filled in and flipped to available after the
    // device has been created (see createQueues()).
    inline void addQueueFamilyIndex(const uint32_t &queueFamilyIndex, const uint32_t &count)
    {
        Queues temp;
        vector<bool>::iterator it;
        it = temp.available.begin();
        temp.available.insert(it, count, false);

        temp.queues.resize(count);

        m_queues[queueFamilyIndex] = temp;
    }

    // Returns the queue family index stored at position 'index' of the
    // (ordered) family map.
    uint32_t getQueueFamilyIndex(const int index) const
    {
        map<uint32_t, Queues>::const_iterator it = begin(m_queues);
        std::advance(it, index);
        return it->first;
    }

    // Number of distinct queue families held by this object.
    inline size_t countQueueFamilyIndex(void)
    {
        return m_queues.size();
    }

    // Returns the queue bundle at position 'index' of the family map.
    Queues &getQueues(int index)
    {
        map<uint32_t, Queues>::iterator it = m_queues.begin();
        advance(it, index);
        return it->second;
    }

    // Tries to claim any currently-available queue. On success fills all out
    // parameters (including a freshly allocated command buffer from the queue's
    // pool), marks the queue busy and returns true. Returns false when every
    // queue is in use; callers are expected to retry (busy-wait).
    bool getFreeQueue(const DeviceInterface &vk, const VkDevice device, uint32_t &returnQueueFamilyIndex,
                      VkQueue &returnQueues, Move<VkCommandBuffer> &commandBuffer, int &returnQueueIndex)
    {
        for (int queueFamilyIndexNdx = 0; queueFamilyIndexNdx < static_cast<int>(m_queues.size());
             ++queueFamilyIndexNdx)
        {
            Queues &queue = m_queues[getQueueFamilyIndex(queueFamilyIndexNdx)];
            for (int queueNdx = 0; queueNdx < static_cast<int>(queue.queues.size()); ++queueNdx)
            {
                // The availability flag is tested and cleared under the mutex so
                // two threads can never claim the same queue.
                m_mutex.lock();
                if (queue.available[queueNdx])
                {
                    queue.available[queueNdx] = false;
                    returnQueueFamilyIndex    = getQueueFamilyIndex(queueFamilyIndexNdx);
                    returnQueues              = queue.queues[queueNdx];
                    commandBuffer             = makeCommandBuffer(vk, device, queue.commandPools[queueNdx]->get());
                    returnQueueIndex          = queueNdx;
                    m_mutex.unlock();
                    return true;
                }
                m_mutex.unlock();
            }
        }
        return false;
    }

    // Frees the borrowed command buffer and marks the queue available again so
    // another thread can pick it up.
    void releaseQueue(const uint32_t &queueFamilyIndex, const int &queueIndex, Move<VkCommandBuffer> &commandBuffer)
    {
        m_mutex.lock();
        commandBuffer                                    = Move<VkCommandBuffer>();
        m_queues[queueFamilyIndex].available[queueIndex] = true;
        m_mutex.unlock();
    }

    // Takes ownership of the logical device and creates the matching device
    // driver (regular Vulkan or Vulkan SC, depending on the build).
    inline void setDevice(Move<VkDevice> device, const Context &context)
    {
        m_logicalDevice = device;
#ifndef CTS_USES_VULKANSC
        m_deviceDriver = de::MovePtr<DeviceDriver>(
            new DeviceDriver(context.getPlatformInterface(), context.getInstance(), *m_logicalDevice,
                             context.getUsedApiVersion(), context.getTestContext().getCommandLine()));
#else
        m_deviceDriver = de::MovePtr<DeviceDriverSC, DeinitDeviceDeleter>(
            new DeviceDriverSC(context.getPlatformInterface(), context.getInstance(), *m_logicalDevice,
                               context.getTestContext().getCommandLine(), context.getResourceInterface(),
                               context.getDeviceVulkanSC10Properties(), context.getDeviceProperties(),
                               context.getUsedApiVersion()),
            vk::DeinitDeviceDeleter(context.getResourceInterface().get(), *m_logicalDevice));
#endif // CTS_USES_VULKANSC
    }

    inline VkDevice getDevice(void)
    {
        return *m_logicalDevice;
    }

    inline DeviceInterface &getDeviceInterface(void)
    {
        return *m_deviceDriver;
    }

    // Allocator for this custom device; set up by createQueues().
    MovePtr<Allocator> m_allocator;

protected:
    Move<VkDevice> m_logicalDevice;
#ifndef CTS_USES_VULKANSC
    de::MovePtr<vk::DeviceDriver> m_deviceDriver;
#else
    de::MovePtr<DeviceDriverSC, DeinitDeviceDeleter> m_deviceDriver;
#endif // CTS_USES_VULKANSC
    map<uint32_t, Queues> m_queues; // Keyed by queue family index.
    Mutex m_mutex;                  // Guards the 'available' flags above.
};
214 
createAllocator(const Context & context,const VkDevice & device)215 MovePtr<Allocator> createAllocator(const Context &context, const VkDevice &device)
216 {
217     const DeviceInterface &deviceInterface = context.getDeviceInterface();
218     const InstanceInterface &instance      = context.getInstanceInterface();
219     const VkPhysicalDevice physicalDevice  = context.getPhysicalDevice();
220     const VkPhysicalDeviceMemoryProperties deviceMemoryProperties =
221         getPhysicalDeviceMemoryProperties(instance, physicalDevice);
222 
223     // Create memory allocator for device
224     return MovePtr<Allocator>(new SimpleAllocator(deviceInterface, device, deviceMemoryProperties));
225 }
226 
checkQueueFlags(const VkQueueFlags & availableFlag,const VkQueueFlags & neededFlag)227 bool checkQueueFlags(const VkQueueFlags &availableFlag, const VkQueueFlags &neededFlag)
228 {
229     if (VK_QUEUE_TRANSFER_BIT == neededFlag)
230     {
231         if ((availableFlag & VK_QUEUE_GRAPHICS_BIT) == VK_QUEUE_GRAPHICS_BIT ||
232             (availableFlag & VK_QUEUE_COMPUTE_BIT) == VK_QUEUE_COMPUTE_BIT ||
233             (availableFlag & VK_QUEUE_TRANSFER_BIT) == VK_QUEUE_TRANSFER_BIT)
234             return true;
235     }
236     else if ((availableFlag & neededFlag) == neededFlag)
237     {
238         return true;
239     }
240     return false;
241 }
242 
// Builds a MultiQueues object: selects every queue family matching 'queueFlag',
// creates a logical device requesting all queues of those families, fetches the
// queue handles, gives each queue its own command pool and attaches an
// allocator. Throws NotSupportedError when no family matches.
MovePtr<MultiQueues> createQueues(Context &context, const VkQueueFlags &queueFlag, const VkInstance &instance,
                                  const InstanceInterface &vki)
{
    const VkPhysicalDevice physicalDevice = chooseDevice(vki, instance, context.getTestContext().getCommandLine());
    MovePtr<MultiQueues> moveQueues(new MultiQueues());
    MultiQueues &queues = *moveQueues;
    VkDeviceCreateInfo deviceInfo;
    VkPhysicalDeviceFeatures deviceFeatures;
    vector<VkQueueFamilyProperties> queueFamilyProperties;
    vector<float> queuePriorities;
    vector<VkDeviceQueueCreateInfo> queueInfos;

    queueFamilyProperties = getPhysicalDeviceQueueFamilyProperties(vki, physicalDevice);

    // Register every family whose capabilities cover the requested flags.
    for (uint32_t queuePropertiesNdx = 0; queuePropertiesNdx < queueFamilyProperties.size(); ++queuePropertiesNdx)
    {
        if (checkQueueFlags(queueFamilyProperties[queuePropertiesNdx].queueFlags, queueFlag))
        {
            queues.addQueueFamilyIndex(queuePropertiesNdx, queueFamilyProperties[queuePropertiesNdx].queueCount);
        }
    }

    if (queues.countQueueFamilyIndex() == 0)
    {
        TCU_THROW(NotSupportedError, "Queue not found");
    }

    // One priority array shared by all families, sized for the largest family;
    // every queue gets priority 1.0.
    {
        vector<float>::iterator it = queuePriorities.begin();
        unsigned int maxQueueCount = 0;
        for (int queueFamilyIndexNdx = 0; queueFamilyIndexNdx < static_cast<int>(queues.countQueueFamilyIndex());
             ++queueFamilyIndexNdx)
        {
            if (queues.getQueues(queueFamilyIndexNdx).queues.size() > maxQueueCount)
                maxQueueCount = static_cast<unsigned int>(queues.getQueues(queueFamilyIndexNdx).queues.size());
        }
        queuePriorities.insert(it, maxQueueCount, 1.0);
    }

    // One VkDeviceQueueCreateInfo per selected family, requesting all of its queues.
    for (int queueFamilyIndexNdx = 0; queueFamilyIndexNdx < static_cast<int>(queues.countQueueFamilyIndex());
         ++queueFamilyIndexNdx)
    {
        VkDeviceQueueCreateInfo queueInfo;
        const uint32_t queueCount = static_cast<uint32_t>(queues.getQueues(queueFamilyIndexNdx).queues.size());

        deMemset(&queueInfo, 0, sizeof(queueInfo));

        queueInfo.sType            = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
        queueInfo.pNext            = DE_NULL;
        queueInfo.flags            = (VkDeviceQueueCreateFlags)0u;
        queueInfo.queueFamilyIndex = queues.getQueueFamilyIndex(queueFamilyIndexNdx);
        queueInfo.queueCount       = queueCount;
        queueInfo.pQueuePriorities = &queuePriorities[0];

        queueInfos.push_back(queueInfo);
    }

    deMemset(&deviceInfo, 0, sizeof(deviceInfo));
    vki.getPhysicalDeviceFeatures(physicalDevice, &deviceFeatures);

    // Vulkan SC builds chain object reservation / SC 1.0 feature structures
    // (and, in sub-process runs, pipeline cache and pool sizes) into pNext.
    void *pNext = DE_NULL;
#ifdef CTS_USES_VULKANSC
    VkDeviceObjectReservationCreateInfo memReservationInfo = context.getTestContext().getCommandLine().isSubProcess() ?
                                                                 context.getResourceInterface()->getStatMax() :
                                                                 resetDeviceObjectReservationCreateInfo();
    memReservationInfo.pNext                               = pNext;
    pNext                                                  = &memReservationInfo;

    VkPhysicalDeviceVulkanSC10Features sc10Features = createDefaultSC10Features();
    sc10Features.pNext                              = pNext;
    pNext                                           = &sc10Features;

    VkPipelineCacheCreateInfo pcCI;
    std::vector<VkPipelinePoolSize> poolSizes;
    if (context.getTestContext().getCommandLine().isSubProcess())
    {
        if (context.getResourceInterface()->getCacheDataSize() > 0)
        {
            pcCI = {
                VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, // VkStructureType sType;
                DE_NULL,                                      // const void* pNext;
                VK_PIPELINE_CACHE_CREATE_READ_ONLY_BIT |
                    VK_PIPELINE_CACHE_CREATE_USE_APPLICATION_STORAGE_BIT, // VkPipelineCacheCreateFlags flags;
                context.getResourceInterface()->getCacheDataSize(),       // uintptr_t initialDataSize;
                context.getResourceInterface()->getCacheData()            // const void* pInitialData;
            };
            memReservationInfo.pipelineCacheCreateInfoCount = 1;
            memReservationInfo.pPipelineCacheCreateInfos    = &pcCI;
        }

        poolSizes = context.getResourceInterface()->getPipelinePoolSizes();
        if (!poolSizes.empty())
        {
            memReservationInfo.pipelinePoolSizeCount = uint32_t(poolSizes.size());
            memReservationInfo.pPipelinePoolSizes    = poolSizes.data();
        }
    }
#endif // CTS_USES_VULKANSC

    deviceInfo.sType                   = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
    deviceInfo.pNext                   = pNext;
    deviceInfo.enabledExtensionCount   = 0u;
    deviceInfo.ppEnabledExtensionNames = DE_NULL;
    deviceInfo.enabledLayerCount       = 0u;
    deviceInfo.ppEnabledLayerNames     = DE_NULL;
    deviceInfo.pEnabledFeatures        = &deviceFeatures;
    deviceInfo.queueCreateInfoCount    = static_cast<uint32_t>(queues.countQueueFamilyIndex());
    deviceInfo.pQueueCreateInfos       = &queueInfos[0];

    queues.setDevice(createCustomDevice(context.getTestContext().getCommandLine().isValidationEnabled(),
                                        context.getPlatformInterface(), instance, vki, physicalDevice, &deviceInfo),
                     context);
    vk::DeviceInterface &vk = queues.getDeviceInterface();

    // Fetch queue handles, mark each queue as free and create one command pool
    // per queue (with resettable command buffers).
    for (uint32_t queueFamilyIndex = 0; queueFamilyIndex < queues.countQueueFamilyIndex(); ++queueFamilyIndex)
    {
        for (uint32_t queueReqNdx = 0; queueReqNdx < queues.getQueues(queueFamilyIndex).queues.size(); ++queueReqNdx)
        {
            vk.getDeviceQueue(queues.getDevice(), queues.getQueueFamilyIndex(queueFamilyIndex), queueReqNdx,
                              &queues.getQueues(queueFamilyIndex).queues[queueReqNdx]);
            queues.getQueues(queueFamilyIndex).available[queueReqNdx] = true;
            queues.getQueues(queueFamilyIndex)
                .commandPools.push_back(makeVkSharedPtr(
                    createCommandPool(vk, queues.getDevice(), VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
                                      queues.getQueueFamilyIndex(queueFamilyIndex))));
        }
    }

    queues.m_allocator = createAllocator(context, queues.getDevice());
    return moveQueues;
}
374 
// Claims a free queue (spinning until one becomes available), records and
// submits a single compute dispatch of 'pipeline' bound to a host-visible
// storage buffer, waits for completion, and verifies the buffer holds the
// sequence 0..BUFFER_ELEMENT_COUNT-1 (the expected shader output).
TestStatus executeComputePipeline(const Context &context, const VkPipeline &pipeline,
                                  const VkPipelineLayout &pipelineLayout,
                                  const VkDescriptorSetLayout &descriptorSetLayout, MultiQueues &queues,
                                  const uint32_t &shadersExecutions)
{
    DE_UNREF(context);
    const DeviceInterface &vk = queues.getDeviceInterface();
    const VkDevice device     = queues.getDevice();
    uint32_t queueFamilyIndex;
    VkQueue queue;
    int queueIndex;
    Move<VkCommandBuffer> cmdBuffer;
    // Busy-wait until some other thread releases a queue.
    while (!queues.getFreeQueue(vk, device, queueFamilyIndex, queue, cmdBuffer, queueIndex))
    {
    }

    {
        const Unique<VkDescriptorPool> descriptorPool(
            DescriptorPoolBuilder()
                .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
                .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
        Buffer resultBuffer(vk, device, *queues.m_allocator,
                            makeBufferCreateInfo(BUFFER_SIZE, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
                            MemoryRequirement::HostVisible);
        // Makes the compute-shader writes visible to the host read-back below.
        const VkBufferMemoryBarrier bufferBarrier = makeBufferMemoryBarrier(
            VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *resultBuffer, 0ull, BUFFER_SIZE);

        // Zero-fill the result buffer so stale data cannot pass verification.
        {
            const Allocation &alloc = resultBuffer.getAllocation();
            deMemset(alloc.getHostPtr(), 0, BUFFER_SIZE);
            flushAlloc(vk, device, alloc);
        }

        // Start recording commands
        beginCommandBuffer(vk, *cmdBuffer);

        vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

        // Create descriptor set
        const Unique<VkDescriptorSet> descriptorSet(
            makeDescriptorSet(vk, device, *descriptorPool, descriptorSetLayout));

        const VkDescriptorBufferInfo resultDescriptorInfo = makeDescriptorBufferInfo(*resultBuffer, 0ull, BUFFER_SIZE);

        DescriptorSetUpdateBuilder()
            .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
                         VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo)
            .update(vk, device);

        vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout, 0u, 1u,
                                 &descriptorSet.get(), 0u, DE_NULL);

        // Dispatch the compute work ('shadersExecutions' workgroups on X).
        vk.cmdDispatch(*cmdBuffer, shadersExecutions, 1u, 1u);

        vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
                              (VkDependencyFlags)0, 0, (const VkMemoryBarrier *)DE_NULL, 1, &bufferBarrier, 0,
                              (const VkImageMemoryBarrier *)DE_NULL);

        // End recording commands
        endCommandBuffer(vk, *cmdBuffer);

        // Wait for command buffer execution finish
        submitCommandsAndWait(vk, device, queue, *cmdBuffer);
        queues.releaseQueue(queueFamilyIndex, queueIndex, cmdBuffer);

        // Verify the buffer contains the expected 0..N-1 sequence.
        {
            const Allocation &resultAlloc = resultBuffer.getAllocation();
            invalidateAlloc(vk, device, resultAlloc);

            const int32_t *ptr = reinterpret_cast<int32_t *>(resultAlloc.getHostPtr());
            for (int32_t ndx = 0; ndx < BUFFER_ELEMENT_COUNT; ++ndx)
            {
                if (ptr[ndx] != ndx)
                {
                    return TestStatus::fail("The data don't match");
                }
            }
        }
        return TestStatus::pass("Passed");
    }
}
457 
// Claims a free queue (spinning until one becomes available), records a render
// pass that draws 'shadersExecutions' vertices into a 1x1 color attachment with
// the given graphics pipeline, submits it, and verifies that the storage
// buffer bound to the pipeline holds the sequence 0..BUFFER_ELEMENT_COUNT-1
// (the expected shader output).
TestStatus executeGraphicPipeline(const Context &context, const VkPipeline &pipeline,
                                  const VkPipelineLayout &pipelineLayout,
                                  const VkDescriptorSetLayout &descriptorSetLayout, MultiQueues &queues,
                                  const VkRenderPass &renderPass, const uint32_t shadersExecutions)
{
    DE_UNREF(context);
    const DeviceInterface &vk = queues.getDeviceInterface();
    const VkDevice device     = queues.getDevice();
    uint32_t queueFamilyIndex;
    VkQueue queue;
    int queueIndex;
    Move<VkCommandBuffer> cmdBuffer;
    // Busy-wait until some other thread releases a queue.
    while (!queues.getFreeQueue(vk, device, queueFamilyIndex, queue, cmdBuffer, queueIndex))
    {
    }

    {
        const Unique<VkDescriptorPool> descriptorPool(
            DescriptorPoolBuilder()
                .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
                .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
        Move<VkDescriptorSet> descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, descriptorSetLayout);
        Buffer resultBuffer(vk, device, *queues.m_allocator,
                            makeBufferCreateInfo(BUFFER_SIZE, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
                            MemoryRequirement::HostVisible);
        // Makes the shader writes visible to the host read-back below (the
        // writes originate in the vertex shader stage, see the barrier's src stage).
        const VkBufferMemoryBarrier bufferBarrier = makeBufferMemoryBarrier(
            VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *resultBuffer, 0ull, BUFFER_SIZE);
        const VkFormat colorFormat        = VK_FORMAT_R8G8B8A8_UNORM;
        const VkExtent3D colorImageExtent = makeExtent3D(1u, 1u, 1u);
        const VkImageSubresourceRange colorImageSubresourceRange =
            makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
        // Minimal 1x1 color target; the image contents are not verified.
        de::MovePtr<Image> colorAttachmentImage = de::MovePtr<Image>(new Image(
            vk, device, *queues.m_allocator,
            makeImageCreateInfo(VK_IMAGE_TYPE_2D, colorImageExtent, colorFormat, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
                                VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_TILING_OPTIMAL),
            MemoryRequirement::Any));
        Move<VkImageView> colorAttachmentView = makeImageView(vk, device, **colorAttachmentImage, VK_IMAGE_VIEW_TYPE_2D,
                                                              colorFormat, colorImageSubresourceRange);
        Move<VkFramebuffer> framebuffer       = makeFramebuffer(vk, device, renderPass, *colorAttachmentView,
                                                                colorImageExtent.width, colorImageExtent.height);
        const VkDescriptorBufferInfo outputBufferDescriptorInfo =
            makeDescriptorBufferInfo(*resultBuffer, 0ull, BUFFER_SIZE);

        DescriptorSetUpdateBuilder()
            .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
                         VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescriptorInfo)
            .update(vk, device);

        // Zero-fill the result buffer so stale data cannot pass verification.
        {
            const Allocation &alloc = resultBuffer.getAllocation();
            deMemset(alloc.getHostPtr(), 0, BUFFER_SIZE);
            flushAlloc(vk, device, alloc);
        }

        // Start recording commands
        beginCommandBuffer(vk, *cmdBuffer);
        // Change color attachment image layout
        {
            const VkImageMemoryBarrier colorAttachmentLayoutBarrier = makeImageMemoryBarrier(
                (VkAccessFlags)0, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
                VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, **colorAttachmentImage, colorImageSubresourceRange);

            vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                                  VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0, 0u, DE_NULL, 0u,
                                  DE_NULL, 1u, &colorAttachmentLayoutBarrier);
        }

        {
            const VkRect2D renderArea  = makeRect2D(1u, 1u);
            const tcu::Vec4 clearColor = tcu::Vec4(0.0f, 0.0f, 0.0f, 1.0f);
            beginRenderPass(vk, *cmdBuffer, renderPass, *framebuffer, renderArea, clearColor);
        }

        vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
        vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout, 0u, 1u,
                                 &descriptorSet.get(), 0u, DE_NULL);

        // One vertex per shader execution.
        vk.cmdDraw(*cmdBuffer, shadersExecutions, 1u, 0u, 0u);
        endRenderPass(vk, *cmdBuffer);

        vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
                              (VkDependencyFlags)0, 0, (const VkMemoryBarrier *)DE_NULL, 1, &bufferBarrier, 0,
                              (const VkImageMemoryBarrier *)DE_NULL);

        // End recording commands
        endCommandBuffer(vk, *cmdBuffer);

        // Wait for command buffer execution finish
        submitCommandsAndWait(vk, device, queue, *cmdBuffer);
        queues.releaseQueue(queueFamilyIndex, queueIndex, cmdBuffer);

        // Verify the buffer contains the expected 0..N-1 sequence.
        {
            const Allocation &resultAlloc = resultBuffer.getAllocation();
            invalidateAlloc(vk, device, resultAlloc);

            const int32_t *ptr = reinterpret_cast<int32_t *>(resultAlloc.getHostPtr());
            for (int32_t ndx = 0; ndx < BUFFER_ELEMENT_COUNT; ++ndx)
            {
                if (ptr[ndx] != ndx)
                {
                    return TestStatus::fail("The data don't match");
                }
            }
        }
        return TestStatus::pass("Passed");
    }
}
565 
// Base class for the worker threads: stores the shared pipeline state, invokes
// the subclass-provided runThread() and funnels its result (or any exception)
// into a per-thread ResultCollector.
class ThreadGroupThread : private Thread
{
public:
    ThreadGroupThread(const Context &context, VkPipelineCache pipelineCache, const VkPipelineLayout &pipelineLayout,
                      const VkDescriptorSetLayout &descriptorSetLayout, MultiQueues &queues,
                      const vector<uint32_t> &shadersExecutions)
        : m_context(context)
        , m_pipelineCache(pipelineCache)
        , m_pipelineLayout(pipelineLayout)
        , m_descriptorSetLayout(descriptorSetLayout)
        , m_queues(queues)
        , m_shadersExecutions(shadersExecutions)
        , m_barrier(DE_NULL)
    {
    }

    virtual ~ThreadGroupThread(void)
    {
    }

    // Result of this thread's runThread(); read by ThreadGroup after join().
    ResultCollector &getResultCollector(void)
    {
        return m_resultCollector;
    }

    // Stores the group barrier and launches the thread.
    void start(de::SpinBarrier *groupBarrier);
    using Thread::join;

protected:
    // Per-thread test body, implemented by subclasses.
    virtual TestStatus runThread() = 0;
    const Context &m_context;
    VkPipelineCache m_pipelineCache;
    const VkPipelineLayout &m_pipelineLayout;
    const VkDescriptorSetLayout &m_descriptorSetLayout;
    MultiQueues &m_queues;
    const vector<uint32_t> &m_shadersExecutions;

    // Blocks until every thread of the group reaches this point.
    void barrier(void);

private:
    // Non-copyable: threads reference shared state by identity.
    ThreadGroupThread(const ThreadGroupThread &);
    ThreadGroupThread &operator=(const ThreadGroupThread &);

    // Thread entry point: converts runThread()'s status and any thrown
    // exception into ResultCollector entries, then detaches this thread from
    // the spin barrier so the remaining threads do not wait on it.
    void run(void)
    {
        try
        {
            TestStatus result = runThread();
            m_resultCollector.addResult(result.getCode(), result.getDescription());
        }
        catch (const TestException &e)
        {
            m_resultCollector.addResult(e.getTestResult(), e.getMessage());
        }
        catch (const exception &e)
        {
            m_resultCollector.addResult(QP_TEST_RESULT_FAIL, e.what());
        }
        catch (...)
        {
            m_resultCollector.addResult(QP_TEST_RESULT_FAIL, "Exception");
        }

        m_barrier->removeThread(de::SpinBarrier::WAIT_MODE_AUTO);
    }

    ResultCollector m_resultCollector;
    de::SpinBarrier *m_barrier; // Owned by ThreadGroup; set in start().
};
635 
start(de::SpinBarrier * groupBarrier)636 void ThreadGroupThread::start(de::SpinBarrier *groupBarrier)
637 {
638     m_barrier = groupBarrier;
639     de::Thread::start();
640 }
641 
// Blocks the calling thread on the group's SpinBarrier until all attached
// threads have arrived (automatic wait mode).
inline void ThreadGroupThread::barrier(void)
{
    m_barrier->sync(de::SpinBarrier::WAIT_MODE_AUTO);
}
646 
647 class ThreadGroup
648 {
649     typedef vector<SharedPtr<ThreadGroupThread>> ThreadVector;
650 
651 public:
ThreadGroup(void)652     ThreadGroup(void) : m_barrier(1)
653     {
654     }
~ThreadGroup(void)655     ~ThreadGroup(void)
656     {
657     }
658 
add(MovePtr<ThreadGroupThread> thread)659     void add(MovePtr<ThreadGroupThread> thread)
660     {
661         m_threads.push_back(SharedPtr<ThreadGroupThread>(thread.release()));
662     }
663 
run(void)664     TestStatus run(void)
665     {
666         ResultCollector resultCollector;
667 
668         m_barrier.reset((int)m_threads.size());
669 
670         for (ThreadVector::iterator threadIter = m_threads.begin(); threadIter != m_threads.end(); ++threadIter)
671             (*threadIter)->start(&m_barrier);
672 
673         for (ThreadVector::iterator threadIter = m_threads.begin(); threadIter != m_threads.end(); ++threadIter)
674         {
675             ResultCollector &threadResult = (*threadIter)->getResultCollector();
676             (*threadIter)->join();
677             resultCollector.addResult(threadResult.getResult(), threadResult.getMessage());
678         }
679 
680         return TestStatus(resultCollector.getResult(), resultCollector.getMessage());
681     }
682 
683 private:
684     ThreadVector m_threads;
685     de::SpinBarrier m_barrier;
686 };
687 
688 class CreateComputeThread : public ThreadGroupThread
689 {
690 public:
CreateComputeThread(const Context & context,VkPipelineCache pipelineCache,vector<VkComputePipelineCreateInfo> & pipelineInfo,const VkPipelineLayout & pipelineLayout,const VkDescriptorSetLayout & descriptorSetLayout,MultiQueues & queues,const vector<uint32_t> & shadersExecutions)691     CreateComputeThread(const Context &context, VkPipelineCache pipelineCache,
692                         vector<VkComputePipelineCreateInfo> &pipelineInfo, const VkPipelineLayout &pipelineLayout,
693                         const VkDescriptorSetLayout &descriptorSetLayout, MultiQueues &queues,
694                         const vector<uint32_t> &shadersExecutions)
695         : ThreadGroupThread(context, pipelineCache, pipelineLayout, descriptorSetLayout, queues, shadersExecutions)
696         , m_pipelineInfo(pipelineInfo)
697     {
698     }
699 
runThread(void)700     TestStatus runThread(void)
701     {
702         ResultCollector resultCollector;
703         for (int executionNdx = 0; executionNdx < EXECUTION_PER_THREAD; ++executionNdx)
704         {
705             const int shaderNdx       = executionNdx % (int)m_pipelineInfo.size();
706             const DeviceInterface &vk = m_context.getDeviceInterface();
707             const VkDevice device     = m_queues.getDevice();
708             Move<VkPipeline> pipeline = createComputePipeline(vk, device, m_pipelineCache, &m_pipelineInfo[shaderNdx]);
709 
710             TestStatus result = executeComputePipeline(m_context, *pipeline, m_pipelineLayout, m_descriptorSetLayout,
711                                                        m_queues, m_shadersExecutions[shaderNdx]);
712 
713 #ifdef CTS_USES_VULKANSC
714             // While collecting pipelines, synchronize between all threads for each pipeline that gets
715             // created, so we will reserve the maximum amount of pipeline pool space that could need.
716             if (!m_context.getTestContext().getCommandLine().isSubProcess())
717             {
718                 barrier();
719             }
720 #endif
721 
722             resultCollector.addResult(result.getCode(), result.getDescription());
723         }
724         return TestStatus(resultCollector.getResult(), resultCollector.getMessage());
725     }
726 
727 private:
728     vector<VkComputePipelineCreateInfo> &m_pipelineInfo;
729 };
730 
731 class CreateGraphicThread : public ThreadGroupThread
732 {
733 public:
CreateGraphicThread(const Context & context,VkPipelineCache pipelineCache,vector<VkGraphicsPipelineCreateInfo> & pipelineInfo,const VkPipelineLayout & pipelineLayout,const VkDescriptorSetLayout & descriptorSetLayout,MultiQueues & queues,const VkRenderPass & renderPass,const vector<uint32_t> & shadersExecutions)734     CreateGraphicThread(const Context &context, VkPipelineCache pipelineCache,
735                         vector<VkGraphicsPipelineCreateInfo> &pipelineInfo, const VkPipelineLayout &pipelineLayout,
736                         const VkDescriptorSetLayout &descriptorSetLayout, MultiQueues &queues,
737                         const VkRenderPass &renderPass, const vector<uint32_t> &shadersExecutions)
738         : ThreadGroupThread(context, pipelineCache, pipelineLayout, descriptorSetLayout, queues, shadersExecutions)
739         , m_pipelineInfo(pipelineInfo)
740         , m_renderPass(renderPass)
741     {
742     }
743 
runThread(void)744     TestStatus runThread(void)
745     {
746         ResultCollector resultCollector;
747         for (int executionNdx = 0; executionNdx < EXECUTION_PER_THREAD; ++executionNdx)
748         {
749             const int shaderNdx       = executionNdx % (int)m_pipelineInfo.size();
750             const DeviceInterface &vk = m_context.getDeviceInterface();
751             const VkDevice device     = m_queues.getDevice();
752             Move<VkPipeline> pipeline = createGraphicsPipeline(vk, device, m_pipelineCache, &m_pipelineInfo[shaderNdx]);
753 
754             TestStatus result = executeGraphicPipeline(m_context, *pipeline, m_pipelineLayout, m_descriptorSetLayout,
755                                                        m_queues, m_renderPass, m_shadersExecutions[shaderNdx]);
756 
757 #ifdef CTS_USES_VULKANSC
758             // While collecting pipelines, synchronize between all threads for each pipeline that gets
759             // created, so we will reserve the maximum amount of pipeline pool space that could need.
760             if (!m_context.getTestContext().getCommandLine().isSubProcess())
761             {
762                 barrier();
763             }
764 #endif
765 
766             resultCollector.addResult(result.getCode(), result.getDescription());
767         }
768         return TestStatus(resultCollector.getResult(), resultCollector.getMessage());
769     }
770 
771 private:
772     vector<VkGraphicsPipelineCreateInfo> &m_pipelineInfo;
773     const VkRenderPass &m_renderPass;
774 };
775 
// Stress-tests a single VkPipelineCache from multiple threads: every worker
// concurrently creates compute pipelines against the same cache object and
// executes them, relying on the cache's internal synchronization.
class PipelineCacheComputeTestInstance : public TestInstance
{
    typedef vector<SharedPtr<Unique<VkShaderModule>>> ShaderModuleVector;

public:
    // shadersExecutions: one invocation count per "compute_<n>" shader binary;
    // its size also determines how many pipelines get built.
    PipelineCacheComputeTestInstance(Context &context, const vector<uint32_t> &shadersExecutions)
        : TestInstance(context)
        , m_shadersExecutions(shadersExecutions)

    {
    }

    TestStatus iterate(void)
    {
#ifdef CTS_USES_VULKANSC
        MultithreadedDestroyGuard mdGuard(m_context.getResourceInterface());
#endif // CTS_USES_VULKANSC
        const CustomInstance instance(createCustomInstanceFromContext(m_context));
        const InstanceDriver &instanceDriver(instance.getDriver());

        // Dedicated device/queues with compute capability for this test.
        MovePtr<MultiQueues> queues          = createQueues(m_context, VK_QUEUE_COMPUTE_BIT, instance, instanceDriver);
        const DeviceInterface &vk            = queues->getDeviceInterface();
        const VkDevice device                = queues->getDevice();
        ShaderModuleVector shaderCompModules = addShaderModules(device);
        // NOTE(review): resultBuffer is not referenced again in this function —
        // looks vestigial; confirm whether it can be removed.
        Buffer resultBuffer(vk, device, *queues->m_allocator,
                            makeBufferCreateInfo(BUFFER_SIZE, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
                            MemoryRequirement::HostVisible);
        const Move<VkDescriptorSetLayout> descriptorSetLayout(
            DescriptorSetLayoutBuilder()
                .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
                .build(vk, device));
        const Move<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
        vector<VkPipelineShaderStageCreateInfo> shaderStageInfos = addShaderStageInfo(shaderCompModules);
        vector<VkComputePipelineCreateInfo> pipelineInfo         = addPipelineInfo(*pipelineLayout, shaderStageInfos);
        const VkPipelineCacheCreateInfo pipelineCacheInfo        = {
            VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, // VkStructureType             sType;
            DE_NULL,                                      // const void*                 pNext;
#ifndef CTS_USES_VULKANSC
            0u,      // VkPipelineCacheCreateFlags  flags;
            0u,      // uintptr_t                   initialDataSize;
            DE_NULL, // const void*                 pInitialData;
#else
            // Vulkan SC: seed a read-only cache from the data recorded by the resource interface.
            VK_PIPELINE_CACHE_CREATE_READ_ONLY_BIT |
                VK_PIPELINE_CACHE_CREATE_USE_APPLICATION_STORAGE_BIT, // VkPipelineCacheCreateFlags flags;
            m_context.getResourceInterface()->getCacheDataSize(),     // uintptr_t initialDataSize;
            m_context.getResourceInterface()->getCacheData()          // const void* pInitialData;
#endif // CTS_USES_VULKANSC
        };
        Move<VkPipelineCache> pipelineCache = createPipelineCache(vk, device, &pipelineCacheInfo);
        Move<VkPipeline> pipeline           = createComputePipeline(vk, device, *pipelineCache, &pipelineInfo[0]);
#ifndef CTS_USES_VULKANSC
        // Scale worker count with the machine; Vulkan SC uses a fixed pair.
        const uint32_t numThreads = clamp(deGetNumAvailableLogicalCores(), 4u, 32u);
#else
        const uint32_t numThreads = 2u;
#endif // CTS_USES_VULKANSC
        ThreadGroup threads;

        // Run the first pipeline once from this thread before the workers start.
        executeComputePipeline(m_context, *pipeline, *pipelineLayout, *descriptorSetLayout, *queues,
                               m_shadersExecutions[0]);

        // All workers share the same cache, layouts and create infos.
        for (uint32_t ndx = 0; ndx < numThreads; ++ndx)
            threads.add(MovePtr<ThreadGroupThread>(new CreateComputeThread(m_context, *pipelineCache, pipelineInfo,
                                                                           *pipelineLayout, *descriptorSetLayout,
                                                                           *queues, m_shadersExecutions)));

        {
            // Any non-pass code from the thread group fails the whole test.
            TestStatus thread_result = threads.run();
            if (thread_result.getCode())
            {
                return thread_result;
            }
        }
        return TestStatus::pass("Passed");
    }

private:
    // Builds one shader module per execution entry from binaries "compute_0".."compute_<n-1>".
    ShaderModuleVector addShaderModules(const VkDevice &device)
    {
        const DeviceInterface &vk = m_context.getDeviceInterface();
        ShaderModuleVector shaderCompModules;
        shaderCompModules.resize(m_shadersExecutions.size());
        for (int shaderNdx = 0; shaderNdx < static_cast<int>(m_shadersExecutions.size()); ++shaderNdx)
        {
            ostringstream shaderName;
            shaderName << "compute_" << shaderNdx;
            shaderCompModules[shaderNdx] =
                SharedPtr<Unique<VkShaderModule>>(new Unique<VkShaderModule>(createShaderModule(
                    vk, device, m_context.getBinaryCollection().get(shaderName.str()), (VkShaderModuleCreateFlags)0)));
        }
        return shaderCompModules;
    }

    // One compute-stage create info per module; every stage uses entry point "main".
    vector<VkPipelineShaderStageCreateInfo> addShaderStageInfo(const ShaderModuleVector &shaderCompModules)
    {
        VkPipelineShaderStageCreateInfo shaderStageInfo;
        vector<VkPipelineShaderStageCreateInfo> shaderStageInfos;
        shaderStageInfo.sType               = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
        shaderStageInfo.pNext               = DE_NULL;
        shaderStageInfo.flags               = (VkPipelineShaderStageCreateFlags)0;
        shaderStageInfo.stage               = VK_SHADER_STAGE_COMPUTE_BIT;
        shaderStageInfo.pName               = "main";
        shaderStageInfo.pSpecializationInfo = DE_NULL;

        for (int shaderNdx = 0; shaderNdx < static_cast<int>(m_shadersExecutions.size()); ++shaderNdx)
        {
            // Only the module differs between stages; the template above is reused.
            shaderStageInfo.module = *(*shaderCompModules[shaderNdx]);
            shaderStageInfos.push_back(shaderStageInfo);
        }
        return shaderStageInfos;
    }

    // One VkComputePipelineCreateInfo per stage, all sharing the same layout.
    vector<VkComputePipelineCreateInfo> addPipelineInfo(VkPipelineLayout pipelineLayout,
                                                        const vector<VkPipelineShaderStageCreateInfo> &shaderStageInfos)
    {
        vector<VkComputePipelineCreateInfo> pipelineInfos;
        VkComputePipelineCreateInfo computePipelineInfo;
        computePipelineInfo.sType              = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
        computePipelineInfo.pNext              = DE_NULL;
        computePipelineInfo.flags              = (VkPipelineCreateFlags)0;
        computePipelineInfo.layout             = pipelineLayout;
        computePipelineInfo.basePipelineHandle = DE_NULL;
        computePipelineInfo.basePipelineIndex  = 0;

        for (int shaderNdx = 0; shaderNdx < static_cast<int>(m_shadersExecutions.size()); ++shaderNdx)
        {
            computePipelineInfo.stage = shaderStageInfos[shaderNdx];
            pipelineInfos.push_back(computePipelineInfo);
        }
        return pipelineInfos;
    }

    // Per-shader invocation counts; the size defines the number of pipelines.
    const vector<uint32_t> m_shadersExecutions;
};
909 
// Graphics counterpart of the compute test above: several threads concurrently
// create graphics pipelines from one shared VkPipelineCache and draw with them,
// relying on the cache's internal synchronization.
class PipelineCacheGraphicTestInstance : public TestInstance
{
    typedef vector<SharedPtr<Unique<VkShaderModule>>> ShaderModuleVector;

public:
    // shadersExecutions: one vertex count per "vert_<n>" shader binary;
    // its size also determines how many pipelines get built.
    PipelineCacheGraphicTestInstance(Context &context, const vector<uint32_t> &shadersExecutions)
        : TestInstance(context)
        , m_shadersExecutions(shadersExecutions)

    {
    }

    TestStatus iterate(void)
    {
#ifdef CTS_USES_VULKANSC
        MultithreadedDestroyGuard mdGuard(m_context.getResourceInterface());
#endif // CTS_USES_VULKANSC
        const CustomInstance instance(createCustomInstanceFromContext(m_context));
        const InstanceDriver &instanceDriver(instance.getDriver());
        const VkPhysicalDevice physicalDevice =
            chooseDevice(instanceDriver, instance, m_context.getTestContext().getCommandLine());
        // The vertex shaders write into an SSBO, so this feature is mandatory.
        requireFeatures(instanceDriver, physicalDevice, FEATURE_VERTEX_PIPELINE_STORES_AND_ATOMICS);

        MovePtr<MultiQueues> queues   = createQueues(m_context, VK_QUEUE_GRAPHICS_BIT, instance, instanceDriver);
        // NOTE(review): the compute variant takes the DeviceInterface from
        // 'queues' while this one takes it from the context, yet 'device' comes
        // from 'queues' — confirm both interfaces resolve the same functions.
        const DeviceInterface &vk     = m_context.getDeviceInterface();
        const VkDevice device         = queues->getDevice();
        VkFormat colorFormat          = VK_FORMAT_R8G8B8A8_UNORM;
        Move<VkRenderPass> renderPass = makeRenderPass(vk, device, colorFormat);
        const Move<VkDescriptorSetLayout> descriptorSetLayout(
            DescriptorSetLayoutBuilder()
                .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_VERTEX_BIT)
                .build(vk, device));
        ShaderModuleVector shaderGraphicModules = addShaderModules(device);
        const Move<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
        vector<VkPipelineShaderStageCreateInfo> shaderStageInfos = addShaderStageInfo(shaderGraphicModules);
        vector<VkGraphicsPipelineCreateInfo> pipelineInfo =
            addPipelineInfo(*pipelineLayout, shaderStageInfos, *renderPass);
        const VkPipelineCacheCreateInfo pipelineCacheInfo = {
            VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, // VkStructureType             sType;
            DE_NULL,                                      // const void*                 pNext;
#ifndef CTS_USES_VULKANSC
            0u,     // VkPipelineCacheCreateFlags  flags;
            0u,     // uintptr_t                   initialDataSize;
            DE_NULL // const void*                 pInitialData;
#else
            // Vulkan SC: seed a read-only cache from the data recorded by the resource interface.
            VK_PIPELINE_CACHE_CREATE_READ_ONLY_BIT |
                VK_PIPELINE_CACHE_CREATE_USE_APPLICATION_STORAGE_BIT, // VkPipelineCacheCreateFlags flags;
            m_context.getResourceInterface()->getCacheDataSize(),     // uintptr_t initialDataSize;
            m_context.getResourceInterface()->getCacheData()          // const void* pInitialData;
#endif // CTS_USES_VULKANSC
        };
        Move<VkPipelineCache> pipelineCache = createPipelineCache(vk, device, &pipelineCacheInfo);
        Move<VkPipeline> pipeline           = createGraphicsPipeline(vk, device, *pipelineCache, &pipelineInfo[0]);
#ifndef CTS_USES_VULKANSC
        // Scale worker count with the machine; Vulkan SC uses a fixed pair.
        const uint32_t numThreads = clamp(deGetNumAvailableLogicalCores(), 4u, 32u);
#else
        const uint32_t numThreads = 2u;
#endif // CTS_USES_VULKANSC
        ThreadGroup threads;

        // Run the first pipeline once from this thread before the workers start.
        executeGraphicPipeline(m_context, *pipeline, *pipelineLayout, *descriptorSetLayout, *queues, *renderPass,
                               m_shadersExecutions[0]);

        // All workers share the same cache, layouts, render pass and create infos.
        for (uint32_t ndx = 0; ndx < numThreads; ++ndx)
            threads.add(MovePtr<ThreadGroupThread>(new CreateGraphicThread(m_context, *pipelineCache, pipelineInfo,
                                                                           *pipelineLayout, *descriptorSetLayout,
                                                                           *queues, *renderPass, m_shadersExecutions)));

        {
            // Any non-pass code from the thread group fails the whole test.
            TestStatus thread_result = threads.run();
            if (thread_result.getCode())
            {
                return thread_result;
            }
        }
        return TestStatus::pass("Passed");
    }

private:
    // Builds one vertex shader module per execution entry ("vert_<n>") plus a
    // single shared fragment shader ("frag") stored in the extra last slot.
    ShaderModuleVector addShaderModules(const VkDevice &device)
    {
        const DeviceInterface &vk = m_context.getDeviceInterface();
        ShaderModuleVector shaderModules;
        shaderModules.resize(m_shadersExecutions.size() + 1);
        for (int shaderNdx = 0; shaderNdx < static_cast<int>(m_shadersExecutions.size()); ++shaderNdx)
        {
            ostringstream shaderName;
            shaderName << "vert_" << shaderNdx;
            shaderModules[shaderNdx] = SharedPtr<Unique<VkShaderModule>>(new Unique<VkShaderModule>(createShaderModule(
                vk, device, m_context.getBinaryCollection().get(shaderName.str()), (VkShaderModuleCreateFlags)0)));
        }
        shaderModules[m_shadersExecutions.size()] = SharedPtr<Unique<VkShaderModule>>(new Unique<VkShaderModule>(
            createShaderModule(vk, device, m_context.getBinaryCollection().get("frag"), (VkShaderModuleCreateFlags)0)));
        return shaderModules;
    }

    // Emits a (vertex, fragment) stage pair per vertex shader; the fragment
    // stage always uses the shared module in the last slot.
    vector<VkPipelineShaderStageCreateInfo> addShaderStageInfo(const ShaderModuleVector &shaderCompModules)
    {
        VkPipelineShaderStageCreateInfo shaderStageInfo;
        vector<VkPipelineShaderStageCreateInfo> shaderStageInfos;
        shaderStageInfo.sType               = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
        shaderStageInfo.pNext               = DE_NULL;
        shaderStageInfo.flags               = (VkPipelineShaderStageCreateFlags)0;
        shaderStageInfo.pName               = "main";
        shaderStageInfo.pSpecializationInfo = DE_NULL;

        for (int shaderNdx = 0; shaderNdx < static_cast<int>(m_shadersExecutions.size()); ++shaderNdx)
        {
            shaderStageInfo.stage  = VK_SHADER_STAGE_VERTEX_BIT;
            shaderStageInfo.module = *(*shaderCompModules[shaderNdx]);
            shaderStageInfos.push_back(shaderStageInfo);

            shaderStageInfo.stage  = VK_SHADER_STAGE_FRAGMENT_BIT;
            shaderStageInfo.module = *(*shaderCompModules[m_shadersExecutions.size()]);
            shaderStageInfos.push_back(shaderStageInfo);
        }
        return shaderStageInfos;
    }

    // Builds one VkGraphicsPipelineCreateInfo per stage pair. The fixed-function
    // state lives in class members (m_*StateParams) because the returned create
    // infos hold pointers into them and are consumed after this call returns.
    vector<VkGraphicsPipelineCreateInfo> addPipelineInfo(
        VkPipelineLayout pipelineLayout, const vector<VkPipelineShaderStageCreateInfo> &shaderStageInfos,
        const VkRenderPass &renderPass)
    {
        // 1x1 target: the test cares about pipeline creation, not rendering output.
        VkExtent3D colorImageExtent = makeExtent3D(1u, 1u, 1u);
        vector<VkGraphicsPipelineCreateInfo> pipelineInfo;

        // No vertex buffers: the vertex shaders generate data themselves.
        m_vertexInputStateParams.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
        m_vertexInputStateParams.pNext = DE_NULL;
        m_vertexInputStateParams.flags = 0u;
        m_vertexInputStateParams.vertexBindingDescriptionCount   = 0u;
        m_vertexInputStateParams.pVertexBindingDescriptions      = DE_NULL;
        m_vertexInputStateParams.vertexAttributeDescriptionCount = 0u;
        m_vertexInputStateParams.pVertexAttributeDescriptions    = DE_NULL;

        m_inputAssemblyStateParams.sType                  = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
        m_inputAssemblyStateParams.pNext                  = DE_NULL;
        m_inputAssemblyStateParams.flags                  = 0u;
        m_inputAssemblyStateParams.topology               = VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
        m_inputAssemblyStateParams.primitiveRestartEnable = VK_FALSE;

        m_viewport.x        = 0.0f;
        m_viewport.y        = 0.0f;
        m_viewport.width    = (float)colorImageExtent.width;
        m_viewport.height   = (float)colorImageExtent.height;
        m_viewport.minDepth = 0.0f;
        m_viewport.maxDepth = 1.0f;

        //TODO
        m_scissor.offset.x      = 0;
        m_scissor.offset.y      = 0;
        m_scissor.extent.width  = colorImageExtent.width;
        m_scissor.extent.height = colorImageExtent.height;

        m_viewportStateParams.sType         = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
        m_viewportStateParams.pNext         = DE_NULL;
        m_viewportStateParams.flags         = 0u;
        m_viewportStateParams.viewportCount = 1u;
        m_viewportStateParams.pViewports    = &m_viewport;
        m_viewportStateParams.scissorCount  = 1u;
        m_viewportStateParams.pScissors     = &m_scissor;

        m_rasterStateParams.sType                   = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
        m_rasterStateParams.pNext                   = DE_NULL;
        m_rasterStateParams.flags                   = 0u;
        m_rasterStateParams.depthClampEnable        = VK_FALSE;
        m_rasterStateParams.rasterizerDiscardEnable = VK_FALSE;
        m_rasterStateParams.polygonMode             = VK_POLYGON_MODE_FILL;
        m_rasterStateParams.cullMode                = VK_CULL_MODE_NONE;
        m_rasterStateParams.frontFace               = VK_FRONT_FACE_COUNTER_CLOCKWISE;
        m_rasterStateParams.depthBiasEnable         = VK_FALSE;
        m_rasterStateParams.depthBiasConstantFactor = 0.0f;
        m_rasterStateParams.depthBiasClamp          = 0.0f;
        m_rasterStateParams.depthBiasSlopeFactor    = 0.0f;
        m_rasterStateParams.lineWidth               = 1.0f;

        // Blending disabled; plain color write of all components.
        m_colorBlendAttachmentState.blendEnable         = VK_FALSE;
        m_colorBlendAttachmentState.srcColorBlendFactor = VK_BLEND_FACTOR_ONE;
        m_colorBlendAttachmentState.dstColorBlendFactor = VK_BLEND_FACTOR_ZERO;
        m_colorBlendAttachmentState.colorBlendOp        = VK_BLEND_OP_ADD;
        m_colorBlendAttachmentState.srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE;
        m_colorBlendAttachmentState.dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO;
        m_colorBlendAttachmentState.alphaBlendOp        = VK_BLEND_OP_ADD;
        m_colorBlendAttachmentState.colorWriteMask =
            VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;

        m_colorBlendStateParams.sType             = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
        m_colorBlendStateParams.pNext             = DE_NULL;
        m_colorBlendStateParams.flags             = 0u;
        m_colorBlendStateParams.logicOpEnable     = VK_FALSE;
        m_colorBlendStateParams.logicOp           = VK_LOGIC_OP_COPY;
        m_colorBlendStateParams.attachmentCount   = 1u;
        m_colorBlendStateParams.pAttachments      = &m_colorBlendAttachmentState;
        m_colorBlendStateParams.blendConstants[0] = 0.0f;
        m_colorBlendStateParams.blendConstants[1] = 0.0f;
        m_colorBlendStateParams.blendConstants[2] = 0.0f;
        m_colorBlendStateParams.blendConstants[3] = 0.0f;

        m_multisampleStateParams.sType                 = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
        m_multisampleStateParams.pNext                 = DE_NULL;
        m_multisampleStateParams.flags                 = 0u;
        m_multisampleStateParams.rasterizationSamples  = VK_SAMPLE_COUNT_1_BIT;
        m_multisampleStateParams.sampleShadingEnable   = VK_FALSE;
        m_multisampleStateParams.minSampleShading      = 0.0f;
        m_multisampleStateParams.pSampleMask           = DE_NULL;
        m_multisampleStateParams.alphaToCoverageEnable = VK_FALSE;
        m_multisampleStateParams.alphaToOneEnable      = VK_FALSE;

        m_depthStencilStateParams.sType                 = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO;
        m_depthStencilStateParams.pNext                 = DE_NULL;
        m_depthStencilStateParams.flags                 = 0u;
        m_depthStencilStateParams.depthTestEnable       = VK_TRUE;
        m_depthStencilStateParams.depthWriteEnable      = VK_TRUE;
        m_depthStencilStateParams.depthCompareOp        = VK_COMPARE_OP_LESS_OR_EQUAL;
        m_depthStencilStateParams.depthBoundsTestEnable = VK_FALSE;
        m_depthStencilStateParams.stencilTestEnable     = VK_FALSE;
        m_depthStencilStateParams.front.failOp          = VK_STENCIL_OP_KEEP;
        m_depthStencilStateParams.front.passOp          = VK_STENCIL_OP_KEEP;
        m_depthStencilStateParams.front.depthFailOp     = VK_STENCIL_OP_KEEP;
        m_depthStencilStateParams.front.compareOp       = VK_COMPARE_OP_NEVER;
        m_depthStencilStateParams.front.compareMask     = 0u;
        m_depthStencilStateParams.front.writeMask       = 0u;
        m_depthStencilStateParams.front.reference       = 0u;
        m_depthStencilStateParams.back.failOp           = VK_STENCIL_OP_KEEP;
        m_depthStencilStateParams.back.passOp           = VK_STENCIL_OP_KEEP;
        m_depthStencilStateParams.back.depthFailOp      = VK_STENCIL_OP_KEEP;
        m_depthStencilStateParams.back.compareOp        = VK_COMPARE_OP_NEVER;
        m_depthStencilStateParams.back.compareMask      = 0u;
        m_depthStencilStateParams.back.writeMask        = 0u;
        m_depthStencilStateParams.back.reference        = 0u;
        m_depthStencilStateParams.minDepthBounds        = 0.0f;
        m_depthStencilStateParams.maxDepthBounds        = 1.0f;

        VkGraphicsPipelineCreateInfo graphicsPipelineParams = {
            VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, // VkStructureType sType;
            DE_NULL,                                         // const void* pNext;
            0u,                                              // VkPipelineCreateFlags flags;
            2u,                                              // uint32_t stageCount; (vertex + fragment)
            DE_NULL,                                         // const VkPipelineShaderStageCreateInfo* pStages;
            &m_vertexInputStateParams,   // const VkPipelineVertexInputStateCreateInfo* pVertexInputState;
            &m_inputAssemblyStateParams, // const VkPipelineInputAssemblyStateCreateInfo* pInputAssemblyState;
            DE_NULL,                     // const VkPipelineTessellationStateCreateInfo* pTessellationState;
            &m_viewportStateParams,      // const VkPipelineViewportStateCreateInfo* pViewportState;
            &m_rasterStateParams,        // const VkPipelineRasterizationStateCreateInfo* pRasterState;
            &m_multisampleStateParams,   // const VkPipelineMultisampleStateCreateInfo* pMultisampleState;
            &m_depthStencilStateParams,  // const VkPipelineDepthStencilStateCreateInfo* pDepthStencilState;
            &m_colorBlendStateParams,    // const VkPipelineColorBlendStateCreateInfo* pColorBlendState;
            (const VkPipelineDynamicStateCreateInfo *)DE_NULL, // const VkPipelineDynamicStateCreateInfo* pDynamicState;
            pipelineLayout,                                    // VkPipelineLayout layout;
            renderPass,                                        // VkRenderPass renderPass;
            0u,                                                // uint32_t subpass;
            DE_NULL,                                           // VkPipeline basePipelineHandle;
            0,                                                 // int32_t basePipelineIndex;
        };
        // Stages were pushed in (vertex, fragment) pairs, hence the stride of 2.
        for (int shaderNdx = 0; shaderNdx < static_cast<int>(m_shadersExecutions.size()) * 2; shaderNdx += 2)
        {
            graphicsPipelineParams.pStages = &shaderStageInfos[shaderNdx];
            pipelineInfo.push_back(graphicsPipelineParams);
        }
        return pipelineInfo;
    }

    // Per-shader vertex counts; the size defines the number of pipelines.
    const vector<uint32_t> m_shadersExecutions;
    // Fixed-function state referenced by pointer from the create infos returned
    // by addPipelineInfo(); kept as members so the pointers outlive that call.
    VkPipelineVertexInputStateCreateInfo m_vertexInputStateParams;
    VkPipelineInputAssemblyStateCreateInfo m_inputAssemblyStateParams;
    VkViewport m_viewport;
    VkRect2D m_scissor;
    VkPipelineViewportStateCreateInfo m_viewportStateParams;
    VkPipelineRasterizationStateCreateInfo m_rasterStateParams;
    VkPipelineColorBlendAttachmentState m_colorBlendAttachmentState;
    VkPipelineColorBlendStateCreateInfo m_colorBlendStateParams;
    VkPipelineMultisampleStateCreateInfo m_multisampleStateParams;
    VkPipelineDepthStencilStateCreateInfo m_depthStencilStateParams;
};
1183 
1184 class PipelineCacheComputeTest : public TestCase
1185 {
1186 public:
PipelineCacheComputeTest(TestContext & testCtx,const string & name)1187     PipelineCacheComputeTest(TestContext &testCtx, const string &name) : TestCase(testCtx, name)
1188     {
1189     }
1190 
initPrograms(SourceCollections & programCollection) const1191     void initPrograms(SourceCollections &programCollection) const
1192     {
1193         ostringstream buffer;
1194         buffer << "layout(set = 0, binding = 0, std430) buffer Output\n"
1195                << "{\n"
1196                << "    int result[];\n"
1197                << "} sb_out;\n";
1198         {
1199             ostringstream src;
1200             src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_310_ES) << "\n"
1201                 << "\n"
1202                 << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
1203                 << "\n"
1204                 << buffer.str() << "void main (void)\n"
1205                 << "{\n"
1206                 << "    highp uint ndx = gl_GlobalInvocationID.x;\n"
1207                 << "    sb_out.result[ndx] = int(ndx);\n"
1208                 << "}\n";
1209             programCollection.glslSources.add("compute_0") << glu::ComputeSource(src.str());
1210         }
1211         {
1212             ostringstream src;
1213             src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_310_ES) << "\n"
1214                 << "\n"
1215                 << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
1216                 << "\n"
1217                 << buffer.str() << "void main (void)\n"
1218                 << "{\n"
1219                 << "    for (highp uint ndx = 0u; ndx < " << BUFFER_ELEMENT_COUNT << "u; ndx++)\n"
1220                 << "    {\n"
1221                 << "        sb_out.result[ndx] = int(ndx);\n"
1222                 << "    }\n"
1223                 << "}\n";
1224             programCollection.glslSources.add("compute_1") << glu::ComputeSource(src.str());
1225         }
1226         {
1227             ostringstream src;
1228             src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_310_ES) << "\n"
1229                 << "\n"
1230                 << "layout(local_size_x = " << BUFFER_ELEMENT_COUNT << ", local_size_y = 1, local_size_z = 1) in;\n"
1231                 << "\n"
1232                 << buffer.str() << "void main (void)\n"
1233                 << "{\n"
1234                 << "    highp uint ndx = gl_LocalInvocationID.x;\n"
1235                 << "    sb_out.result[ndx] = int(ndx);\n"
1236                 << "}\n";
1237             programCollection.glslSources.add("compute_2") << glu::ComputeSource(src.str());
1238         }
1239     }
1240 
createInstance(Context & context) const1241     TestInstance *createInstance(Context &context) const
1242     {
1243         vector<uint32_t> shadersExecutions;
1244         shadersExecutions.push_back(16u); //compute_0
1245         shadersExecutions.push_back(1u);  //compute_1
1246         shadersExecutions.push_back(1u);  //compute_2
1247         return new PipelineCacheComputeTestInstance(context, shadersExecutions);
1248     }
1249 };
1250 
1251 class PipelineCacheGraphicTest : public TestCase
1252 {
1253 public:
PipelineCacheGraphicTest(TestContext & testCtx,const string & name)1254     PipelineCacheGraphicTest(TestContext &testCtx, const string &name) : TestCase(testCtx, name)
1255     {
1256     }
1257 
initPrograms(SourceCollections & programCollection) const1258     void initPrograms(SourceCollections &programCollection) const
1259     {
1260         ostringstream buffer;
1261         buffer << "layout(set = 0, binding = 0, std430) buffer Output\n"
1262                << "{\n"
1263                << "    int result[];\n"
1264                << "} sb_out;\n";
1265 
1266         // Vertex
1267         {
1268             std::ostringstream src;
1269             src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_440) << "\n"
1270                 << "\n"
1271                 << buffer.str() << "\n"
1272                 << "void main (void)\n"
1273                 << "{\n"
1274                 << "   sb_out.result[gl_VertexIndex] = int(gl_VertexIndex);\n"
1275                 << "   gl_PointSize = 1.0f;\n"
1276                 << "}\n";
1277             programCollection.glslSources.add("vert_0") << glu::VertexSource(src.str());
1278         }
1279         // Vertex
1280         {
1281             std::ostringstream src;
1282             src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_440) << "\n"
1283                 << "\n"
1284                 << buffer.str() << "\n"
1285                 << "void main (void)\n"
1286                 << "{\n"
1287                 << "    for (highp uint ndx = 0u; ndx < " << BUFFER_ELEMENT_COUNT << "u; ndx++)\n"
1288                 << "    {\n"
1289                 << "        sb_out.result[ndx] = int(ndx);\n"
1290                 << "    }\n"
1291                 << "    gl_PointSize = 1.0f;\n"
1292                 << "}\n";
1293             programCollection.glslSources.add("vert_1") << glu::VertexSource(src.str());
1294         }
1295         // Vertex
1296         {
1297             std::ostringstream src;
1298             src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_440) << "\n"
1299                 << "\n"
1300                 << buffer.str() << "\n"
1301                 << "void main (void)\n"
1302                 << "{\n"
1303                 << "    for (int ndx = " << BUFFER_ELEMENT_COUNT - 1 << "; ndx >= 0; ndx--)\n"
1304                 << "    {\n"
1305                 << "        sb_out.result[uint(ndx)] = ndx;\n"
1306                 << "    }\n"
1307                 << "    gl_PointSize = 1.0f;\n"
1308                 << "}\n";
1309             programCollection.glslSources.add("vert_2") << glu::VertexSource(src.str());
1310         }
1311         // Fragment
1312         {
1313             std::ostringstream src;
1314             src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_440) << "\n"
1315                 << "\n"
1316                 << "layout(location = 0) out vec4 o_color;\n"
1317                 << "\n"
1318                 << "void main (void)\n"
1319                 << "{\n"
1320                 << "    o_color = vec4(1.0);\n"
1321                 << "}\n";
1322             programCollection.glslSources.add("frag") << glu::FragmentSource(src.str());
1323         }
1324     }
1325 
createInstance(Context & context) const1326     TestInstance *createInstance(Context &context) const
1327     {
1328         vector<uint32_t> shadersExecutions;
1329         shadersExecutions.push_back(16u); //vert_0
1330         shadersExecutions.push_back(1u);  //vert_1
1331         shadersExecutions.push_back(1u);  //vert_2
1332         return new PipelineCacheGraphicTestInstance(context, shadersExecutions);
1333     }
1334 };
1335 
1336 } // namespace
1337 
createInternallySynchronizedObjects(tcu::TestContext & testCtx)1338 tcu::TestCaseGroup *createInternallySynchronizedObjects(tcu::TestContext &testCtx)
1339 {
1340     // Internally synchronized objects
1341     de::MovePtr<tcu::TestCaseGroup> tests(new tcu::TestCaseGroup(testCtx, "internally_synchronized_objects"));
1342     // Internally synchronized object VkPipelineCache for compute pipeline is tested
1343     tests->addChild(new PipelineCacheComputeTest(testCtx, "pipeline_cache_compute"));
1344     // Internally synchronized object VkPipelineCache for graphics pipeline is tested
1345     tests->addChild(new PipelineCacheGraphicTest(testCtx, "pipeline_cache_graphics"));
1346     return tests.release();
1347 }
1348 
1349 } // namespace synchronization
1350 } // namespace vkt
1351