/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2016 The Khronos Group Inc.
 * Copyright (c) 2016 The Android Open Source Project
 * Copyright (c) 2023 LunarG, Inc.
 * Copyright (c) 2023 Nintendo
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Indirect Compute Dispatch tests
 *//*--------------------------------------------------------------------*/

#include "vktComputeIndirectComputeDispatchTests.hpp"
#include "vktComputeTestsUtil.hpp"
#include "vktCustomInstancesDevices.hpp"
#include "vkSafetyCriticalUtil.hpp"

#include <string>
#include <map>
#include <vector>

#include "vkDefs.hpp"
#include "vkRef.hpp"
#include "vkRefUtil.hpp"
#include "vktTestCase.hpp"
#include "vktTestCaseUtil.hpp"
#include "vkPlatform.hpp"
#include "vkPrograms.hpp"
#include "vkMemUtil.hpp"
#include "vkBarrierUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkDeviceUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkObjUtil.hpp"
#include "vkBufferWithMemory.hpp"

#include "tcuVector.hpp"
#include "tcuVectorUtil.hpp"
#include "tcuTestLog.hpp"
#include "tcuRGBA.hpp"
#include "tcuStringTemplate.hpp"

#include "deUniquePtr.hpp"
#include "deSharedPtr.hpp"
#include "deStringUtil.hpp"
#include "deArrayUtil.hpp"

#include "gluShaderUtil.hpp"
#include "tcuCommandLine.hpp"

#include <set>

namespace vkt
{
namespace compute
{
namespace
{
std::vector<std::string> removeCoreExtensions(const std::vector<std::string> &supportedExtensions,
                                              const std::vector<const char *> &coreExtensions)
{
    std::vector<std::string> nonCoreExtensions;
    std::set<std::string> excludedExtensions(coreExtensions.begin(), coreExtensions.end());

    for (const auto &supportedExtension : supportedExtensions)
    {
        if (!de::contains(excludedExtensions, supportedExtension))
            nonCoreExtensions.push_back(supportedExtension);
    }

    return nonCoreExtensions;
}

// Creates a device with a queue that supports compute but not graphics capabilities.
vk::Move<vk::VkDevice> createCustomDevice(Context &context,
#ifdef CTS_USES_VULKANSC
                                          const vkt::CustomInstance &customInstance,
#endif // CTS_USES_VULKANSC
                                          uint32_t &queueFamilyIndex)
{
#ifdef CTS_USES_VULKANSC
    const vk::InstanceInterface &instanceDriver = customInstance.getDriver();
    const vk::VkPhysicalDevice physicalDevice =
        chooseDevice(instanceDriver, customInstance, context.getTestContext().getCommandLine());
#else
    const vk::InstanceInterface &instanceDriver = context.getInstanceInterface();
    const vk::VkPhysicalDevice physicalDevice   = context.getPhysicalDevice();
#endif // CTS_USES_VULKANSC

    const std::vector<vk::VkQueueFamilyProperties> queueFamilies =
        getPhysicalDeviceQueueFamilyProperties(instanceDriver, physicalDevice);

    queueFamilyIndex = 0;
    for (const auto &queueFamily : queueFamilies)
    {
        if (queueFamily.queueFlags & vk::VK_QUEUE_COMPUTE_BIT &&
            !(queueFamily.queueFlags & vk::VK_QUEUE_GRAPHICS_BIT) &&
            queueFamilyIndex != context.getUniversalQueueFamilyIndex())
            break;
        else
            queueFamilyIndex++;
    }

    // One queue family without a graphics bit should be found, since this is checked in checkSupport.
    DE_ASSERT(queueFamilyIndex < queueFamilies.size());

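    // Request two queues: one on the universal queue family and one on the
    // compute-only family found above.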
    const float queuePriority                                  = 1.0f;
    const vk::VkDeviceQueueCreateInfo deviceQueueCreateInfos[] = {
        {
            vk::VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, // VkStructureType sType;
            DE_NULL,                                        // const void* pNext;
            (vk::VkDeviceQueueCreateFlags)0u,               // VkDeviceQueueCreateFlags flags;
            context.getUniversalQueueFamilyIndex(),         // uint32_t queueFamilyIndex;
            1u,                                             // uint32_t queueCount;
            &queuePriority,                                 // const float* pQueuePriorities;
        },
        {
            vk::VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, // VkStructureType sType;
            DE_NULL,                                        // const void* pNext;
            (vk::VkDeviceQueueCreateFlags)0u,               // VkDeviceQueueCreateFlags flags;
            queueFamilyIndex,                               // uint32_t queueFamilyIndex;
            1u,                                             // uint32_t queueCount;
            &queuePriority,                                 // const float* pQueuePriorities;
        }};

    // context.getDeviceExtensions() returns the supported device extensions, including those that have been
    // promoted to Vulkan core. The core extensions must be removed from the list.
    std::vector<const char *> coreExtensions;
    vk::getCoreDeviceExtensions(context.getUsedApiVersion(), coreExtensions);
    std::vector<std::string> nonCoreExtensions(removeCoreExtensions(context.getDeviceExtensions(), coreExtensions));

    std::vector<const char *> extensionNames;
    extensionNames.reserve(nonCoreExtensions.size());
    for (const std::string &extension : nonCoreExtensions)
        extensionNames.push_back(extension.c_str());

    const auto &deviceFeatures2 = context.getDeviceFeatures2();

    const void *pNext = &deviceFeatures2;
#ifdef CTS_USES_VULKANSC
    VkDeviceObjectReservationCreateInfo memReservationInfo = context.getTestContext().getCommandLine().isSubProcess() ?
                                                                 context.getResourceInterface()->getStatMax() :
                                                                 resetDeviceObjectReservationCreateInfo();
    memReservationInfo.pNext                               = pNext;
    pNext                                                  = &memReservationInfo;

    VkPipelineCacheCreateInfo pcCI;
    std::vector<VkPipelinePoolSize> poolSizes;
    if (context.getTestContext().getCommandLine().isSubProcess())
    {
        if (context.getResourceInterface()->getCacheDataSize() > 0)
        {
            pcCI = {
                VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, // VkStructureType sType;
                DE_NULL,                                      // const void* pNext;
                VK_PIPELINE_CACHE_CREATE_READ_ONLY_BIT |
                    VK_PIPELINE_CACHE_CREATE_USE_APPLICATION_STORAGE_BIT, // VkPipelineCacheCreateFlags flags;
                context.getResourceInterface()->getCacheDataSize(),       // uintptr_t initialDataSize;
                context.getResourceInterface()->getCacheData()            // const void* pInitialData;
            };
            memReservationInfo.pipelineCacheCreateInfoCount = 1;
            memReservationInfo.pPipelineCacheCreateInfos    = &pcCI;
        }
        poolSizes = context.getResourceInterface()->getPipelinePoolSizes();
        if (!poolSizes.empty())
        {
            memReservationInfo.pipelinePoolSizeCount = uint32_t(poolSizes.size());
            memReservationInfo.pPipelinePoolSizes    = poolSizes.data();
        }
    }
#endif // CTS_USES_VULKANSC

    const vk::VkDeviceCreateInfo deviceCreateInfo = {
        vk::VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,     // VkStructureType sType;
        pNext,                                        // const void* pNext;
        (vk::VkDeviceCreateFlags)0u,                  // VkDeviceCreateFlags flags;
        DE_LENGTH_OF_ARRAY(deviceQueueCreateInfos),   // uint32_t queueCreateInfoCount;
        deviceQueueCreateInfos,                       // const VkDeviceQueueCreateInfo* pQueueCreateInfos;
        0u,                                           // uint32_t enabledLayerCount;
        DE_NULL,                                      // const char* const* ppEnabledLayerNames;
        static_cast<uint32_t>(extensionNames.size()), // uint32_t enabledExtensionCount;
        extensionNames.data(),                        // const char* const* ppEnabledExtensionNames;
        DE_NULL,                                      // const VkPhysicalDeviceFeatures* pEnabledFeatures;
    };

    return vkt::createCustomDevice(context.getTestContext().getCommandLine().isValidationEnabled(),
                                   context.getPlatformInterface(),
#ifdef CTS_USES_VULKANSC
                                   customInstance,
#else
                                   context.getInstance(),
#endif
                                   instanceDriver, physicalDevice, &deviceCreateInfo);
}

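// Layout of a single result block, mirroring the std430 "Result" buffer used by
// the verify shader below: a uvec3 holding the expected work group count,
// followed by a uint counter of passing invocations.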
enum
{
    RESULT_BLOCK_BASE_SIZE         = 4 * (int)sizeof(uint32_t), // uvec3 + uint
    RESULT_BLOCK_NUM_PASSED_OFFSET = 3 * (int)sizeof(uint32_t),
    INDIRECT_COMMAND_OFFSET        = 3 * (int)sizeof(uint32_t),
};

vk::VkDeviceSize getResultBlockAlignedSize(const vk::InstanceInterface &instanceInterface,
                                           const vk::VkPhysicalDevice physicalDevice, const vk::VkDeviceSize baseSize)
{
    // TODO getPhysicalDeviceProperties() was added to vkQueryUtil in 41-image-load-store-tests. Use it once it's merged.
    vk::VkPhysicalDeviceProperties deviceProperties;
    instanceInterface.getPhysicalDeviceProperties(physicalDevice, &deviceProperties);
    vk::VkDeviceSize alignment = deviceProperties.limits.minStorageBufferOffsetAlignment;

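    // Round the base size up to the next multiple of the alignment so each
    // result block can be bound at a valid storage buffer offset. For example,
    // a 16-byte base size with a 256-byte minStorageBufferOffsetAlignment
    // yields a 256-byte block.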
    if (alignment == 0 || (baseSize % alignment == 0))
        return baseSize;
    else
        return (baseSize / alignment + 1) * alignment;
}

struct DispatchCommand
{
    DispatchCommand(const intptr_t offset, const tcu::UVec3 &numWorkGroups)
        : m_offset(offset)
        , m_numWorkGroups(numWorkGroups)
    {
    }

    intptr_t m_offset;
    tcu::UVec3 m_numWorkGroups;
};

typedef std::vector<DispatchCommand> DispatchCommandsVec;

struct DispatchCaseDesc
{
    DispatchCaseDesc(const char *name, const uintptr_t bufferSize, const tcu::UVec3 workGroupSize,
                     const DispatchCommandsVec &dispatchCommands, const bool computeQueueOnly)
        : m_name(name)
        , m_bufferSize(bufferSize)
        , m_workGroupSize(workGroupSize)
        , m_dispatchCommands(dispatchCommands)
        , m_computeOnlyQueue(computeQueueOnly)
    {
    }

    const char *m_name;
    const uintptr_t m_bufferSize;
    const tcu::UVec3 m_workGroupSize;
    const DispatchCommandsVec m_dispatchCommands;
    const bool m_computeOnlyQueue;
};

class IndirectDispatchInstanceBufferUpload : public vkt::TestInstance
{
public:
    IndirectDispatchInstanceBufferUpload(Context &context, const std::string &name, const uintptr_t bufferSize,
                                         const tcu::UVec3 &workGroupSize, const DispatchCommandsVec &dispatchCommands,
                                         const bool computeQueueOnly,
                                         const vk::ComputePipelineConstructionType computePipelineConstructionType);

    virtual ~IndirectDispatchInstanceBufferUpload(void)
    {
    }

    virtual tcu::TestStatus iterate(void);

protected:
    virtual void fillIndirectBufferData(const vk::VkCommandBuffer commandBuffer, const vk::DeviceInterface &vkdi,
                                        const vk::BufferWithMemory &indirectBuffer);

    bool verifyResultBuffer(const vk::BufferWithMemory &resultBuffer, const vk::DeviceInterface &vkdi,
                            const vk::VkDeviceSize resultBlockSize) const;

    Context &m_context;
    const std::string m_name;

    vk::VkDevice m_device;
#ifdef CTS_USES_VULKANSC
    const CustomInstance m_customInstance;
#endif // CTS_USES_VULKANSC
    vk::Move<vk::VkDevice> m_customDevice;
#ifndef CTS_USES_VULKANSC
    de::MovePtr<vk::DeviceDriver> m_deviceDriver;
#else
    de::MovePtr<DeviceDriverSC, DeinitDeviceDeleter> m_deviceDriver;
#endif // CTS_USES_VULKANSC

    vk::VkQueue m_queue;
    uint32_t m_queueFamilyIndex;

    const uintptr_t m_bufferSize;
    const tcu::UVec3 m_workGroupSize;
    const DispatchCommandsVec m_dispatchCommands;

    de::MovePtr<vk::Allocator> m_allocator;

    const bool m_computeQueueOnly;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;

private:
    IndirectDispatchInstanceBufferUpload(const vkt::TestInstance &);
    IndirectDispatchInstanceBufferUpload &operator=(const vkt::TestInstance &);
};

IndirectDispatchInstanceBufferUpload::IndirectDispatchInstanceBufferUpload(
    Context &context, const std::string &name, const uintptr_t bufferSize, const tcu::UVec3 &workGroupSize,
    const DispatchCommandsVec &dispatchCommands, const bool computeQueueOnly,
    const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : vkt::TestInstance(context)
    , m_context(context)
    , m_name(name)
    , m_device(context.getDevice())
#ifdef CTS_USES_VULKANSC
    , m_customInstance(createCustomInstanceFromContext(context))
#endif // CTS_USES_VULKANSC
    , m_queue(context.getUniversalQueue())
    , m_queueFamilyIndex(context.getUniversalQueueFamilyIndex())
    , m_bufferSize(bufferSize)
    , m_workGroupSize(workGroupSize)
    , m_dispatchCommands(dispatchCommands)
    , m_computeQueueOnly(computeQueueOnly)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
}

void IndirectDispatchInstanceBufferUpload::fillIndirectBufferData(const vk::VkCommandBuffer commandBuffer,
                                                                  const vk::DeviceInterface &vkdi,
                                                                  const vk::BufferWithMemory &indirectBuffer)
{
    DE_UNREF(commandBuffer);

    const vk::Allocation &alloc = indirectBuffer.getAllocation();
    uint8_t *indirectDataPtr    = reinterpret_cast<uint8_t *>(alloc.getHostPtr());

    for (DispatchCommandsVec::const_iterator cmdIter = m_dispatchCommands.begin(); cmdIter != m_dispatchCommands.end();
         ++cmdIter)
    {
        DE_ASSERT(cmdIter->m_offset >= 0);
        DE_ASSERT(cmdIter->m_offset % sizeof(uint32_t) == 0);
        DE_ASSERT(cmdIter->m_offset + INDIRECT_COMMAND_OFFSET <= (intptr_t)m_bufferSize);

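        // Each command is written with the layout of VkDispatchIndirectCommand,
        // i.e. three consecutive uint32_t work group counts:
        //
        //     typedef struct VkDispatchIndirectCommand {
        //         uint32_t x;
        //         uint32_t y;
        //         uint32_t z;
        //     } VkDispatchIndirectCommand;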
        uint32_t *const dstPtr = (uint32_t *)&indirectDataPtr[cmdIter->m_offset];

        dstPtr[0] = cmdIter->m_numWorkGroups[0];
        dstPtr[1] = cmdIter->m_numWorkGroups[1];
        dstPtr[2] = cmdIter->m_numWorkGroups[2];
    }

    vk::flushAlloc(vkdi, m_device, alloc);
}

tcu::TestStatus IndirectDispatchInstanceBufferUpload::iterate(void)
{
#ifdef CTS_USES_VULKANSC
    const vk::InstanceInterface &vki = m_customInstance.getDriver();
#else
    const vk::InstanceInterface &vki = m_context.getInstanceInterface();
#endif // CTS_USES_VULKANSC
    tcu::TestContext &testCtx = m_context.getTestContext();

    testCtx.getLog() << tcu::TestLog::Message << "GL_DISPATCH_INDIRECT_BUFFER size = " << m_bufferSize
                     << tcu::TestLog::EndMessage;
    {
        tcu::ScopedLogSection section(testCtx.getLog(), "Commands",
                                      "Indirect Dispatch Commands (" + de::toString(m_dispatchCommands.size()) +
                                          " in total)");

        for (uint32_t cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
        {
            testCtx.getLog() << tcu::TestLog::Message << cmdNdx << ": "
                             << "offset = " << m_dispatchCommands[cmdNdx].m_offset
                             << ", numWorkGroups = " << m_dispatchCommands[cmdNdx].m_numWorkGroups
                             << tcu::TestLog::EndMessage;
        }
    }

    if (m_computeQueueOnly)
    {
        // m_queueFamilyIndex will be updated in createCustomDevice() to match the requested queue type.
        m_customDevice = createCustomDevice(m_context,
#ifdef CTS_USES_VULKANSC
                                            m_customInstance,
#endif
                                            m_queueFamilyIndex);
        m_device = m_customDevice.get();
#ifndef CTS_USES_VULKANSC
        m_deviceDriver = de::MovePtr<vk::DeviceDriver>(
            new vk::DeviceDriver(m_context.getPlatformInterface(), m_context.getInstance(), m_device,
                                 m_context.getUsedApiVersion(), m_context.getTestContext().getCommandLine()));
#else
        m_deviceDriver = de::MovePtr<vk::DeviceDriverSC, vk::DeinitDeviceDeleter>(
            new vk::DeviceDriverSC(m_context.getPlatformInterface(), m_customInstance, m_device,
                                   m_context.getTestContext().getCommandLine(), m_context.getResourceInterface(),
                                   m_context.getDeviceVulkanSC10Properties(), m_context.getDeviceProperties(),
                                   m_context.getUsedApiVersion()),
            vk::DeinitDeviceDeleter(m_context.getResourceInterface().get(), m_device));
#endif // CTS_USES_VULKANSC
    }
#ifndef CTS_USES_VULKANSC
    const vk::DeviceInterface &vkdi = m_context.getDeviceInterface();
#else
    const vk::DeviceInterface &vkdi =
        (m_computeQueueOnly && (DE_NULL != m_deviceDriver)) ? *m_deviceDriver : m_context.getDeviceInterface();
#endif // CTS_USES_VULKANSC
    if (m_computeQueueOnly)
    {
        m_queue     = getDeviceQueue(vkdi, m_device, m_queueFamilyIndex, 0u);
        m_allocator = de::MovePtr<vk::Allocator>(new vk::SimpleAllocator(
            vkdi, m_device, vk::getPhysicalDeviceMemoryProperties(vki, m_context.getPhysicalDevice())));
    }
    vk::Allocator &allocator = m_allocator.get() ? *m_allocator : m_context.getDefaultAllocator();

    // Create result buffer
    const vk::VkDeviceSize resultBlockSize =
        getResultBlockAlignedSize(vki, m_context.getPhysicalDevice(), RESULT_BLOCK_BASE_SIZE);
    const vk::VkDeviceSize resultBufferSize = resultBlockSize * (uint32_t)m_dispatchCommands.size();

    vk::BufferWithMemory resultBuffer(
        vkdi, m_device, allocator, vk::makeBufferCreateInfo(resultBufferSize, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
        vk::MemoryRequirement::HostVisible);

    {
        const vk::Allocation &alloc = resultBuffer.getAllocation();
        uint8_t *resultDataPtr      = reinterpret_cast<uint8_t *>(alloc.getHostPtr());

        for (uint32_t cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
        {
            uint8_t *const dstPtr = &resultDataPtr[resultBlockSize * cmdNdx];

            *(uint32_t *)(dstPtr + 0 * sizeof(uint32_t))           = m_dispatchCommands[cmdNdx].m_numWorkGroups[0];
            *(uint32_t *)(dstPtr + 1 * sizeof(uint32_t))           = m_dispatchCommands[cmdNdx].m_numWorkGroups[1];
            *(uint32_t *)(dstPtr + 2 * sizeof(uint32_t))           = m_dispatchCommands[cmdNdx].m_numWorkGroups[2];
            *(uint32_t *)(dstPtr + RESULT_BLOCK_NUM_PASSED_OFFSET) = 0;
        }

        vk::flushAlloc(vkdi, m_device, alloc);
    }

    // Create descriptorSetLayout
    vk::DescriptorSetLayoutBuilder layoutBuilder;
    layoutBuilder.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT);
    vk::Unique<vk::VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vkdi, m_device));

    // Create compute pipeline
    vk::ComputePipelineWrapper computePipeline(
        vkdi, m_device, m_computePipelineConstructionType,
        m_context.getBinaryCollection().get("indirect_dispatch_" + m_name + "_verify"));
    computePipeline.setDescriptorSetLayout(descriptorSetLayout.get());
    computePipeline.buildPipeline();

    // Create descriptor pool
    const vk::Unique<vk::VkDescriptorPool> descriptorPool(
        vk::DescriptorPoolBuilder()
            .addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, (uint32_t)m_dispatchCommands.size())
            .build(vkdi, m_device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
                   static_cast<uint32_t>(m_dispatchCommands.size())));

    const vk::VkBufferMemoryBarrier ssboPostBarrier = makeBufferMemoryBarrier(
        vk::VK_ACCESS_SHADER_WRITE_BIT, vk::VK_ACCESS_HOST_READ_BIT, *resultBuffer, 0ull, resultBufferSize);
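    // This barrier makes the compute shader writes to the result buffer visible
    // to the host reads performed later in verifyResultBuffer().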

    // Create command buffer
    const vk::Unique<vk::VkCommandPool> cmdPool(makeCommandPool(vkdi, m_device, m_queueFamilyIndex));
    const vk::Unique<vk::VkCommandBuffer> cmdBuffer(
        allocateCommandBuffer(vkdi, m_device, *cmdPool, vk::VK_COMMAND_BUFFER_LEVEL_PRIMARY));

    // Begin recording commands
    beginCommandBuffer(vkdi, *cmdBuffer);

    // Create indirect buffer
    vk::BufferWithMemory indirectBuffer(
        vkdi, m_device, allocator,
        vk::makeBufferCreateInfo(m_bufferSize,
                                 vk::VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT | vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
        vk::MemoryRequirement::HostVisible);
    fillIndirectBufferData(*cmdBuffer, vkdi, indirectBuffer);

    // Bind compute pipeline
    computePipeline.bind(*cmdBuffer);

    // Allocate descriptor sets
    typedef de::SharedPtr<vk::Unique<vk::VkDescriptorSet>> SharedVkDescriptorSet;
    std::vector<SharedVkDescriptorSet> descriptorSets(m_dispatchCommands.size());

    vk::VkDeviceSize curOffset = 0;

    // Create descriptor sets
    for (uint32_t cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
    {
        descriptorSets[cmdNdx] = SharedVkDescriptorSet(new vk::Unique<vk::VkDescriptorSet>(
            makeDescriptorSet(vkdi, m_device, *descriptorPool, *descriptorSetLayout)));

        const vk::VkDescriptorBufferInfo resultDescriptorInfo =
            makeDescriptorBufferInfo(*resultBuffer, curOffset, resultBlockSize);

        vk::DescriptorSetUpdateBuilder descriptorSetBuilder;
        descriptorSetBuilder.writeSingle(**descriptorSets[cmdNdx],
                                         vk::DescriptorSetUpdateBuilder::Location::binding(0u),
                                         vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);
        descriptorSetBuilder.update(vkdi, m_device);

        // Bind descriptor set
        vkdi.cmdBindDescriptorSets(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, computePipeline.getPipelineLayout(),
                                   0u, 1u, &(**descriptorSets[cmdNdx]), 0u, DE_NULL);

        // Dispatch indirect compute command
        vkdi.cmdDispatchIndirect(*cmdBuffer, *indirectBuffer, m_dispatchCommands[cmdNdx].m_offset);

        curOffset += resultBlockSize;
    }

    // Insert memory barrier
    vkdi.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT,
                            (vk::VkDependencyFlags)0, 0, (const vk::VkMemoryBarrier *)DE_NULL, 1, &ssboPostBarrier, 0,
                            (const vk::VkImageMemoryBarrier *)DE_NULL);

    // End recording commands
    endCommandBuffer(vkdi, *cmdBuffer);

    // Wait for command buffer execution to finish
    submitCommandsAndWait(vkdi, m_device, m_queue, *cmdBuffer);

    // Check if result buffer contains valid values
    if (verifyResultBuffer(resultBuffer, vkdi, resultBlockSize))
        return tcu::TestStatus(QP_TEST_RESULT_PASS, "Pass");
    else
        return tcu::TestStatus(QP_TEST_RESULT_FAIL, "Invalid values in result buffer");
}

bool IndirectDispatchInstanceBufferUpload::verifyResultBuffer(const vk::BufferWithMemory &resultBuffer,
                                                              const vk::DeviceInterface &vkdi,
                                                              const vk::VkDeviceSize resultBlockSize) const
{
    bool allOk                  = true;
    const vk::Allocation &alloc = resultBuffer.getAllocation();
    vk::invalidateAlloc(vkdi, m_device, alloc);

    const uint8_t *const resultDataPtr = reinterpret_cast<uint8_t *>(alloc.getHostPtr());

    for (uint32_t cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); cmdNdx++)
    {
        const DispatchCommand &cmd            = m_dispatchCommands[cmdNdx];
        const uint8_t *const srcPtr           = (const uint8_t *)resultDataPtr + cmdNdx * resultBlockSize;
        const uint32_t numPassed              = *(const uint32_t *)(srcPtr + RESULT_BLOCK_NUM_PASSED_OFFSET);
        const uint32_t numInvocationsPerGroup = m_workGroupSize[0] * m_workGroupSize[1] * m_workGroupSize[2];
        const uint32_t numGroups     = cmd.m_numWorkGroups[0] * cmd.m_numWorkGroups[1] * cmd.m_numWorkGroups[2];
        const uint32_t expectedCount = numInvocationsPerGroup * numGroups;

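        // The verify shader increments numPassed once for every invocation that
        // observes the expected gl_NumWorkGroups, so a fully passing dispatch
        // yields exactly numInvocationsPerGroup * numGroups.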
        if (numPassed != expectedCount)
        {
            tcu::TestContext &testCtx = m_context.getTestContext();

            testCtx.getLog() << tcu::TestLog::Message << "ERROR: got invalid result for invocation " << cmdNdx
                             << ": got numPassed = " << numPassed << ", expected " << expectedCount
                             << tcu::TestLog::EndMessage;

            allOk = false;
        }
    }

    return allOk;
}

class IndirectDispatchCaseBufferUpload : public vkt::TestCase
{
public:
    IndirectDispatchCaseBufferUpload(tcu::TestContext &testCtx, const DispatchCaseDesc &caseDesc,
                                     const glu::GLSLVersion glslVersion,
                                     const vk::ComputePipelineConstructionType computePipelineConstructionType);

    virtual ~IndirectDispatchCaseBufferUpload(void)
    {
    }

    virtual void initPrograms(vk::SourceCollections &programCollection) const;
    virtual TestInstance *createInstance(Context &context) const;
    virtual void checkSupport(Context &context) const;

protected:
    const uintptr_t m_bufferSize;
    const tcu::UVec3 m_workGroupSize;
    const DispatchCommandsVec m_dispatchCommands;
    const glu::GLSLVersion m_glslVersion;
    const bool m_computeOnlyQueue;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;

private:
    IndirectDispatchCaseBufferUpload(const vkt::TestCase &);
    IndirectDispatchCaseBufferUpload &operator=(const vkt::TestCase &);
};

IndirectDispatchCaseBufferUpload::IndirectDispatchCaseBufferUpload(
    tcu::TestContext &testCtx, const DispatchCaseDesc &caseDesc, const glu::GLSLVersion glslVersion,
    const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : vkt::TestCase(testCtx, caseDesc.m_name)
    , m_bufferSize(caseDesc.m_bufferSize)
    , m_workGroupSize(caseDesc.m_workGroupSize)
    , m_dispatchCommands(caseDesc.m_dispatchCommands)
    , m_glslVersion(glslVersion)
    , m_computeOnlyQueue(caseDesc.m_computeOnlyQueue)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
}

void IndirectDispatchCaseBufferUpload::initPrograms(vk::SourceCollections &programCollection) const
{
    const char *const versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);

    std::ostringstream verifyBuffer;

    verifyBuffer << versionDecl << "\n"
                 << "layout(local_size_x = ${LOCAL_SIZE_X}, local_size_y = ${LOCAL_SIZE_Y}, local_size_z = "
                    "${LOCAL_SIZE_Z}) in;\n"
                 << "layout(set = 0, binding = 0, std430) buffer Result\n"
                 << "{\n"
                 << "    uvec3           expectedGroupCount;\n"
                 << "    coherent uint   numPassed;\n"
                 << "} result;\n"
                 << "void main (void)\n"
                 << "{\n"
                 << "    if (all(equal(result.expectedGroupCount, gl_NumWorkGroups)))\n"
                 << "        atomicAdd(result.numPassed, 1u);\n"
                 << "}\n";

    std::map<std::string, std::string> args;

    args["LOCAL_SIZE_X"] = de::toString(m_workGroupSize.x());
    args["LOCAL_SIZE_Y"] = de::toString(m_workGroupSize.y());
    args["LOCAL_SIZE_Z"] = de::toString(m_workGroupSize.z());

    std::string verifyProgramString = tcu::StringTemplate(verifyBuffer.str()).specialize(args);

    programCollection.glslSources.add("indirect_dispatch_" + m_name + "_verify")
        << glu::ComputeSource(verifyProgramString);
}

TestInstance *IndirectDispatchCaseBufferUpload::createInstance(Context &context) const
{
    return new IndirectDispatchInstanceBufferUpload(context, m_name, m_bufferSize, m_workGroupSize, m_dispatchCommands,
                                                    m_computeOnlyQueue, m_computePipelineConstructionType);
}

void IndirectDispatchCaseBufferUpload::checkSupport(Context &context) const
{
    // Require at least one queue family that supports compute but does NOT support graphics.
    if (m_computeOnlyQueue)
    {
        bool foundQueue = false;
        const std::vector<vk::VkQueueFamilyProperties> queueFamilies =
            getPhysicalDeviceQueueFamilyProperties(context.getInstanceInterface(), context.getPhysicalDevice());

        for (const auto &queueFamily : queueFamilies)
        {
            if (queueFamily.queueFlags & vk::VK_QUEUE_COMPUTE_BIT &&
                !(queueFamily.queueFlags & vk::VK_QUEUE_GRAPHICS_BIT))
            {
                foundQueue = true;
                break;
            }
        }
        if (!foundQueue)
            TCU_THROW(NotSupportedError, "No queue family found that only supports compute queue.");
    }

    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
                                  m_computePipelineConstructionType);
}

class IndirectDispatchInstanceBufferGenerate : public IndirectDispatchInstanceBufferUpload
{
public:
    IndirectDispatchInstanceBufferGenerate(Context &context, const std::string &name, const uintptr_t bufferSize,
                                           const tcu::UVec3 &workGroupSize, const DispatchCommandsVec &dispatchCommands,
                                           const bool computeOnlyQueue,
                                           const vk::ComputePipelineConstructionType computePipelineConstructionType)

        : IndirectDispatchInstanceBufferUpload(context, name, bufferSize, workGroupSize, dispatchCommands,
                                               computeOnlyQueue, computePipelineConstructionType)
    {
    }

    virtual ~IndirectDispatchInstanceBufferGenerate(void)
    {
    }

protected:
    virtual void fillIndirectBufferData(const vk::VkCommandBuffer commandBuffer, const vk::DeviceInterface &vkdi,
                                        const vk::BufferWithMemory &indirectBuffer);

    vk::Move<vk::VkDescriptorSetLayout> m_descriptorSetLayout;
    vk::Move<vk::VkDescriptorPool> m_descriptorPool;
    vk::Move<vk::VkDescriptorSet> m_descriptorSet;
    vk::Move<vk::VkPipelineLayout> m_pipelineLayout;
    vk::Move<vk::VkPipeline> m_computePipeline;

private:
    IndirectDispatchInstanceBufferGenerate(const vkt::TestInstance &);
    IndirectDispatchInstanceBufferGenerate &operator=(const vkt::TestInstance &);
};

void IndirectDispatchInstanceBufferGenerate::fillIndirectBufferData(const vk::VkCommandBuffer commandBuffer,
                                                                    const vk::DeviceInterface &vkdi,
                                                                    const vk::BufferWithMemory &indirectBuffer)
{
    // Create compute shader that generates data for indirect buffer
    const vk::Unique<vk::VkShaderModule> genIndirectBufferDataShader(createShaderModule(
        vkdi, m_device, m_context.getBinaryCollection().get("indirect_dispatch_" + m_name + "_generate"), 0u));

    // Create descriptorSetLayout
    m_descriptorSetLayout =
        vk::DescriptorSetLayoutBuilder()
            .addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT)
            .build(vkdi, m_device);

    // Create compute pipeline
    m_pipelineLayout  = makePipelineLayout(vkdi, m_device, *m_descriptorSetLayout);
    m_computePipeline = makeComputePipeline(vkdi, m_device, *m_pipelineLayout, *genIndirectBufferDataShader);

    // Create descriptor pool
    m_descriptorPool = vk::DescriptorPoolBuilder()
                           .addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
                           .build(vkdi, m_device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);

    // Create descriptor set
    m_descriptorSet = makeDescriptorSet(vkdi, m_device, *m_descriptorPool, *m_descriptorSetLayout);

    const vk::VkDescriptorBufferInfo indirectDescriptorInfo =
        makeDescriptorBufferInfo(*indirectBuffer, 0ull, m_bufferSize);

    vk::DescriptorSetUpdateBuilder descriptorSetBuilder;
    descriptorSetBuilder.writeSingle(*m_descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0u),
                                     vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &indirectDescriptorInfo);
    descriptorSetBuilder.update(vkdi, m_device);

    const vk::VkBufferMemoryBarrier bufferBarrier = makeBufferMemoryBarrier(
        vk::VK_ACCESS_SHADER_WRITE_BIT, vk::VK_ACCESS_INDIRECT_COMMAND_READ_BIT, *indirectBuffer, 0ull, m_bufferSize);

    // Bind compute pipeline
    vkdi.cmdBindPipeline(commandBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *m_computePipeline);

    // Bind descriptor set
    vkdi.cmdBindDescriptorSets(commandBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipelineLayout, 0u, 1u,
                               &m_descriptorSet.get(), 0u, DE_NULL);

    // Dispatch compute command
    vkdi.cmdDispatch(commandBuffer, 1u, 1u, 1u);

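    // vkCmdDispatchIndirect reads its arguments at the DRAW_INDIRECT pipeline
    // stage with INDIRECT_COMMAND_READ access, which is the destination scope
    // used by the barrier below.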
    // Insert memory barrier
    vkdi.cmdPipelineBarrier(commandBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                            vk::VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, (vk::VkDependencyFlags)0, 0,
                            (const vk::VkMemoryBarrier *)DE_NULL, 1, &bufferBarrier, 0,
                            (const vk::VkImageMemoryBarrier *)DE_NULL);
}

class IndirectDispatchCaseBufferGenerate : public IndirectDispatchCaseBufferUpload
{
public:
    IndirectDispatchCaseBufferGenerate(tcu::TestContext &testCtx, const DispatchCaseDesc &caseDesc,
                                       const glu::GLSLVersion glslVersion,
                                       const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : IndirectDispatchCaseBufferUpload(testCtx, caseDesc, glslVersion, computePipelineConstructionType)
    {
    }

    virtual ~IndirectDispatchCaseBufferGenerate(void)
    {
    }

    virtual void initPrograms(vk::SourceCollections &programCollection) const;
    virtual TestInstance *createInstance(Context &context) const;

private:
    IndirectDispatchCaseBufferGenerate(const vkt::TestCase &);
    IndirectDispatchCaseBufferGenerate &operator=(const vkt::TestCase &);
};

void IndirectDispatchCaseBufferGenerate::initPrograms(vk::SourceCollections &programCollection) const
{
    IndirectDispatchCaseBufferUpload::initPrograms(programCollection);

    const char *const versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);

    std::ostringstream computeBuffer;

    // Header
    computeBuffer << versionDecl << "\n"
                  << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
                  << "layout(set = 0, binding = 0, std430) buffer Out\n"
                  << "{\n"
                  << "    highp uint data[];\n"
                  << "};\n"
                  << "void writeCmd (uint offset, uvec3 numWorkGroups)\n"
                  << "{\n"
                  << "    data[offset+0u] = numWorkGroups.x;\n"
                  << "    data[offset+1u] = numWorkGroups.y;\n"
                  << "    data[offset+2u] = numWorkGroups.z;\n"
                  << "}\n"
                  << "void main (void)\n"
                  << "{\n";

    // Dispatch commands
    for (DispatchCommandsVec::const_iterator cmdIter = m_dispatchCommands.begin(); cmdIter != m_dispatchCommands.end();
         ++cmdIter)
    {
        const uint32_t offs = (uint32_t)(cmdIter->m_offset / sizeof(uint32_t));
        DE_ASSERT((size_t)offs * sizeof(uint32_t) == (size_t)cmdIter->m_offset);

        computeBuffer << "\twriteCmd(" << offs << "u, uvec3(" << cmdIter->m_numWorkGroups.x() << "u, "
                      << cmdIter->m_numWorkGroups.y() << "u, " << cmdIter->m_numWorkGroups.z() << "u));\n";
    }

    // Ending
    computeBuffer << "}\n";

    std::string computeString = computeBuffer.str();

    programCollection.glslSources.add("indirect_dispatch_" + m_name + "_generate") << glu::ComputeSource(computeString);
}

TestInstance *IndirectDispatchCaseBufferGenerate::createInstance(Context &context) const
{
    return new IndirectDispatchInstanceBufferGenerate(context, m_name, m_bufferSize, m_workGroupSize,
                                                      m_dispatchCommands, m_computeOnlyQueue,
                                                      m_computePipelineConstructionType);
}

DispatchCommandsVec commandsVec(const DispatchCommand &cmd)
{
    DispatchCommandsVec vec;
    vec.push_back(cmd);
    return vec;
}

DispatchCommandsVec commandsVec(const DispatchCommand &cmd0, const DispatchCommand &cmd1, const DispatchCommand &cmd2,
                                const DispatchCommand &cmd3, const DispatchCommand &cmd4)
{
    DispatchCommandsVec vec;
    vec.push_back(cmd0);
    vec.push_back(cmd1);
    vec.push_back(cmd2);
    vec.push_back(cmd3);
    vec.push_back(cmd4);
    return vec;
}

DispatchCommandsVec commandsVec(const DispatchCommand &cmd0, const DispatchCommand &cmd1, const DispatchCommand &cmd2,
                                const DispatchCommand &cmd3, const DispatchCommand &cmd4, const DispatchCommand &cmd5,
                                const DispatchCommand &cmd6)
{
    DispatchCommandsVec vec;
    vec.push_back(cmd0);
    vec.push_back(cmd1);
    vec.push_back(cmd2);
    vec.push_back(cmd3);
    vec.push_back(cmd4);
    vec.push_back(cmd5);
    vec.push_back(cmd6);
    return vec;
}

} // namespace

tcu::TestCaseGroup *createIndirectComputeDispatchTests(
    tcu::TestContext &testCtx, vk::ComputePipelineConstructionType computePipelineConstructionType)
{

    static const DispatchCaseDesc s_dispatchCases[] = {
        // Single invocation only from offset 0
        DispatchCaseDesc("single_invocation", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
                         commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1))), false),
        // Multiple groups dispatched from offset 0
        DispatchCaseDesc("multiple_groups", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
                         commandsVec(DispatchCommand(0, tcu::UVec3(2, 3, 5))), false),
        // Multiple groups of size 2x3x1 from offset 0
        DispatchCaseDesc("multiple_groups_multiple_invocations", INDIRECT_COMMAND_OFFSET, tcu::UVec3(2, 3, 1),
                         commandsVec(DispatchCommand(0, tcu::UVec3(1, 2, 3))), false),
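        // Dispatch a command stored at a small non-zero offset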
        DispatchCaseDesc("small_offset", 16 + INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
                         commandsVec(DispatchCommand(16, tcu::UVec3(1, 1, 1))), false),
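        // Dispatch a command stored at a large offset (1 MiB + 12 bytes into a 2 MiB buffer)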
        DispatchCaseDesc("large_offset", (2 << 20), tcu::UVec3(1, 1, 1),
                         commandsVec(DispatchCommand((1 << 20) + 12, tcu::UVec3(1, 1, 1))), false),
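        // As above, but with multiple invocations per work group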
        DispatchCaseDesc("large_offset_multiple_invocations", (2 << 20), tcu::UVec3(2, 3, 1),
                         commandsVec(DispatchCommand((1 << 20) + 12, tcu::UVec3(1, 2, 3))), false),
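        // Empty dispatch: all work group counts are zero, so no invocations run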
        DispatchCaseDesc("empty_command", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
                         commandsVec(DispatchCommand(0, tcu::UVec3(0, 0, 0))), false),
        // Dispatch multiple compute commands from a single buffer
        DispatchCaseDesc("multi_dispatch", 1 << 10, tcu::UVec3(3, 1, 2),
                         commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1)),
                                     DispatchCommand(INDIRECT_COMMAND_OFFSET, tcu::UVec3(2, 1, 1)),
                                     DispatchCommand(104, tcu::UVec3(1, 3, 1)),
                                     DispatchCommand(40, tcu::UVec3(1, 1, 7)),
                                     DispatchCommand(52, tcu::UVec3(1, 1, 4))),
                         false),
        // Dispatch multiple compute commands from a single buffer, reusing some of the commands
        DispatchCaseDesc("multi_dispatch_reuse_command", 1 << 10, tcu::UVec3(3, 1, 2),
                         commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1)), DispatchCommand(0, tcu::UVec3(1, 1, 1)),
                                     DispatchCommand(0, tcu::UVec3(1, 1, 1)), DispatchCommand(104, tcu::UVec3(1, 3, 1)),
                                     DispatchCommand(104, tcu::UVec3(1, 3, 1)),
                                     DispatchCommand(52, tcu::UVec3(1, 1, 4)),
                                     DispatchCommand(52, tcu::UVec3(1, 1, 4))),
                         false),
    };

    de::MovePtr<tcu::TestCaseGroup> indirectComputeDispatchTests(new tcu::TestCaseGroup(testCtx, "indirect_dispatch"));

    tcu::TestCaseGroup *const groupBufferUpload = new tcu::TestCaseGroup(testCtx, "upload_buffer");
    indirectComputeDispatchTests->addChild(groupBufferUpload);

    for (uint32_t ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_dispatchCases); ndx++)
    {
        DispatchCaseDesc caseDesc        = s_dispatchCases[ndx];
        std::string computeName          = std::string(caseDesc.m_name) + std::string("_compute_only_queue");
        DispatchCaseDesc computeOnlyDesc = DispatchCaseDesc(
            computeName.c_str(), caseDesc.m_bufferSize, caseDesc.m_workGroupSize, caseDesc.m_dispatchCommands, true);
        groupBufferUpload->addChild(new IndirectDispatchCaseBufferUpload(testCtx, caseDesc, glu::GLSL_VERSION_310_ES,
                                                                         computePipelineConstructionType));
        groupBufferUpload->addChild(new IndirectDispatchCaseBufferUpload(
            testCtx, computeOnlyDesc, glu::GLSL_VERSION_310_ES, computePipelineConstructionType));
    }

    tcu::TestCaseGroup *const groupBufferGenerate = new tcu::TestCaseGroup(testCtx, "gen_in_compute");
    indirectComputeDispatchTests->addChild(groupBufferGenerate);

    for (uint32_t ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_dispatchCases); ndx++)
    {
        DispatchCaseDesc caseDesc        = s_dispatchCases[ndx];
        std::string computeName          = std::string(caseDesc.m_name) + std::string("_compute_only_queue");
        DispatchCaseDesc computeOnlyDesc = DispatchCaseDesc(
            computeName.c_str(), caseDesc.m_bufferSize, caseDesc.m_workGroupSize, caseDesc.m_dispatchCommands, true);
        groupBufferGenerate->addChild(new IndirectDispatchCaseBufferGenerate(
            testCtx, caseDesc, glu::GLSL_VERSION_310_ES, computePipelineConstructionType));
        groupBufferGenerate->addChild(new IndirectDispatchCaseBufferGenerate(
            testCtx, computeOnlyDesc, glu::GLSL_VERSION_310_ES, computePipelineConstructionType));
    }

    return indirectComputeDispatchTests.release();
}

} // namespace compute
} // namespace vkt