/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2016 The Khronos Group Inc.
 * Copyright (c) 2016 The Android Open Source Project
 * Copyright (c) 2023 LunarG, Inc.
 * Copyright (c) 2023 Nintendo
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Indirect Compute Dispatch tests
 *//*--------------------------------------------------------------------*/

#include "vktComputeIndirectComputeDispatchTests.hpp"
#include "vktComputeTestsUtil.hpp"
#include "vktCustomInstancesDevices.hpp"
#include "vkSafetyCriticalUtil.hpp"

#include <string>
#include <map>
#include <vector>

#include "vkDefs.hpp"
#include "vkRef.hpp"
#include "vkRefUtil.hpp"
#include "vktTestCase.hpp"
#include "vktTestCaseUtil.hpp"
#include "vkPlatform.hpp"
#include "vkPrograms.hpp"
#include "vkMemUtil.hpp"
#include "vkBarrierUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkDeviceUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkObjUtil.hpp"
#include "vkBufferWithMemory.hpp"

#include "tcuVector.hpp"
#include "tcuVectorUtil.hpp"
#include "tcuTestLog.hpp"
#include "tcuRGBA.hpp"
#include "tcuStringTemplate.hpp"

#include "deUniquePtr.hpp"
#include "deSharedPtr.hpp"
#include "deStringUtil.hpp"
#include "deArrayUtil.hpp"

#include "gluShaderUtil.hpp"
#include "tcuCommandLine.hpp"

#include <set>

namespace vkt
{
namespace compute
{
namespace
{
std::vector<std::string> removeCoreExtensions(const std::vector<std::string> &supportedExtensions,
                                              const std::vector<const char *> &coreExtensions)
{
    std::vector<std::string> nonCoreExtensions;
    std::set<std::string> excludedExtensions(coreExtensions.begin(), coreExtensions.end());

    for (const auto &supportedExtension : supportedExtensions)
    {
        if (!de::contains(excludedExtensions, supportedExtension))
            nonCoreExtensions.push_back(supportedExtension);
    }

    return nonCoreExtensions;
}

// Creates a device with a queue that supports compute but not graphics capabilities.
vk::Move<vk::VkDevice> createCustomDevice(Context &context,
#ifdef CTS_USES_VULKANSC
                                          const vkt::CustomInstance &customInstance,
#endif // CTS_USES_VULKANSC
                                          uint32_t &queueFamilyIndex)
{
#ifdef CTS_USES_VULKANSC
    const vk::InstanceInterface &instanceDriver = customInstance.getDriver();
    const vk::VkPhysicalDevice physicalDevice =
        chooseDevice(instanceDriver, customInstance, context.getTestContext().getCommandLine());
#else
    const vk::InstanceInterface &instanceDriver = context.getInstanceInterface();
    const vk::VkPhysicalDevice physicalDevice = context.getPhysicalDevice();
#endif // CTS_USES_VULKANSC

    const std::vector<vk::VkQueueFamilyProperties> queueFamilies =
        getPhysicalDeviceQueueFamilyProperties(instanceDriver, physicalDevice);

    queueFamilyIndex = 0;
    for (const auto &queueFamily : queueFamilies)
    {
        if (queueFamily.queueFlags & vk::VK_QUEUE_COMPUTE_BIT &&
            !(queueFamily.queueFlags & vk::VK_QUEUE_GRAPHICS_BIT) &&
            queueFamilyIndex != context.getUniversalQueueFamilyIndex())
            break;
        else
            queueFamilyIndex++;
    }

    // One queue family without a graphics bit should be found, since this is checked in checkSupport.
    DE_ASSERT(queueFamilyIndex < queueFamilies.size());

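    // Request two queues: one from the universal family and one from the compute-only family found
    // above. The test itself submits to the compute-only queue, retrieved later via getDeviceQueue().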
    const float queuePriority = 1.0f;
    const vk::VkDeviceQueueCreateInfo deviceQueueCreateInfos[] = {
        {
            vk::VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, // VkStructureType sType;
            DE_NULL, // const void* pNext;
            (vk::VkDeviceQueueCreateFlags)0u, // VkDeviceQueueCreateFlags flags;
            context.getUniversalQueueFamilyIndex(), // uint32_t queueFamilyIndex;
            1u, // uint32_t queueCount;
            &queuePriority, // const float* pQueuePriorities;
        },
        {
            vk::VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, // VkStructureType sType;
            DE_NULL, // const void* pNext;
            (vk::VkDeviceQueueCreateFlags)0u, // VkDeviceQueueCreateFlags flags;
            queueFamilyIndex, // uint32_t queueFamilyIndex;
            1u, // uint32_t queueCount;
            &queuePriority, // const float* pQueuePriorities;
        }};

    // context.getDeviceExtensions() returns the supported device extensions, including extensions that
    // have been promoted to Vulkan core. The core extensions must be removed from the list.
    std::vector<const char *> coreExtensions;
    vk::getCoreDeviceExtensions(context.getUsedApiVersion(), coreExtensions);
    std::vector<std::string> nonCoreExtensions(removeCoreExtensions(context.getDeviceExtensions(), coreExtensions));

    std::vector<const char *> extensionNames;
    extensionNames.reserve(nonCoreExtensions.size());
    for (const std::string &extension : nonCoreExtensions)
        extensionNames.push_back(extension.c_str());

    const auto &deviceFeatures2 = context.getDeviceFeatures2();

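    // Enable the same features as the default test device by chaining VkPhysicalDeviceFeatures2 through
    // pNext; pEnabledFeatures in the create info below must then stay NULL.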
    const void *pNext = &deviceFeatures2;
#ifdef CTS_USES_VULKANSC
    VkDeviceObjectReservationCreateInfo memReservationInfo = context.getTestContext().getCommandLine().isSubProcess() ?
                                                                 context.getResourceInterface()->getStatMax() :
                                                                 resetDeviceObjectReservationCreateInfo();
    memReservationInfo.pNext = pNext;
    pNext = &memReservationInfo;

    VkPipelineCacheCreateInfo pcCI;
    std::vector<VkPipelinePoolSize> poolSizes;
    if (context.getTestContext().getCommandLine().isSubProcess())
    {
        if (context.getResourceInterface()->getCacheDataSize() > 0)
        {
            pcCI = {
                VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, // VkStructureType sType;
                DE_NULL, // const void* pNext;
                VK_PIPELINE_CACHE_CREATE_READ_ONLY_BIT |
                    VK_PIPELINE_CACHE_CREATE_USE_APPLICATION_STORAGE_BIT, // VkPipelineCacheCreateFlags flags;
                context.getResourceInterface()->getCacheDataSize(), // uintptr_t initialDataSize;
                context.getResourceInterface()->getCacheData() // const void* pInitialData;
            };
            memReservationInfo.pipelineCacheCreateInfoCount = 1;
            memReservationInfo.pPipelineCacheCreateInfos = &pcCI;
        }
        poolSizes = context.getResourceInterface()->getPipelinePoolSizes();
        if (!poolSizes.empty())
        {
            memReservationInfo.pipelinePoolSizeCount = uint32_t(poolSizes.size());
            memReservationInfo.pPipelinePoolSizes = poolSizes.data();
        }
    }
#endif // CTS_USES_VULKANSC

    const vk::VkDeviceCreateInfo deviceCreateInfo = {
        vk::VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, // VkStructureType sType;
        pNext, // const void* pNext;
        (vk::VkDeviceCreateFlags)0u, // VkDeviceCreateFlags flags;
        DE_LENGTH_OF_ARRAY(deviceQueueCreateInfos), // uint32_t queueCreateInfoCount;
        deviceQueueCreateInfos, // const VkDeviceQueueCreateInfo* pQueueCreateInfos;
        0u, // uint32_t enabledLayerCount;
        DE_NULL, // const char* const* ppEnabledLayerNames;
        static_cast<uint32_t>(extensionNames.size()), // uint32_t enabledExtensionCount;
        extensionNames.data(), // const char* const* ppEnabledExtensionNames;
        DE_NULL, // const VkPhysicalDeviceFeatures* pEnabledFeatures;
    };

    return vkt::createCustomDevice(context.getTestContext().getCommandLine().isValidationEnabled(),
                                   context.getPlatformInterface(),
#ifdef CTS_USES_VULKANSC
                                   customInstance,
#else
                                   context.getInstance(),
#endif
                                   instanceDriver, physicalDevice, &deviceCreateInfo);
}

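// Result block layout (std430): uvec3 expectedGroupCount at offset 0, uint numPassed at offset 12.
// INDIRECT_COMMAND_OFFSET equals sizeof(VkDispatchIndirectCommand), i.e. three uint32_t group counts,
// and is also used as the stride between packed indirect commands.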
enum
{
    RESULT_BLOCK_BASE_SIZE = 4 * (int)sizeof(uint32_t), // uvec3 + uint
    RESULT_BLOCK_NUM_PASSED_OFFSET = 3 * (int)sizeof(uint32_t),
    INDIRECT_COMMAND_OFFSET = 3 * (int)sizeof(uint32_t),
};

vk::VkDeviceSize getResultBlockAlignedSize(const vk::InstanceInterface &instance_interface,
                                           const vk::VkPhysicalDevice physicalDevice, const vk::VkDeviceSize baseSize)
{
    // TODO getPhysicalDeviceProperties() was added to vkQueryUtil in 41-image-load-store-tests. Use it once it's merged.
    vk::VkPhysicalDeviceProperties deviceProperties;
    instance_interface.getPhysicalDeviceProperties(physicalDevice, &deviceProperties);
    vk::VkDeviceSize alignment = deviceProperties.limits.minStorageBufferOffsetAlignment;

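    // Round baseSize up to the next multiple of minStorageBufferOffsetAlignment, so each result block
    // can be bound at a buffer offset that satisfies the device limit.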
    if (alignment == 0 || (baseSize % alignment == 0))
        return baseSize;
    else
        return (baseSize / alignment + 1) * alignment;
}

struct DispatchCommand
{
    DispatchCommand(const intptr_t offset, const tcu::UVec3 &numWorkGroups)
        : m_offset(offset)
        , m_numWorkGroups(numWorkGroups)
    {
    }

    intptr_t m_offset;
    tcu::UVec3 m_numWorkGroups;
};

typedef std::vector<DispatchCommand> DispatchCommandsVec;

struct DispatchCaseDesc
{
    DispatchCaseDesc(const char *name, const uintptr_t bufferSize, const tcu::UVec3 workGroupSize,
                     const DispatchCommandsVec &dispatchCommands, const bool computeQueueOnly)
        : m_name(name)
        , m_bufferSize(bufferSize)
        , m_workGroupSize(workGroupSize)
        , m_dispatchCommands(dispatchCommands)
        , m_computeOnlyQueue(computeQueueOnly)
    {
    }

    const char *m_name;
    const uintptr_t m_bufferSize;
    const tcu::UVec3 m_workGroupSize;
    const DispatchCommandsVec m_dispatchCommands;
    const bool m_computeOnlyQueue;
};

class IndirectDispatchInstanceBufferUpload : public vkt::TestInstance
{
public:
    IndirectDispatchInstanceBufferUpload(Context &context, const std::string &name, const uintptr_t bufferSize,
                                         const tcu::UVec3 &workGroupSize, const DispatchCommandsVec &dispatchCommands,
                                         const bool computeQueueOnly,
                                         const vk::ComputePipelineConstructionType computePipelineConstructionType);

    virtual ~IndirectDispatchInstanceBufferUpload(void)
    {
    }

    virtual tcu::TestStatus iterate(void);

protected:
    virtual void fillIndirectBufferData(const vk::VkCommandBuffer commandBuffer, const vk::DeviceInterface &vkdi,
                                        const vk::BufferWithMemory &indirectBuffer);

    bool verifyResultBuffer(const vk::BufferWithMemory &resultBuffer, const vk::DeviceInterface &vkdi,
                            const vk::VkDeviceSize resultBlockSize) const;

    Context &m_context;
    const std::string m_name;

    vk::VkDevice m_device;
#ifdef CTS_USES_VULKANSC
    const CustomInstance m_customInstance;
#endif // CTS_USES_VULKANSC
    vk::Move<vk::VkDevice> m_customDevice;
#ifndef CTS_USES_VULKANSC
    de::MovePtr<vk::DeviceDriver> m_deviceDriver;
#else
    de::MovePtr<DeviceDriverSC, DeinitDeviceDeleter> m_deviceDriver;
#endif // CTS_USES_VULKANSC

    vk::VkQueue m_queue;
    uint32_t m_queueFamilyIndex;

    const uintptr_t m_bufferSize;
    const tcu::UVec3 m_workGroupSize;
    const DispatchCommandsVec m_dispatchCommands;

    de::MovePtr<vk::Allocator> m_allocator;

    const bool m_computeQueueOnly;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;

private:
    IndirectDispatchInstanceBufferUpload(const vkt::TestInstance &);
    IndirectDispatchInstanceBufferUpload &operator=(const vkt::TestInstance &);
};

IndirectDispatchInstanceBufferUpload::IndirectDispatchInstanceBufferUpload(
    Context &context, const std::string &name, const uintptr_t bufferSize, const tcu::UVec3 &workGroupSize,
    const DispatchCommandsVec &dispatchCommands, const bool computeQueueOnly,
    const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : vkt::TestInstance(context)
    , m_context(context)
    , m_name(name)
    , m_device(context.getDevice())
#ifdef CTS_USES_VULKANSC
    , m_customInstance(createCustomInstanceFromContext(context))
#endif // CTS_USES_VULKANSC
    , m_queue(context.getUniversalQueue())
    , m_queueFamilyIndex(context.getUniversalQueueFamilyIndex())
    , m_bufferSize(bufferSize)
    , m_workGroupSize(workGroupSize)
    , m_dispatchCommands(dispatchCommands)
    , m_computeQueueOnly(computeQueueOnly)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
}

void IndirectDispatchInstanceBufferUpload::fillIndirectBufferData(const vk::VkCommandBuffer commandBuffer,
                                                                  const vk::DeviceInterface &vkdi,
                                                                  const vk::BufferWithMemory &indirectBuffer)
{
    DE_UNREF(commandBuffer);

    const vk::Allocation &alloc = indirectBuffer.getAllocation();
    uint8_t *indirectDataPtr = reinterpret_cast<uint8_t *>(alloc.getHostPtr());

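    // The indirect buffer is host-visible: write each VkDispatchIndirectCommand directly through the
    // mapped pointer, then flush the allocation below so the writes become visible to the device.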
    for (DispatchCommandsVec::const_iterator cmdIter = m_dispatchCommands.begin(); cmdIter != m_dispatchCommands.end();
         ++cmdIter)
    {
        DE_ASSERT(cmdIter->m_offset >= 0);
        DE_ASSERT(cmdIter->m_offset % sizeof(uint32_t) == 0);
        DE_ASSERT(cmdIter->m_offset + INDIRECT_COMMAND_OFFSET <= (intptr_t)m_bufferSize);

        uint32_t *const dstPtr = (uint32_t *)&indirectDataPtr[cmdIter->m_offset];

        dstPtr[0] = cmdIter->m_numWorkGroups[0];
        dstPtr[1] = cmdIter->m_numWorkGroups[1];
        dstPtr[2] = cmdIter->m_numWorkGroups[2];
    }

    vk::flushAlloc(vkdi, m_device, alloc);
}

tcu::TestStatus IndirectDispatchInstanceBufferUpload::iterate(void)
{
#ifdef CTS_USES_VULKANSC
    const vk::InstanceInterface &vki = m_customInstance.getDriver();
#else
    const vk::InstanceInterface &vki = m_context.getInstanceInterface();
#endif // CTS_USES_VULKANSC
    tcu::TestContext &testCtx = m_context.getTestContext();

    testCtx.getLog() << tcu::TestLog::Message << "GL_DISPATCH_INDIRECT_BUFFER size = " << m_bufferSize
                     << tcu::TestLog::EndMessage;
    {
        tcu::ScopedLogSection section(testCtx.getLog(), "Commands",
                                      "Indirect Dispatch Commands (" + de::toString(m_dispatchCommands.size()) +
                                          " in total)");

        for (uint32_t cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
        {
            testCtx.getLog() << tcu::TestLog::Message << cmdNdx << ": "
                             << "offset = " << m_dispatchCommands[cmdNdx].m_offset
                             << ", numWorkGroups = " << m_dispatchCommands[cmdNdx].m_numWorkGroups
                             << tcu::TestLog::EndMessage;
        }
    }

    if (m_computeQueueOnly)
    {
        // m_queueFamilyIndex will be updated in createCustomDevice() to match the requested queue type.
        m_customDevice = createCustomDevice(m_context,
#ifdef CTS_USES_VULKANSC
                                            m_customInstance,
#endif
                                            m_queueFamilyIndex);
        m_device = m_customDevice.get();
#ifndef CTS_USES_VULKANSC
        m_deviceDriver = de::MovePtr<vk::DeviceDriver>(
            new vk::DeviceDriver(m_context.getPlatformInterface(), m_context.getInstance(), m_device,
                                 m_context.getUsedApiVersion(), m_context.getTestContext().getCommandLine()));
#else
        m_deviceDriver = de::MovePtr<vk::DeviceDriverSC, vk::DeinitDeviceDeleter>(
            new vk::DeviceDriverSC(m_context.getPlatformInterface(), m_customInstance, m_device,
                                   m_context.getTestContext().getCommandLine(), m_context.getResourceInterface(),
                                   m_context.getDeviceVulkanSC10Properties(), m_context.getDeviceProperties(),
                                   m_context.getUsedApiVersion()),
            vk::DeinitDeviceDeleter(m_context.getResourceInterface().get(), m_device));
#endif // CTS_USES_VULKANSC
    }
#ifndef CTS_USES_VULKANSC
    const vk::DeviceInterface &vkdi = m_context.getDeviceInterface();
#else
    const vk::DeviceInterface &vkdi =
        (m_computeQueueOnly && (DE_NULL != m_deviceDriver)) ? *m_deviceDriver : m_context.getDeviceInterface();
#endif // CTS_USES_VULKANSC
    if (m_computeQueueOnly)
    {
        m_queue = getDeviceQueue(vkdi, m_device, m_queueFamilyIndex, 0u);
        m_allocator = de::MovePtr<vk::Allocator>(new vk::SimpleAllocator(
            vkdi, m_device, vk::getPhysicalDeviceMemoryProperties(vki, m_context.getPhysicalDevice())));
    }
    vk::Allocator &allocator = m_allocator.get() ? *m_allocator : m_context.getDefaultAllocator();

    // Create result buffer
    const vk::VkDeviceSize resultBlockSize =
        getResultBlockAlignedSize(vki, m_context.getPhysicalDevice(), RESULT_BLOCK_BASE_SIZE);
    const vk::VkDeviceSize resultBufferSize = resultBlockSize * (uint32_t)m_dispatchCommands.size();

    vk::BufferWithMemory resultBuffer(
        vkdi, m_device, allocator, vk::makeBufferCreateInfo(resultBufferSize, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
        vk::MemoryRequirement::HostVisible);

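    // Seed each result block with the expected gl_NumWorkGroups values for its dispatch and a zeroed
    // pass counter; the verify shader atomically increments the counter for every matching invocation.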
    {
        const vk::Allocation &alloc = resultBuffer.getAllocation();
        uint8_t *resultDataPtr = reinterpret_cast<uint8_t *>(alloc.getHostPtr());

        for (uint32_t cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
        {
            uint8_t *const dstPtr = &resultDataPtr[resultBlockSize * cmdNdx];

            *(uint32_t *)(dstPtr + 0 * sizeof(uint32_t)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[0];
            *(uint32_t *)(dstPtr + 1 * sizeof(uint32_t)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[1];
            *(uint32_t *)(dstPtr + 2 * sizeof(uint32_t)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[2];
            *(uint32_t *)(dstPtr + RESULT_BLOCK_NUM_PASSED_OFFSET) = 0;
        }

        vk::flushAlloc(vkdi, m_device, alloc);
    }

    // Create descriptorSetLayout
    vk::DescriptorSetLayoutBuilder layoutBuilder;
    layoutBuilder.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT);
    vk::Unique<vk::VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vkdi, m_device));

    // Create compute pipeline
    vk::ComputePipelineWrapper computePipeline(
        vkdi, m_device, m_computePipelineConstructionType,
        m_context.getBinaryCollection().get("indirect_dispatch_" + m_name + "_verify"));
    computePipeline.setDescriptorSetLayout(descriptorSetLayout.get());
    computePipeline.buildPipeline();

    // Create descriptor pool
    const vk::Unique<vk::VkDescriptorPool> descriptorPool(
        vk::DescriptorPoolBuilder()
            .addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, (uint32_t)m_dispatchCommands.size())
            .build(vkdi, m_device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
                   static_cast<uint32_t>(m_dispatchCommands.size())));

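    // Barrier making the compute shader writes to the result buffer available to the host read-back
    // performed in verifyResultBuffer().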
    const vk::VkBufferMemoryBarrier ssboPostBarrier = makeBufferMemoryBarrier(
        vk::VK_ACCESS_SHADER_WRITE_BIT, vk::VK_ACCESS_HOST_READ_BIT, *resultBuffer, 0ull, resultBufferSize);

    // Create command buffer
    const vk::Unique<vk::VkCommandPool> cmdPool(makeCommandPool(vkdi, m_device, m_queueFamilyIndex));
    const vk::Unique<vk::VkCommandBuffer> cmdBuffer(
        allocateCommandBuffer(vkdi, m_device, *cmdPool, vk::VK_COMMAND_BUFFER_LEVEL_PRIMARY));

    // Begin recording commands
    beginCommandBuffer(vkdi, *cmdBuffer);

    // Create indirect buffer
    vk::BufferWithMemory indirectBuffer(
        vkdi, m_device, allocator,
        vk::makeBufferCreateInfo(m_bufferSize,
                                 vk::VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT | vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
        vk::MemoryRequirement::HostVisible);
    fillIndirectBufferData(*cmdBuffer, vkdi, indirectBuffer);

    // Bind compute pipeline
    computePipeline.bind(*cmdBuffer);

    // Allocate descriptor sets
    typedef de::SharedPtr<vk::Unique<vk::VkDescriptorSet>> SharedVkDescriptorSet;
    std::vector<SharedVkDescriptorSet> descriptorSets(m_dispatchCommands.size());

    vk::VkDeviceSize curOffset = 0;

    // Create descriptor sets
    for (uint32_t cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
    {
        descriptorSets[cmdNdx] = SharedVkDescriptorSet(new vk::Unique<vk::VkDescriptorSet>(
            makeDescriptorSet(vkdi, m_device, *descriptorPool, *descriptorSetLayout)));

        const vk::VkDescriptorBufferInfo resultDescriptorInfo =
            makeDescriptorBufferInfo(*resultBuffer, curOffset, resultBlockSize);

        vk::DescriptorSetUpdateBuilder descriptorSetBuilder;
        descriptorSetBuilder.writeSingle(**descriptorSets[cmdNdx],
                                         vk::DescriptorSetUpdateBuilder::Location::binding(0u),
                                         vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);
        descriptorSetBuilder.update(vkdi, m_device);

        // Bind descriptor set
        vkdi.cmdBindDescriptorSets(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, computePipeline.getPipelineLayout(),
                                   0u, 1u, &(**descriptorSets[cmdNdx]), 0u, DE_NULL);

        // Dispatch indirect compute command
        vkdi.cmdDispatchIndirect(*cmdBuffer, *indirectBuffer, m_dispatchCommands[cmdNdx].m_offset);

        curOffset += resultBlockSize;
    }

    // Insert memory barrier
    vkdi.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT,
                            (vk::VkDependencyFlags)0, 0, (const vk::VkMemoryBarrier *)DE_NULL, 1, &ssboPostBarrier, 0,
                            (const vk::VkImageMemoryBarrier *)DE_NULL);

    // End recording commands
    endCommandBuffer(vkdi, *cmdBuffer);

    // Submit the command buffer and wait for it to complete
    submitCommandsAndWait(vkdi, m_device, m_queue, *cmdBuffer);

    // Check if the result buffer contains valid values
    if (verifyResultBuffer(resultBuffer, vkdi, resultBlockSize))
        return tcu::TestStatus(QP_TEST_RESULT_PASS, "Pass");
    else
        return tcu::TestStatus(QP_TEST_RESULT_FAIL, "Invalid values in result buffer");
}

bool IndirectDispatchInstanceBufferUpload::verifyResultBuffer(const vk::BufferWithMemory &resultBuffer,
                                                              const vk::DeviceInterface &vkdi,
                                                              const vk::VkDeviceSize resultBlockSize) const
{
    bool allOk = true;
    const vk::Allocation &alloc = resultBuffer.getAllocation();
    vk::invalidateAlloc(vkdi, m_device, alloc);

    const uint8_t *const resultDataPtr = reinterpret_cast<uint8_t *>(alloc.getHostPtr());

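    // Each correctly dispatched invocation increments numPassed exactly once, so the expected value is
    // the total invocation count: (workGroupSize.x*y*z) * (numWorkGroups.x*y*z).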
    for (uint32_t cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); cmdNdx++)
    {
        const DispatchCommand &cmd = m_dispatchCommands[cmdNdx];
        const uint8_t *const srcPtr = (const uint8_t *)resultDataPtr + cmdNdx * resultBlockSize;
        const uint32_t numPassed = *(const uint32_t *)(srcPtr + RESULT_BLOCK_NUM_PASSED_OFFSET);
        const uint32_t numInvocationsPerGroup = m_workGroupSize[0] * m_workGroupSize[1] * m_workGroupSize[2];
        const uint32_t numGroups = cmd.m_numWorkGroups[0] * cmd.m_numWorkGroups[1] * cmd.m_numWorkGroups[2];
        const uint32_t expectedCount = numInvocationsPerGroup * numGroups;

        if (numPassed != expectedCount)
        {
            tcu::TestContext &testCtx = m_context.getTestContext();

            testCtx.getLog() << tcu::TestLog::Message << "ERROR: got invalid result for invocation " << cmdNdx
                             << ": got numPassed = " << numPassed << ", expected " << expectedCount
                             << tcu::TestLog::EndMessage;

            allOk = false;
        }
    }

    return allOk;
}

class IndirectDispatchCaseBufferUpload : public vkt::TestCase
{
public:
    IndirectDispatchCaseBufferUpload(tcu::TestContext &testCtx, const DispatchCaseDesc &caseDesc,
                                     const glu::GLSLVersion glslVersion,
                                     const vk::ComputePipelineConstructionType computePipelineConstructionType);

    virtual ~IndirectDispatchCaseBufferUpload(void)
    {
    }

    virtual void initPrograms(vk::SourceCollections &programCollection) const;
    virtual TestInstance *createInstance(Context &context) const;
    virtual void checkSupport(Context &context) const;

protected:
    const uintptr_t m_bufferSize;
    const tcu::UVec3 m_workGroupSize;
    const DispatchCommandsVec m_dispatchCommands;
    const glu::GLSLVersion m_glslVersion;
    const bool m_computeOnlyQueue;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;

private:
    IndirectDispatchCaseBufferUpload(const vkt::TestCase &);
    IndirectDispatchCaseBufferUpload &operator=(const vkt::TestCase &);
};

IndirectDispatchCaseBufferUpload::IndirectDispatchCaseBufferUpload(
    tcu::TestContext &testCtx, const DispatchCaseDesc &caseDesc, const glu::GLSLVersion glslVersion,
    const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : vkt::TestCase(testCtx, caseDesc.m_name)
    , m_bufferSize(caseDesc.m_bufferSize)
    , m_workGroupSize(caseDesc.m_workGroupSize)
    , m_dispatchCommands(caseDesc.m_dispatchCommands)
    , m_glslVersion(glslVersion)
    , m_computeOnlyQueue(caseDesc.m_computeOnlyQueue)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
}

void IndirectDispatchCaseBufferUpload::initPrograms(vk::SourceCollections &programCollection) const
{
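    // The verify shader compares the built-in gl_NumWorkGroups against the expected group count stored
    // at the start of its result block, atomically counting every invocation where they match.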
    const char *const versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);

    std::ostringstream verifyBuffer;

    verifyBuffer << versionDecl << "\n"
                 << "layout(local_size_x = ${LOCAL_SIZE_X}, local_size_y = ${LOCAL_SIZE_Y}, local_size_z = "
                    "${LOCAL_SIZE_Z}) in;\n"
                 << "layout(set = 0, binding = 0, std430) buffer Result\n"
                 << "{\n"
                 << "    uvec3 expectedGroupCount;\n"
                 << "    coherent uint numPassed;\n"
                 << "} result;\n"
                 << "void main (void)\n"
                 << "{\n"
                 << "    if (all(equal(result.expectedGroupCount, gl_NumWorkGroups)))\n"
                 << "        atomicAdd(result.numPassed, 1u);\n"
                 << "}\n";

    std::map<std::string, std::string> args;

    args["LOCAL_SIZE_X"] = de::toString(m_workGroupSize.x());
    args["LOCAL_SIZE_Y"] = de::toString(m_workGroupSize.y());
    args["LOCAL_SIZE_Z"] = de::toString(m_workGroupSize.z());

    std::string verifyProgramString = tcu::StringTemplate(verifyBuffer.str()).specialize(args);

    programCollection.glslSources.add("indirect_dispatch_" + m_name + "_verify")
        << glu::ComputeSource(verifyProgramString);
}

TestInstance *IndirectDispatchCaseBufferUpload::createInstance(Context &context) const
{
    return new IndirectDispatchInstanceBufferUpload(context, m_name, m_bufferSize, m_workGroupSize, m_dispatchCommands,
                                                    m_computeOnlyQueue, m_computePipelineConstructionType);
}

void IndirectDispatchCaseBufferUpload::checkSupport(Context &context) const
{
    // Require at least one queue family that supports compute but does NOT support graphics.
    if (m_computeOnlyQueue)
    {
        bool foundQueue = false;
        const std::vector<vk::VkQueueFamilyProperties> queueFamilies =
            getPhysicalDeviceQueueFamilyProperties(context.getInstanceInterface(), context.getPhysicalDevice());

        for (const auto &queueFamily : queueFamilies)
        {
            if (queueFamily.queueFlags & vk::VK_QUEUE_COMPUTE_BIT &&
                !(queueFamily.queueFlags & vk::VK_QUEUE_GRAPHICS_BIT))
            {
                foundQueue = true;
                break;
            }
        }
        if (!foundQueue)
            TCU_THROW(NotSupportedError, "No queue family found that only supports compute queue.");
    }

    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
                                  m_computePipelineConstructionType);
}

class IndirectDispatchInstanceBufferGenerate : public IndirectDispatchInstanceBufferUpload
{
public:
    IndirectDispatchInstanceBufferGenerate(Context &context, const std::string &name, const uintptr_t bufferSize,
                                           const tcu::UVec3 &workGroupSize, const DispatchCommandsVec &dispatchCommands,
                                           const bool computeOnlyQueue,
                                           const vk::ComputePipelineConstructionType computePipelineConstructionType)

        : IndirectDispatchInstanceBufferUpload(context, name, bufferSize, workGroupSize, dispatchCommands,
                                               computeOnlyQueue, computePipelineConstructionType)
    {
    }

    virtual ~IndirectDispatchInstanceBufferGenerate(void)
    {
    }

protected:
    virtual void fillIndirectBufferData(const vk::VkCommandBuffer commandBuffer, const vk::DeviceInterface &vkdi,
                                        const vk::BufferWithMemory &indirectBuffer);

    vk::Move<vk::VkDescriptorSetLayout> m_descriptorSetLayout;
    vk::Move<vk::VkDescriptorPool> m_descriptorPool;
    vk::Move<vk::VkDescriptorSet> m_descriptorSet;
    vk::Move<vk::VkPipelineLayout> m_pipelineLayout;
    vk::Move<vk::VkPipeline> m_computePipeline;

private:
    IndirectDispatchInstanceBufferGenerate(const vkt::TestInstance &);
    IndirectDispatchInstanceBufferGenerate &operator=(const vkt::TestInstance &);
};

void IndirectDispatchInstanceBufferGenerate::fillIndirectBufferData(const vk::VkCommandBuffer commandBuffer,
                                                                    const vk::DeviceInterface &vkdi,
                                                                    const vk::BufferWithMemory &indirectBuffer)
{
    // Create compute shader that generates data for indirect buffer
    const vk::Unique<vk::VkShaderModule> genIndirectBufferDataShader(createShaderModule(
        vkdi, m_device, m_context.getBinaryCollection().get("indirect_dispatch_" + m_name + "_generate"), 0u));

    // Create descriptorSetLayout
    m_descriptorSetLayout =
        vk::DescriptorSetLayoutBuilder()
            .addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT)
            .build(vkdi, m_device);

    // Create compute pipeline
    m_pipelineLayout = makePipelineLayout(vkdi, m_device, *m_descriptorSetLayout);
    m_computePipeline = makeComputePipeline(vkdi, m_device, *m_pipelineLayout, *genIndirectBufferDataShader);

    // Create descriptor pool
    m_descriptorPool = vk::DescriptorPoolBuilder()
                           .addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
                           .build(vkdi, m_device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);

    // Create descriptor set
    m_descriptorSet = makeDescriptorSet(vkdi, m_device, *m_descriptorPool, *m_descriptorSetLayout);

    const vk::VkDescriptorBufferInfo indirectDescriptorInfo =
        makeDescriptorBufferInfo(*indirectBuffer, 0ull, m_bufferSize);

    vk::DescriptorSetUpdateBuilder descriptorSetBuilder;
    descriptorSetBuilder.writeSingle(*m_descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0u),
                                     vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &indirectDescriptorInfo);
    descriptorSetBuilder.update(vkdi, m_device);

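    // Barrier from the generating shader's writes to the indirect-command read:
    // VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT is the stage at which vkCmdDispatchIndirect reads its parameters.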
    const vk::VkBufferMemoryBarrier bufferBarrier = makeBufferMemoryBarrier(
        vk::VK_ACCESS_SHADER_WRITE_BIT, vk::VK_ACCESS_INDIRECT_COMMAND_READ_BIT, *indirectBuffer, 0ull, m_bufferSize);

    // Bind compute pipeline
    vkdi.cmdBindPipeline(commandBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *m_computePipeline);

    // Bind descriptor set
    vkdi.cmdBindDescriptorSets(commandBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipelineLayout, 0u, 1u,
                               &m_descriptorSet.get(), 0u, DE_NULL);

    // Dispatch compute command
    vkdi.cmdDispatch(commandBuffer, 1u, 1u, 1u);

    // Insert memory barrier
    vkdi.cmdPipelineBarrier(commandBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                            vk::VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, (vk::VkDependencyFlags)0, 0,
                            (const vk::VkMemoryBarrier *)DE_NULL, 1, &bufferBarrier, 0,
                            (const vk::VkImageMemoryBarrier *)DE_NULL);
}

class IndirectDispatchCaseBufferGenerate : public IndirectDispatchCaseBufferUpload
{
public:
    IndirectDispatchCaseBufferGenerate(tcu::TestContext &testCtx, const DispatchCaseDesc &caseDesc,
                                       const glu::GLSLVersion glslVersion,
                                       const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : IndirectDispatchCaseBufferUpload(testCtx, caseDesc, glslVersion, computePipelineConstructionType)
    {
    }

    virtual ~IndirectDispatchCaseBufferGenerate(void)
    {
    }

    virtual void initPrograms(vk::SourceCollections &programCollection) const;
    virtual TestInstance *createInstance(Context &context) const;

private:
    IndirectDispatchCaseBufferGenerate(const vkt::TestCase &);
    IndirectDispatchCaseBufferGenerate &operator=(const vkt::TestCase &);
};

void IndirectDispatchCaseBufferGenerate::initPrograms(vk::SourceCollections &programCollection) const
{
    IndirectDispatchCaseBufferUpload::initPrograms(programCollection);

    const char *const versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);

    std::ostringstream computeBuffer;

    // Header
    computeBuffer << versionDecl << "\n"
                  << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
                  << "layout(set = 0, binding = 0, std430) buffer Out\n"
                  << "{\n"
                  << "    highp uint data[];\n"
                  << "};\n"
                  << "void writeCmd (uint offset, uvec3 numWorkGroups)\n"
                  << "{\n"
                  << "    data[offset+0u] = numWorkGroups.x;\n"
                  << "    data[offset+1u] = numWorkGroups.y;\n"
                  << "    data[offset+2u] = numWorkGroups.z;\n"
                  << "}\n"
                  << "void main (void)\n"
                  << "{\n";

    // Dispatch commands
    for (DispatchCommandsVec::const_iterator cmdIter = m_dispatchCommands.begin(); cmdIter != m_dispatchCommands.end();
         ++cmdIter)
    {
        const uint32_t offs = (uint32_t)(cmdIter->m_offset / sizeof(uint32_t));
        DE_ASSERT((size_t)offs * sizeof(uint32_t) == (size_t)cmdIter->m_offset);

        computeBuffer << "\twriteCmd(" << offs << "u, uvec3(" << cmdIter->m_numWorkGroups.x() << "u, "
                      << cmdIter->m_numWorkGroups.y() << "u, " << cmdIter->m_numWorkGroups.z() << "u));\n";
    }

    // Ending
    computeBuffer << "}\n";

    std::string computeString = computeBuffer.str();

    programCollection.glslSources.add("indirect_dispatch_" + m_name + "_generate") << glu::ComputeSource(computeString);
}

TestInstance *IndirectDispatchCaseBufferGenerate::createInstance(Context &context) const
{
    return new IndirectDispatchInstanceBufferGenerate(context, m_name, m_bufferSize, m_workGroupSize,
                                                      m_dispatchCommands, m_computeOnlyQueue,
                                                      m_computePipelineConstructionType);
}

DispatchCommandsVec commandsVec(const DispatchCommand &cmd)
{
    DispatchCommandsVec vec;
    vec.push_back(cmd);
    return vec;
}

DispatchCommandsVec commandsVec(const DispatchCommand &cmd0, const DispatchCommand &cmd1, const DispatchCommand &cmd2,
                                const DispatchCommand &cmd3, const DispatchCommand &cmd4)
{
    DispatchCommandsVec vec;
    vec.push_back(cmd0);
    vec.push_back(cmd1);
    vec.push_back(cmd2);
    vec.push_back(cmd3);
    vec.push_back(cmd4);
    return vec;
}

DispatchCommandsVec commandsVec(const DispatchCommand &cmd0, const DispatchCommand &cmd1, const DispatchCommand &cmd2,
                                const DispatchCommand &cmd3, const DispatchCommand &cmd4, const DispatchCommand &cmd5,
                                const DispatchCommand &cmd6)
{
    DispatchCommandsVec vec;
    vec.push_back(cmd0);
    vec.push_back(cmd1);
    vec.push_back(cmd2);
    vec.push_back(cmd3);
    vec.push_back(cmd4);
    vec.push_back(cmd5);
    vec.push_back(cmd6);
    return vec;
}

} // namespace

tcu::TestCaseGroup *createIndirectComputeDispatchTests(
    tcu::TestContext &testCtx, vk::ComputePipelineConstructionType computePipelineConstructionType)
{

    static const DispatchCaseDesc s_dispatchCases[] = {
        // Single invocation only from offset 0
        DispatchCaseDesc("single_invocation", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
                         commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1))), false),
        // Multiple groups dispatched from offset 0
        DispatchCaseDesc("multiple_groups", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
                         commandsVec(DispatchCommand(0, tcu::UVec3(2, 3, 5))), false),
        // Multiple groups of size 2x3x1 from offset 0
        DispatchCaseDesc("multiple_groups_multiple_invocations", INDIRECT_COMMAND_OFFSET, tcu::UVec3(2, 3, 1),
                         commandsVec(DispatchCommand(0, tcu::UVec3(1, 2, 3))), false),
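        // Single invocation dispatched from a small non-zero offset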
        DispatchCaseDesc("small_offset", 16 + INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
                         commandsVec(DispatchCommand(16, tcu::UVec3(1, 1, 1))), false),
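        // Dispatches from an offset of more than 1 MiB into a 2 MiB buffer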
        DispatchCaseDesc("large_offset", (2 << 20), tcu::UVec3(1, 1, 1),
                         commandsVec(DispatchCommand((1 << 20) + 12, tcu::UVec3(1, 1, 1))), false),
        DispatchCaseDesc("large_offset_multiple_invocations", (2 << 20), tcu::UVec3(2, 3, 1),
                         commandsVec(DispatchCommand((1 << 20) + 12, tcu::UVec3(1, 2, 3))), false),
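        // Dispatch with zero work groups: no invocations may run, so numPassed must stay 0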
        DispatchCaseDesc("empty_command", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
                         commandsVec(DispatchCommand(0, tcu::UVec3(0, 0, 0))), false),
        // Dispatch multiple compute commands from a single buffer
        DispatchCaseDesc("multi_dispatch", 1 << 10, tcu::UVec3(3, 1, 2),
                         commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1)),
                                     DispatchCommand(INDIRECT_COMMAND_OFFSET, tcu::UVec3(2, 1, 1)),
                                     DispatchCommand(104, tcu::UVec3(1, 3, 1)),
                                     DispatchCommand(40, tcu::UVec3(1, 1, 7)),
                                     DispatchCommand(52, tcu::UVec3(1, 1, 4))),
                         false),
        // Dispatch multiple compute commands from a single buffer, reusing some of the commands
        DispatchCaseDesc("multi_dispatch_reuse_command", 1 << 10, tcu::UVec3(3, 1, 2),
                         commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1)), DispatchCommand(0, tcu::UVec3(1, 1, 1)),
                                     DispatchCommand(0, tcu::UVec3(1, 1, 1)), DispatchCommand(104, tcu::UVec3(1, 3, 1)),
                                     DispatchCommand(104, tcu::UVec3(1, 3, 1)),
                                     DispatchCommand(52, tcu::UVec3(1, 1, 4)),
                                     DispatchCommand(52, tcu::UVec3(1, 1, 4))),
                         false),
    };

    de::MovePtr<tcu::TestCaseGroup> indirectComputeDispatchTests(new tcu::TestCaseGroup(testCtx, "indirect_dispatch"));

    tcu::TestCaseGroup *const groupBufferUpload = new tcu::TestCaseGroup(testCtx, "upload_buffer");
    indirectComputeDispatchTests->addChild(groupBufferUpload);

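    // Each case is added twice: once running on the universal queue and once on a compute-only queue
    // (name suffixed with "_compute_only_queue").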
    for (uint32_t ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_dispatchCases); ndx++)
    {
        DispatchCaseDesc caseDesc = s_dispatchCases[ndx];
        std::string computeName = std::string(caseDesc.m_name) + std::string("_compute_only_queue");
        DispatchCaseDesc computeOnlyDesc = DispatchCaseDesc(
            computeName.c_str(), caseDesc.m_bufferSize, caseDesc.m_workGroupSize, caseDesc.m_dispatchCommands, true);
        groupBufferUpload->addChild(new IndirectDispatchCaseBufferUpload(testCtx, caseDesc, glu::GLSL_VERSION_310_ES,
                                                                         computePipelineConstructionType));
        groupBufferUpload->addChild(new IndirectDispatchCaseBufferUpload(
            testCtx, computeOnlyDesc, glu::GLSL_VERSION_310_ES, computePipelineConstructionType));
    }

    tcu::TestCaseGroup *const groupBufferGenerate = new tcu::TestCaseGroup(testCtx, "gen_in_compute");
    indirectComputeDispatchTests->addChild(groupBufferGenerate);

    for (uint32_t ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_dispatchCases); ndx++)
    {
        DispatchCaseDesc caseDesc = s_dispatchCases[ndx];
        std::string computeName = std::string(caseDesc.m_name) + std::string("_compute_only_queue");
        DispatchCaseDesc computeOnlyDesc = DispatchCaseDesc(
            computeName.c_str(), caseDesc.m_bufferSize, caseDesc.m_workGroupSize, caseDesc.m_dispatchCommands, true);
        groupBufferGenerate->addChild(new IndirectDispatchCaseBufferGenerate(
            testCtx, caseDesc, glu::GLSL_VERSION_310_ES, computePipelineConstructionType));
        groupBufferGenerate->addChild(new IndirectDispatchCaseBufferGenerate(
            testCtx, computeOnlyDesc, glu::GLSL_VERSION_310_ES, computePipelineConstructionType));
    }

    return indirectComputeDispatchTests.release();
}

} // namespace compute
} // namespace vkt