/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2019 The Khronos Group Inc.
 * Copyright (c) 2019 The Android Open Source Project
 * Copyright (c) 2023 LunarG, Inc.
 * Copyright (c) 2023 Nintendo
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Compute Shader Tests
 *//*--------------------------------------------------------------------*/

#include "vktComputeBasicComputeShaderTests.hpp"
#include "vktTestCase.hpp"
#include "vktTestCaseUtil.hpp"
#include "vktComputeTestsUtil.hpp"
#include "vktCustomInstancesDevices.hpp"
#include "vktAmberTestCase.hpp"

#include "vkDefs.hpp"
#include "vkRef.hpp"
#include "vkRefUtil.hpp"
#include "vkPlatform.hpp"
#include "vkPrograms.hpp"
#include "vkMemUtil.hpp"
#include "vkBarrierUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkDeviceUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkObjUtil.hpp"
#include "vkBufferWithMemory.hpp"
#include "vkSafetyCriticalUtil.hpp"
#include "vkImageWithMemory.hpp"

#include "tcuCommandLine.hpp"
#include "tcuTestLog.hpp"
#include "tcuMaybe.hpp"

#include "deStringUtil.hpp"
#include "deUniquePtr.hpp"
#include "deRandom.hpp"

#include <vector>
#include <memory>

using namespace vk;

namespace vkt
{
namespace compute
{
namespace
{

template <typename T, int size>
T multiplyComponents(const tcu::Vector<T, size> &v)
{
    T accum = 1;
    for (int i = 0; i < size; ++i)
        accum *= v[i];
    return accum;
}

template <typename T>
inline T squared(const T &a)
{
    return a * a;
}

inline VkImageCreateInfo make2DImageCreateInfo(const tcu::IVec2 &imageSize, const VkImageUsageFlags usage)
{
    const VkImageCreateInfo imageParams = {
        VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,               // VkStructureType sType;
        DE_NULL,                                           // const void* pNext;
        0u,                                                // VkImageCreateFlags flags;
        VK_IMAGE_TYPE_2D,                                  // VkImageType imageType;
        VK_FORMAT_R32_UINT,                                // VkFormat format;
        vk::makeExtent3D(imageSize.x(), imageSize.y(), 1), // VkExtent3D extent;
        1u,                                                // uint32_t mipLevels;
        1u,                                                // uint32_t arrayLayers;
        VK_SAMPLE_COUNT_1_BIT,                             // VkSampleCountFlagBits samples;
        VK_IMAGE_TILING_OPTIMAL,                           // VkImageTiling tiling;
        usage,                                             // VkImageUsageFlags usage;
        VK_SHARING_MODE_EXCLUSIVE,                         // VkSharingMode sharingMode;
        0u,                                                // uint32_t queueFamilyIndexCount;
        DE_NULL,                                           // const uint32_t* pQueueFamilyIndices;
        VK_IMAGE_LAYOUT_UNDEFINED,                         // VkImageLayout initialLayout;
    };
    return imageParams;
}

inline VkBufferImageCopy makeBufferImageCopy(const tcu::IVec2 &imageSize)
{
    return compute::makeBufferImageCopy(vk::makeExtent3D(imageSize.x(), imageSize.y(), 1), 1u);
}

enum BufferType
{
    BUFFER_TYPE_UNIFORM,
    BUFFER_TYPE_SSBO,
};

class SharedVarTest : public vkt::TestCase
{
public:
    SharedVarTest(tcu::TestContext &testCtx, const std::string &name, const tcu::IVec3 &localSize,
                  const tcu::IVec3 &workSize,
                  const vk::ComputePipelineConstructionType computePipelineConstructionType);

    virtual void checkSupport(Context &context) const;
    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const;

private:
    const tcu::IVec3 m_localSize;
    const tcu::IVec3 m_workSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class SharedVarTestInstance : public vkt::TestInstance
{
public:
    SharedVarTestInstance(Context &context, const tcu::IVec3 &localSize, const tcu::IVec3 &workSize,
                          const vk::ComputePipelineConstructionType computePipelineConstructionType);

    tcu::TestStatus iterate(void);

private:
    const tcu::IVec3 m_localSize;
    const tcu::IVec3 m_workSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

SharedVarTest::SharedVarTest(tcu::TestContext &testCtx, const std::string &name, const tcu::IVec3 &localSize,
                             const tcu::IVec3 &workSize,
                             const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestCase(testCtx, name)
    , m_localSize(localSize)
    , m_workSize(workSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
}

void SharedVarTest::checkSupport(Context &context) const
{
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
                                  m_computePipelineConstructionType);
}

void SharedVarTest::initPrograms(SourceCollections &sourceCollections) const
{
    const int workGroupSize = multiplyComponents(m_localSize);
    const int workGroupCount = multiplyComponents(m_workSize);
    const int numValues = workGroupSize * workGroupCount;

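    // Each invocation stores (globalOffs + localOffs^2) into the shared array at the
    // mirrored index (localSize - localOffs - 1), then after a barrier reads back its
    // own slot, so the test only passes if shared-memory writes from other invocations
    // in the workgroup are visible after the barrier.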
    std::ostringstream src;
    src << "#version 310 es\n"
        << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y()
        << ", local_size_z = " << m_localSize.z() << ") in;\n"
        << "layout(binding = 0) writeonly buffer Output {\n"
        << "    uint values[" << numValues << "];\n"
        << "} sb_out;\n\n"
        << "shared uint offsets[" << workGroupSize << "];\n\n"
        << "void main (void) {\n"
        << "    uint localSize = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
        << "    uint globalNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + "
           "gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
        << "    uint globalOffs = localSize*globalNdx;\n"
        << "    uint localOffs = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + "
           "gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
        << "\n"
        << "    offsets[localSize-localOffs-1u] = globalOffs + localOffs*localOffs;\n"
        << "    memoryBarrierShared();\n"
        << "    barrier();\n"
        << "    sb_out.values[globalOffs + localOffs] = offsets[localOffs];\n"
        << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

TestInstance *SharedVarTest::createInstance(Context &context) const
{
    return new SharedVarTestInstance(context, m_localSize, m_workSize, m_computePipelineConstructionType);
}

SharedVarTestInstance::SharedVarTestInstance(Context &context, const tcu::IVec3 &localSize, const tcu::IVec3 &workSize,
                                             const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestInstance(context)
    , m_localSize(localSize)
    , m_workSize(workSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
}

tcu::TestStatus SharedVarTestInstance::iterate(void)
{
    const DeviceInterface &vk = m_context.getDeviceInterface();
    const VkDevice device = m_context.getDevice();
    const VkQueue queue = m_context.getUniversalQueue();
    const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
    Allocator &allocator = m_context.getDefaultAllocator();

    const int workGroupSize = multiplyComponents(m_localSize);
    const int workGroupCount = multiplyComponents(m_workSize);

    // Create a buffer and host-visible memory for it

    const VkDeviceSize bufferSizeBytes = sizeof(uint32_t) * workGroupSize * workGroupCount;
    const BufferWithMemory buffer(vk, device, allocator,
                                  makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
                                  MemoryRequirement::HostVisible);

    // Create descriptor set

    const Unique<VkDescriptorSetLayout> descriptorSetLayout(
        DescriptorSetLayoutBuilder()
            .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
            .build(vk, device));

    const Unique<VkDescriptorPool> descriptorPool(
        DescriptorPoolBuilder()
            .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
            .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));

    const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));

    const VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
    DescriptorSetUpdateBuilder()
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
                     VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
        .update(vk, device);

    // Perform the computation

    ComputePipelineWrapper pipeline(vk, device, m_computePipelineConstructionType,
                                    m_context.getBinaryCollection().get("comp"));
    pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
    pipeline.buildPipeline();

    const VkBufferMemoryBarrier computeFinishBarrier =
        makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);

    const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
    const Unique<VkCommandBuffer> cmdBuffer(
        allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

    // Start recording commands

    beginCommandBuffer(vk, *cmdBuffer);

    pipeline.bind(*cmdBuffer);
    vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getPipelineLayout(), 0u, 1u,
                             &descriptorSet.get(), 0u, DE_NULL);

    vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());

    vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
                          (VkDependencyFlags)0, 0, (const VkMemoryBarrier *)DE_NULL, 1, &computeFinishBarrier, 0,
                          (const VkImageMemoryBarrier *)DE_NULL);

    endCommandBuffer(vk, *cmdBuffer);

    // Wait for completion

    submitCommandsAndWait(vk, device, queue, *cmdBuffer);

    // Validate the results

    const Allocation &bufferAllocation = buffer.getAllocation();
    invalidateAlloc(vk, device, bufferAllocation);

    const uint32_t *bufferPtr = static_cast<uint32_t *>(bufferAllocation.getHostPtr());

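    // The shader wrote (globalOffs + localOffs^2) to the mirrored shared slot, so the
    // value read back at (globalOffset + localOffset) must equal
    // globalOffset + (workGroupSize - localOffset - 1)^2.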
    for (int groupNdx = 0; groupNdx < workGroupCount; ++groupNdx)
    {
        const int globalOffset = groupNdx * workGroupSize;
        for (int localOffset = 0; localOffset < workGroupSize; ++localOffset)
        {
            const uint32_t res = bufferPtr[globalOffset + localOffset];
            const uint32_t ref = globalOffset + squared(workGroupSize - localOffset - 1);

            if (res != ref)
            {
                std::ostringstream msg;
                msg << "Comparison failed for Output.values[" << (globalOffset + localOffset) << "]";
                return tcu::TestStatus::fail(msg.str());
            }
        }
    }
    return tcu::TestStatus::pass("Compute succeeded");
}

class SharedVarAtomicOpTest : public vkt::TestCase
{
public:
    SharedVarAtomicOpTest(tcu::TestContext &testCtx, const std::string &name, const tcu::IVec3 &localSize,
                          const tcu::IVec3 &workSize,
                          const vk::ComputePipelineConstructionType computePipelineConstructionType);

    virtual void checkSupport(Context &context) const;
    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const;

private:
    const tcu::IVec3 m_localSize;
    const tcu::IVec3 m_workSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class SharedVarAtomicOpTestInstance : public vkt::TestInstance
{
public:
    SharedVarAtomicOpTestInstance(Context &context, const tcu::IVec3 &localSize, const tcu::IVec3 &workSize,
                                  const vk::ComputePipelineConstructionType computePipelineConstructionType);

    tcu::TestStatus iterate(void);

private:
    const tcu::IVec3 m_localSize;
    const tcu::IVec3 m_workSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

SharedVarAtomicOpTest::SharedVarAtomicOpTest(tcu::TestContext &testCtx, const std::string &name,
                                             const tcu::IVec3 &localSize, const tcu::IVec3 &workSize,
                                             const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestCase(testCtx, name)
    , m_localSize(localSize)
    , m_workSize(workSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
}

void SharedVarAtomicOpTest::checkSupport(Context &context) const
{
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
                                  m_computePipelineConstructionType);
}

void SharedVarAtomicOpTest::initPrograms(SourceCollections &sourceCollections) const
{
    const int workGroupSize = multiplyComponents(m_localSize);
    const int workGroupCount = multiplyComponents(m_workSize);
    const int numValues = workGroupSize * workGroupCount;

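    // All invocations in a workgroup perform atomicAdd(count, 1u) on a zero-initialized
    // shared counter. The returned pre-add value is a unique ticket in [0, localSize),
    // and each invocation writes (ticket + 1) to the slot selected by its ticket.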
    std::ostringstream src;
    src << "#version 310 es\n"
        << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y()
        << ", local_size_z = " << m_localSize.z() << ") in;\n"
        << "layout(binding = 0) writeonly buffer Output {\n"
        << "    uint values[" << numValues << "];\n"
        << "} sb_out;\n\n"
        << "shared uint count;\n\n"
        << "void main (void) {\n"
        << "    uint localSize = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
        << "    uint globalNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + "
           "gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
        << "    uint globalOffs = localSize*globalNdx;\n"
        << "\n"
        << "    count = 0u;\n"
        << "    memoryBarrierShared();\n"
        << "    barrier();\n"
        << "    uint oldVal = atomicAdd(count, 1u);\n"
        << "    sb_out.values[globalOffs+oldVal] = oldVal+1u;\n"
        << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

TestInstance *SharedVarAtomicOpTest::createInstance(Context &context) const
{
    return new SharedVarAtomicOpTestInstance(context, m_localSize, m_workSize, m_computePipelineConstructionType);
}

SharedVarAtomicOpTestInstance::SharedVarAtomicOpTestInstance(
    Context &context, const tcu::IVec3 &localSize, const tcu::IVec3 &workSize,
    const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestInstance(context)
    , m_localSize(localSize)
    , m_workSize(workSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
}

tcu::TestStatus SharedVarAtomicOpTestInstance::iterate(void)
{
    const DeviceInterface &vk = m_context.getDeviceInterface();
    const VkDevice device = m_context.getDevice();
    const VkQueue queue = m_context.getUniversalQueue();
    const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
    Allocator &allocator = m_context.getDefaultAllocator();

    const int workGroupSize = multiplyComponents(m_localSize);
    const int workGroupCount = multiplyComponents(m_workSize);

    // Create a buffer and host-visible memory for it

    const VkDeviceSize bufferSizeBytes = sizeof(uint32_t) * workGroupSize * workGroupCount;
    const BufferWithMemory buffer(vk, device, allocator,
                                  makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
                                  MemoryRequirement::HostVisible);

    // Create descriptor set

    const Unique<VkDescriptorSetLayout> descriptorSetLayout(
        DescriptorSetLayoutBuilder()
            .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
            .build(vk, device));

    const Unique<VkDescriptorPool> descriptorPool(
        DescriptorPoolBuilder()
            .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
            .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));

    const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));

    const VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
    DescriptorSetUpdateBuilder()
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
                     VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
        .update(vk, device);

    // Perform the computation

    ComputePipelineWrapper pipeline(vk, device, m_computePipelineConstructionType,
                                    m_context.getBinaryCollection().get("comp"));
    pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
    pipeline.buildPipeline();

    const VkBufferMemoryBarrier computeFinishBarrier =
        makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);

    const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
    const Unique<VkCommandBuffer> cmdBuffer(
        allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

    // Start recording commands

    beginCommandBuffer(vk, *cmdBuffer);

    pipeline.bind(*cmdBuffer);
    vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getPipelineLayout(), 0u, 1u,
                             &descriptorSet.get(), 0u, DE_NULL);

    vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());

    vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
                          (VkDependencyFlags)0, 0, (const VkMemoryBarrier *)DE_NULL, 1u, &computeFinishBarrier, 0,
                          (const VkImageMemoryBarrier *)DE_NULL);

    endCommandBuffer(vk, *cmdBuffer);

    // Wait for completion

    submitCommandsAndWait(vk, device, queue, *cmdBuffer);

    // Validate the results

    const Allocation &bufferAllocation = buffer.getAllocation();
    invalidateAlloc(vk, device, bufferAllocation);

    const uint32_t *bufferPtr = static_cast<uint32_t *>(bufferAllocation.getHostPtr());

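    // Because every atomic ticket was unique, each group's output region must hold the
    // sequence 1, 2, ..., workGroupSize in order.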
    for (int groupNdx = 0; groupNdx < workGroupCount; ++groupNdx)
    {
        const int globalOffset = groupNdx * workGroupSize;
        for (int localOffset = 0; localOffset < workGroupSize; ++localOffset)
        {
            const uint32_t res = bufferPtr[globalOffset + localOffset];
            const uint32_t ref = localOffset + 1;

            if (res != ref)
            {
                std::ostringstream msg;
                msg << "Comparison failed for Output.values[" << (globalOffset + localOffset) << "]";
                return tcu::TestStatus::fail(msg.str());
            }
        }
    }
    return tcu::TestStatus::pass("Compute succeeded");
}

class SSBOLocalBarrierTest : public vkt::TestCase
{
public:
    SSBOLocalBarrierTest(tcu::TestContext &testCtx, const std::string &name, const tcu::IVec3 &localSize,
                         const tcu::IVec3 &workSize,
                         const vk::ComputePipelineConstructionType computePipelineConstructionType);

    virtual void checkSupport(Context &context) const;
    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const;

private:
    const tcu::IVec3 m_localSize;
    const tcu::IVec3 m_workSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class SSBOLocalBarrierTestInstance : public vkt::TestInstance
{
public:
    SSBOLocalBarrierTestInstance(Context &context, const tcu::IVec3 &localSize, const tcu::IVec3 &workSize,
                                 const vk::ComputePipelineConstructionType computePipelineConstructionType);

    tcu::TestStatus iterate(void);

private:
    const tcu::IVec3 m_localSize;
    const tcu::IVec3 m_workSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

SSBOLocalBarrierTest::SSBOLocalBarrierTest(tcu::TestContext &testCtx, const std::string &name,
                                           const tcu::IVec3 &localSize, const tcu::IVec3 &workSize,
                                           const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestCase(testCtx, name)
    , m_localSize(localSize)
    , m_workSize(workSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
}

void SSBOLocalBarrierTest::checkSupport(Context &context) const
{
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
                                  m_computePipelineConstructionType);
}

void SSBOLocalBarrierTest::initPrograms(SourceCollections &sourceCollections) const
{
    const int workGroupSize = multiplyComponents(m_localSize);
    const int workGroupCount = multiplyComponents(m_workSize);
    const int numValues = workGroupSize * workGroupCount;

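    // Invocations first write globalOffs to their own SSBO slot, then, separated by
    // buffer memory barriers plus control barriers, perform read-modify-writes on the
    // slots of their (localOffs+1) and (localOffs+2) neighbours (modulo localSize),
    // exercising coherent SSBO access within a workgroup.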
    std::ostringstream src;
    src << "#version 310 es\n"
        << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y()
        << ", local_size_z = " << m_localSize.z() << ") in;\n"
        << "layout(binding = 0) coherent buffer Output {\n"
        << "    uint values[" << numValues << "];\n"
        << "} sb_out;\n\n"
        << "void main (void) {\n"
        << "    uint localSize = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
        << "    uint globalNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + "
           "gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
        << "    uint globalOffs = localSize*globalNdx;\n"
        << "    uint localOffs = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + "
           "gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
        << "\n"
        << "    sb_out.values[globalOffs + localOffs] = globalOffs;\n"
        << "    memoryBarrierBuffer();\n"
        << "    barrier();\n"
        << "    sb_out.values[globalOffs + ((localOffs+1u)%localSize)] += localOffs;\n" // += so we read and write
        << "    memoryBarrierBuffer();\n"
        << "    barrier();\n"
        << "    sb_out.values[globalOffs + ((localOffs+2u)%localSize)] += localOffs;\n"
        << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

TestInstance *SSBOLocalBarrierTest::createInstance(Context &context) const
{
    return new SSBOLocalBarrierTestInstance(context, m_localSize, m_workSize, m_computePipelineConstructionType);
}

SSBOLocalBarrierTestInstance::SSBOLocalBarrierTestInstance(
    Context &context, const tcu::IVec3 &localSize, const tcu::IVec3 &workSize,
    const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestInstance(context)
    , m_localSize(localSize)
    , m_workSize(workSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
}

tcu::TestStatus SSBOLocalBarrierTestInstance::iterate(void)
{
    const DeviceInterface &vk = m_context.getDeviceInterface();
    const VkDevice device = m_context.getDevice();
    const VkQueue queue = m_context.getUniversalQueue();
    const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
    Allocator &allocator = m_context.getDefaultAllocator();

    const int workGroupSize = multiplyComponents(m_localSize);
    const int workGroupCount = multiplyComponents(m_workSize);

    // Create a buffer and host-visible memory for it

    const VkDeviceSize bufferSizeBytes = sizeof(uint32_t) * workGroupSize * workGroupCount;
    const BufferWithMemory buffer(vk, device, allocator,
                                  makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
                                  MemoryRequirement::HostVisible);

    // Create descriptor set

    const Unique<VkDescriptorSetLayout> descriptorSetLayout(
        DescriptorSetLayoutBuilder()
            .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
            .build(vk, device));

    const Unique<VkDescriptorPool> descriptorPool(
        DescriptorPoolBuilder()
            .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
            .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));

    const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));

    const VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
    DescriptorSetUpdateBuilder()
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
                     VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo)
        .update(vk, device);

    // Perform the computation

    ComputePipelineWrapper pipeline(vk, device, m_computePipelineConstructionType,
                                    m_context.getBinaryCollection().get("comp"));
    pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
    pipeline.buildPipeline();

    const VkBufferMemoryBarrier computeFinishBarrier =
        makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);

    const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
    const Unique<VkCommandBuffer> cmdBuffer(
        allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

    // Start recording commands

    beginCommandBuffer(vk, *cmdBuffer);

    pipeline.bind(*cmdBuffer);
    vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getPipelineLayout(), 0u, 1u,
                             &descriptorSet.get(), 0u, DE_NULL);

    vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());

    vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
                          (VkDependencyFlags)0, 0, (const VkMemoryBarrier *)DE_NULL, 1, &computeFinishBarrier, 0,
                          (const VkImageMemoryBarrier *)DE_NULL);

    endCommandBuffer(vk, *cmdBuffer);

    // Wait for completion

    submitCommandsAndWait(vk, device, queue, *cmdBuffer);

    // Validate the results

    const Allocation &bufferAllocation = buffer.getAllocation();
    invalidateAlloc(vk, device, bufferAllocation);

    const uint32_t *bufferPtr = static_cast<uint32_t *>(bufferAllocation.getHostPtr());

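    // Slot s was initialized to globalOffs and then incremented by invocations
    // ((s-1) mod localSize) and ((s-2) mod localSize), each adding its own localOffs,
    // which yields the reference value computed below.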
    for (int groupNdx = 0; groupNdx < workGroupCount; ++groupNdx)
    {
        const int globalOffset = groupNdx * workGroupSize;
        for (int localOffset = 0; localOffset < workGroupSize; ++localOffset)
        {
            const uint32_t res = bufferPtr[globalOffset + localOffset];
            const int offs0 = localOffset - 1 < 0 ? ((localOffset + workGroupSize - 1) % workGroupSize) :
                                                    ((localOffset - 1) % workGroupSize);
            const int offs1 = localOffset - 2 < 0 ? ((localOffset + workGroupSize - 2) % workGroupSize) :
                                                    ((localOffset - 2) % workGroupSize);
            const uint32_t ref = static_cast<uint32_t>(globalOffset + offs0 + offs1);

            if (res != ref)
            {
                std::ostringstream msg;
                msg << "Comparison failed for Output.values[" << (globalOffset + localOffset) << "]";
                return tcu::TestStatus::fail(msg.str());
            }
        }
    }
    return tcu::TestStatus::pass("Compute succeeded");
}

class CopyImageToSSBOTest : public vkt::TestCase
{
public:
    CopyImageToSSBOTest(tcu::TestContext &testCtx, const std::string &name, const tcu::IVec2 &localSize,
                        const tcu::IVec2 &imageSize,
                        const vk::ComputePipelineConstructionType computePipelineConstructionType);

    virtual void checkSupport(Context &context) const;
    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const;

private:
    const tcu::IVec2 m_localSize;
    const tcu::IVec2 m_imageSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class CopyImageToSSBOTestInstance : public vkt::TestInstance
{
public:
    CopyImageToSSBOTestInstance(Context &context, const tcu::IVec2 &localSize, const tcu::IVec2 &imageSize,
                                const vk::ComputePipelineConstructionType computePipelineConstructionType);

    tcu::TestStatus iterate(void);

private:
    const tcu::IVec2 m_localSize;
    const tcu::IVec2 m_imageSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

CopyImageToSSBOTest::CopyImageToSSBOTest(tcu::TestContext &testCtx, const std::string &name,
                                         const tcu::IVec2 &localSize, const tcu::IVec2 &imageSize,
                                         const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestCase(testCtx, name)
    , m_localSize(localSize)
    , m_imageSize(imageSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
    DE_ASSERT(m_imageSize.x() % m_localSize.x() == 0);
    DE_ASSERT(m_imageSize.y() % m_localSize.y() == 0);
}

void CopyImageToSSBOTest::checkSupport(Context &context) const
{
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
                                  m_computePipelineConstructionType);
}

void CopyImageToSSBOTest::initPrograms(SourceCollections &sourceCollections) const
{
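    // One invocation per texel: load from the r32ui source image and store the value
    // into the SSBO at the row-major linearized location.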
    std::ostringstream src;
    src << "#version 310 es\n"
        << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ") in;\n"
        << "layout(binding = 1, r32ui) readonly uniform highp uimage2D u_srcImg;\n"
        << "layout(binding = 0) writeonly buffer Output {\n"
        << "    uint values[" << (m_imageSize.x() * m_imageSize.y()) << "];\n"
        << "} sb_out;\n\n"
        << "void main (void) {\n"
        << "    uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
        << "    uint value = imageLoad(u_srcImg, ivec2(gl_GlobalInvocationID.xy)).x;\n"
        << "    sb_out.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x] = value;\n"
        << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

TestInstance *CopyImageToSSBOTest::createInstance(Context &context) const
{
    return new CopyImageToSSBOTestInstance(context, m_localSize, m_imageSize, m_computePipelineConstructionType);
}

CopyImageToSSBOTestInstance::CopyImageToSSBOTestInstance(
    Context &context, const tcu::IVec2 &localSize, const tcu::IVec2 &imageSize,
    const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestInstance(context)
    , m_localSize(localSize)
    , m_imageSize(imageSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
}

tcu::TestStatus CopyImageToSSBOTestInstance::iterate(void)
{
    const DeviceInterface &vk = m_context.getDeviceInterface();
    const VkDevice device = m_context.getDevice();
    const VkQueue queue = m_context.getUniversalQueue();
    const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
    Allocator &allocator = m_context.getDefaultAllocator();

    // Create an image

    const VkImageCreateInfo imageParams =
        make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_STORAGE_BIT);
    const ImageWithMemory image(vk, device, allocator, imageParams, MemoryRequirement::Any);

    const VkImageSubresourceRange subresourceRange =
        makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
    const Unique<VkImageView> imageView(
        makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));

    // Staging buffer (source data for image)

    const uint32_t imageArea = multiplyComponents(m_imageSize);
    const VkDeviceSize bufferSizeBytes = sizeof(uint32_t) * imageArea;

    const BufferWithMemory stagingBuffer(vk, device, allocator,
                                         makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT),
                                         MemoryRequirement::HostVisible);

    // Populate the staging buffer with test data
    {
        de::Random rnd(0xab2c7);
        const Allocation &stagingBufferAllocation = stagingBuffer.getAllocation();
        uint32_t *bufferPtr = static_cast<uint32_t *>(stagingBufferAllocation.getHostPtr());
        for (uint32_t i = 0; i < imageArea; ++i)
            *bufferPtr++ = rnd.getUint32();

        flushAlloc(vk, device, stagingBufferAllocation);
    }

    // Create a buffer to store shader output

    const BufferWithMemory outputBuffer(vk, device, allocator,
                                        makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
                                        MemoryRequirement::HostVisible);

    // Create descriptor set

    const Unique<VkDescriptorSetLayout> descriptorSetLayout(
        DescriptorSetLayoutBuilder()
            .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
            .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
            .build(vk, device));

    const Unique<VkDescriptorPool> descriptorPool(
        DescriptorPoolBuilder()
            .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
            .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
            .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));

    const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));

    // Set the bindings

    const VkDescriptorImageInfo imageDescriptorInfo =
        makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
    const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*outputBuffer, 0ull, bufferSizeBytes);

    DescriptorSetUpdateBuilder()
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
                     VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
                     VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
        .update(vk, device);

    // Perform the computation
    {
        ComputePipelineWrapper pipeline(vk, device, m_computePipelineConstructionType,
                                        m_context.getBinaryCollection().get("comp"));
        pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
        pipeline.buildPipeline();

        const VkBufferMemoryBarrier computeFinishBarrier = makeBufferMemoryBarrier(
            VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, bufferSizeBytes);
        const tcu::IVec2 workSize = m_imageSize / m_localSize;

        // Prepare the command buffer

        const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
        const Unique<VkCommandBuffer> cmdBuffer(
            allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

        // Start recording commands

        beginCommandBuffer(vk, *cmdBuffer);

        pipeline.bind(*cmdBuffer);
        vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getPipelineLayout(), 0u, 1u,
                                 &descriptorSet.get(), 0u, DE_NULL);

        const std::vector<VkBufferImageCopy> bufferImageCopy(1, makeBufferImageCopy(m_imageSize));
        copyBufferToImage(vk, *cmdBuffer, *stagingBuffer, bufferSizeBytes, bufferImageCopy, VK_IMAGE_ASPECT_COLOR_BIT,
                          1, 1, *image, VK_IMAGE_LAYOUT_GENERAL, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);

        vk.cmdDispatch(*cmdBuffer, workSize.x(), workSize.y(), 1u);
        vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
                              (VkDependencyFlags)0, 0, (const VkMemoryBarrier *)DE_NULL, 1, &computeFinishBarrier, 0,
                              (const VkImageMemoryBarrier *)DE_NULL);

        endCommandBuffer(vk, *cmdBuffer);

        // Wait for completion

        submitCommandsAndWait(vk, device, queue, *cmdBuffer);
    }

    // Validate the results

    const Allocation &outputBufferAllocation = outputBuffer.getAllocation();
    invalidateAlloc(vk, device, outputBufferAllocation);

    const uint32_t *bufferPtr = static_cast<uint32_t *>(outputBufferAllocation.getHostPtr());
    const uint32_t *refBufferPtr = static_cast<uint32_t *>(stagingBuffer.getAllocation().getHostPtr());

    for (uint32_t ndx = 0; ndx < imageArea; ++ndx)
    {
        const uint32_t res = *(bufferPtr + ndx);
        const uint32_t ref = *(refBufferPtr + ndx);

        if (res != ref)
        {
            std::ostringstream msg;
            msg << "Comparison failed for Output.values[" << ndx << "]";
            return tcu::TestStatus::fail(msg.str());
        }
    }
    return tcu::TestStatus::pass("Compute succeeded");
}

class CopySSBOToImageTest : public vkt::TestCase
{
public:
    CopySSBOToImageTest(tcu::TestContext &testCtx, const std::string &name, const tcu::IVec2 &localSize,
                        const tcu::IVec2 &imageSize,
                        const vk::ComputePipelineConstructionType computePipelineConstructionType);

    virtual void checkSupport(Context &context) const;
    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const;

private:
    const tcu::IVec2 m_localSize;
    const tcu::IVec2 m_imageSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class CopySSBOToImageTestInstance : public vkt::TestInstance
{
public:
    CopySSBOToImageTestInstance(Context &context, const tcu::IVec2 &localSize, const tcu::IVec2 &imageSize,
                                const vk::ComputePipelineConstructionType computePipelineConstructionType);

    tcu::TestStatus iterate(void);

private:
    const tcu::IVec2 m_localSize;
    const tcu::IVec2 m_imageSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

CopySSBOToImageTest::CopySSBOToImageTest(tcu::TestContext &testCtx, const std::string &name,
                                         const tcu::IVec2 &localSize, const tcu::IVec2 &imageSize,
                                         const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestCase(testCtx, name)
    , m_localSize(localSize)
    , m_imageSize(imageSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
    DE_ASSERT(m_imageSize.x() % m_localSize.x() == 0);
    DE_ASSERT(m_imageSize.y() % m_localSize.y() == 0);
}

void CopySSBOToImageTest::checkSupport(Context &context) const
{
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
                                  m_computePipelineConstructionType);
}

void CopySSBOToImageTest::initPrograms(SourceCollections &sourceCollections) const
{
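    // Inverse of the image-to-SSBO case: each invocation reads one value from the SSBO
    // and stores it to the corresponding texel of the r32ui destination image.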
    std::ostringstream src;
    src << "#version 310 es\n"
        << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y() << ") in;\n"
        << "layout(binding = 1, r32ui) writeonly uniform highp uimage2D u_dstImg;\n"
        << "layout(binding = 0) readonly buffer Input {\n"
        << "    uint values[" << (m_imageSize.x() * m_imageSize.y()) << "];\n"
        << "} sb_in;\n\n"
        << "void main (void) {\n"
        << "    uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
        << "    uint value = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n"
        << "    imageStore(u_dstImg, ivec2(gl_GlobalInvocationID.xy), uvec4(value, 0, 0, 0));\n"
        << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

TestInstance *CopySSBOToImageTest::createInstance(Context &context) const
{
    return new CopySSBOToImageTestInstance(context, m_localSize, m_imageSize, m_computePipelineConstructionType);
}

CopySSBOToImageTestInstance::CopySSBOToImageTestInstance(
    Context &context, const tcu::IVec2 &localSize, const tcu::IVec2 &imageSize,
    const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestInstance(context)
    , m_localSize(localSize)
    , m_imageSize(imageSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
}

tcu::TestStatus CopySSBOToImageTestInstance::iterate(void)
{
    ContextCommonData data = m_context.getContextCommonData();
    const DeviceInterface &vkd = data.vkd;

    // Create an image, a view, and the output buffer
    const VkImageSubresourceRange subresourceRange =
        makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
    ImageWithBuffer imageWithBuffer(
        vkd, data.device, data.allocator, vk::makeExtent3D(m_imageSize.x(), m_imageSize.y(), 1), VK_FORMAT_R32_UINT,
        VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_STORAGE_BIT, vk::VK_IMAGE_TYPE_2D, subresourceRange);

    const uint32_t imageArea = multiplyComponents(m_imageSize);
    const VkDeviceSize bufferSizeBytes = sizeof(uint32_t) * imageArea;

    const BufferWithMemory inputBuffer(vkd, data.device, data.allocator,
                                       makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
                                       MemoryRequirement::HostVisible);

    // Populate the buffer with test data
    {
        de::Random rnd(0x77238ac2);
        const Allocation &inputBufferAllocation = inputBuffer.getAllocation();
        uint32_t *bufferPtr = static_cast<uint32_t *>(inputBufferAllocation.getHostPtr());
        for (uint32_t i = 0; i < imageArea; ++i)
            *bufferPtr++ = rnd.getUint32();

        flushAlloc(vkd, data.device, inputBufferAllocation);
    }

    // Create descriptor set
    const Unique<VkDescriptorSetLayout> descriptorSetLayout(
        DescriptorSetLayoutBuilder()
            .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
            .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
            .build(vkd, data.device));

    const Unique<VkDescriptorPool> descriptorPool(
        DescriptorPoolBuilder()
            .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
            .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
            .build(vkd, data.device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));

    const Unique<VkDescriptorSet> descriptorSet(
        makeDescriptorSet(vkd, data.device, *descriptorPool, *descriptorSetLayout));

    // Set the bindings

    const VkDescriptorImageInfo imageDescriptorInfo =
        makeDescriptorImageInfo(DE_NULL, imageWithBuffer.getImageView(), VK_IMAGE_LAYOUT_GENERAL);
    const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*inputBuffer, 0ull, bufferSizeBytes);

    DescriptorSetUpdateBuilder()
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
                     VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
        .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
                     VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
        .update(vkd, data.device);

    // Perform the computation
    {
        ComputePipelineWrapper pipeline(vkd, data.device, m_computePipelineConstructionType,
                                        m_context.getBinaryCollection().get("comp"));
        pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
        pipeline.buildPipeline();

        const VkBufferMemoryBarrier inputBufferPostHostWriteBarrier = makeBufferMemoryBarrier(
            VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *inputBuffer, 0ull, bufferSizeBytes);

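        // Transition the image from UNDEFINED to GENERAL before the shader's storage
        // writes; this image barrier is batched with the buffer barrier that makes the
        // host-written input data visible to the compute stage.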
        const VkImageMemoryBarrier imageLayoutBarrier =
            makeImageMemoryBarrier(0u, VK_ACCESS_SHADER_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL,
                                   imageWithBuffer.getImage(), subresourceRange);

        const tcu::IVec2 workSize = m_imageSize / m_localSize;

        // Prepare the command buffer

        const Unique<VkCommandPool> cmdPool(makeCommandPool(vkd, data.device, data.qfIndex));
        const Unique<VkCommandBuffer> cmdBuffer(
            allocateCommandBuffer(vkd, data.device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

        // Start recording commands

        beginCommandBuffer(vkd, *cmdBuffer);

        pipeline.bind(*cmdBuffer);
        vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getPipelineLayout(), 0u, 1u,
                                  &descriptorSet.get(), 0u, DE_NULL);

        vkd.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                               (VkDependencyFlags)0, 0, (const VkMemoryBarrier *)DE_NULL, 1,
                               &inputBufferPostHostWriteBarrier, 1, &imageLayoutBarrier);
        vkd.cmdDispatch(*cmdBuffer, workSize.x(), workSize.y(), 1u);

        copyImageToBuffer(vkd, *cmdBuffer, imageWithBuffer.getImage(), imageWithBuffer.getBuffer(), m_imageSize,
                          VK_ACCESS_SHADER_WRITE_BIT, VK_IMAGE_LAYOUT_GENERAL);

        endCommandBuffer(vkd, *cmdBuffer);

        // Wait for completion

        submitCommandsAndWait(vkd, data.device, data.queue, *cmdBuffer);
    }

    // Validate the results

    const Allocation &outputBufferAllocation = imageWithBuffer.getBufferAllocation();
    invalidateAlloc(vkd, data.device, outputBufferAllocation);

    const uint32_t *bufferPtr = static_cast<uint32_t *>(outputBufferAllocation.getHostPtr());
    const uint32_t *refBufferPtr = static_cast<uint32_t *>(inputBuffer.getAllocation().getHostPtr());

    for (uint32_t ndx = 0; ndx < imageArea; ++ndx)
    {
        const uint32_t res = *(bufferPtr + ndx);
        const uint32_t ref = *(refBufferPtr + ndx);

        if (res != ref)
        {
            std::ostringstream msg;
            msg << "Comparison failed for pixel " << ndx;
            return tcu::TestStatus::fail(msg.str());
        }
    }
    return tcu::TestStatus::pass("Compute succeeded");
}

class BufferToBufferInvertTest : public vkt::TestCase
{
public:
    virtual void checkSupport(Context &context) const;
    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const;

    static BufferToBufferInvertTest *UBOToSSBOInvertCase(
        tcu::TestContext &testCtx, const std::string &name, const uint32_t numValues, const tcu::IVec3 &localSize,
        const tcu::IVec3 &workSize, const vk::ComputePipelineConstructionType computePipelineConstructionType);

    static BufferToBufferInvertTest *CopyInvertSSBOCase(
        tcu::TestContext &testCtx, const std::string &name, const uint32_t numValues, const tcu::IVec3 &localSize,
        const tcu::IVec3 &workSize, const vk::ComputePipelineConstructionType computePipelineConstructionType);

private:
    BufferToBufferInvertTest(tcu::TestContext &testCtx, const std::string &name, const uint32_t numValues,
                             const tcu::IVec3 &localSize, const tcu::IVec3 &workSize, const BufferType bufferType,
                             const vk::ComputePipelineConstructionType computePipelineConstructionType);

    const BufferType m_bufferType;
    const uint32_t m_numValues;
    const tcu::IVec3 m_localSize;
    const tcu::IVec3 m_workSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class BufferToBufferInvertTestInstance : public vkt::TestInstance
{
public:
    BufferToBufferInvertTestInstance(Context &context, const uint32_t numValues, const tcu::IVec3 &localSize,
                                     const tcu::IVec3 &workSize, const BufferType bufferType,
                                     const vk::ComputePipelineConstructionType computePipelineConstructionType);

    tcu::TestStatus iterate(void);

private:
    const BufferType m_bufferType;
    const uint32_t m_numValues;
    const tcu::IVec3 m_localSize;
    const tcu::IVec3 m_workSize;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

BufferToBufferInvertTest::BufferToBufferInvertTest(
    tcu::TestContext &testCtx, const std::string &name, const uint32_t numValues, const tcu::IVec3 &localSize,
    const tcu::IVec3 &workSize, const BufferType bufferType,
    const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : TestCase(testCtx, name)
    , m_bufferType(bufferType)
    , m_numValues(numValues)
    , m_localSize(localSize)
    , m_workSize(workSize)
    , m_computePipelineConstructionType(computePipelineConstructionType)
{
    DE_ASSERT(m_numValues % (multiplyComponents(m_workSize) * multiplyComponents(m_localSize)) == 0);
    DE_ASSERT(m_bufferType == BUFFER_TYPE_UNIFORM || m_bufferType == BUFFER_TYPE_SSBO);
}

BufferToBufferInvertTest *BufferToBufferInvertTest::UBOToSSBOInvertCase(
    tcu::TestContext &testCtx, const std::string &name, const uint32_t numValues, const tcu::IVec3 &localSize,
    const tcu::IVec3 &workSize, const vk::ComputePipelineConstructionType computePipelineConstructionType)
{
    return new BufferToBufferInvertTest(testCtx, name, numValues, localSize, workSize, BUFFER_TYPE_UNIFORM,
                                        computePipelineConstructionType);
}

BufferToBufferInvertTest *BufferToBufferInvertTest::CopyInvertSSBOCase(
    tcu::TestContext &testCtx, const std::string &name, const uint32_t numValues, const tcu::IVec3 &localSize,
    const tcu::IVec3 &workSize, const vk::ComputePipelineConstructionType computePipelineConstructionType)
{
    return new BufferToBufferInvertTest(testCtx, name, numValues, localSize, workSize, BUFFER_TYPE_SSBO,
                                        computePipelineConstructionType);
}

void BufferToBufferInvertTest::checkSupport(Context &context) const
{
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
                                  m_computePipelineConstructionType);
}

void BufferToBufferInvertTest::initPrograms(SourceCollections &sourceCollections) const
{
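    // Two variants of the same inversion kernel: the input is either a UBO or an SSBO.
    // All arrays use std140 layout (explicit on the buffer blocks; uniform blocks
    // default to std140 under Vulkan GLSL rules), so each uint element has a 16-byte
    // stride, matching the tcu::UVec4-per-value layout used on the host side.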
1209 std::ostringstream src;
1210 if (m_bufferType == BUFFER_TYPE_UNIFORM)
1211 {
1212 src << "#version 310 es\n"
1213 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y()
1214 << ", local_size_z = " << m_localSize.z() << ") in;\n"
1215 << "layout(binding = 0) readonly uniform Input {\n"
1216 << " uint values[" << m_numValues << "];\n"
1217 << "} ub_in;\n"
1218 << "layout(binding = 1, std140) writeonly buffer Output {\n"
1219 << " uint values[" << m_numValues << "];\n"
1220 << "} sb_out;\n"
1221 << "void main (void) {\n"
1222 << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1223 << " uint numValuesPerInv = uint(ub_in.values.length()) / (size.x*size.y*size.z);\n"
1224 << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + "
1225 "gl_GlobalInvocationID.x;\n"
1226 << " uint offset = numValuesPerInv*groupNdx;\n"
1227 << "\n"
1228 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1229 << " sb_out.values[offset + ndx] = ~ub_in.values[offset + ndx];\n"
1230 << "}\n";
1231 }
1232 else if (m_bufferType == BUFFER_TYPE_SSBO)
1233 {
1234 src << "#version 310 es\n"
1235 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y()
1236 << ", local_size_z = " << m_localSize.z() << ") in;\n"
1237 << "layout(binding = 0, std140) readonly buffer Input {\n"
1238 << " uint values[" << m_numValues << "];\n"
1239 << "} sb_in;\n"
1240 << "layout (binding = 1, std140) writeonly buffer Output {\n"
1241 << " uint values[" << m_numValues << "];\n"
1242 << "} sb_out;\n"
1243 << "void main (void) {\n"
1244 << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1245 << " uint numValuesPerInv = uint(sb_in.values.length()) / (size.x*size.y*size.z);\n"
1246 << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + "
1247 "gl_GlobalInvocationID.x;\n"
1248 << " uint offset = numValuesPerInv*groupNdx;\n"
1249 << "\n"
1250 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1251 << " sb_out.values[offset + ndx] = ~sb_in.values[offset + ndx];\n"
1252 << "}\n";
1253 }
1254
1255 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1256 }
1257
createInstance(Context & context) const1258 TestInstance *BufferToBufferInvertTest::createInstance(Context &context) const
1259 {
1260 return new BufferToBufferInvertTestInstance(context, m_numValues, m_localSize, m_workSize, m_bufferType,
1261 m_computePipelineConstructionType);
1262 }
1263
BufferToBufferInvertTestInstance(Context & context,const uint32_t numValues,const tcu::IVec3 & localSize,const tcu::IVec3 & workSize,const BufferType bufferType,const vk::ComputePipelineConstructionType computePipelineConstructionType)1264 BufferToBufferInvertTestInstance::BufferToBufferInvertTestInstance(
1265 Context &context, const uint32_t numValues, const tcu::IVec3 &localSize, const tcu::IVec3 &workSize,
1266 const BufferType bufferType, const vk::ComputePipelineConstructionType computePipelineConstructionType)
1267 : TestInstance(context)
1268 , m_bufferType(bufferType)
1269 , m_numValues(numValues)
1270 , m_localSize(localSize)
1271 , m_workSize(workSize)
1272 , m_computePipelineConstructionType(computePipelineConstructionType)
1273 {
1274 }
1275
iterate(void)1276 tcu::TestStatus BufferToBufferInvertTestInstance::iterate(void)
1277 {
1278 const DeviceInterface &vk = m_context.getDeviceInterface();
1279 const VkDevice device = m_context.getDevice();
1280 const VkQueue queue = m_context.getUniversalQueue();
1281 const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
1282 Allocator &allocator = m_context.getDefaultAllocator();
1283
1284 // Customize the test based on buffer type
1285
1286 const VkBufferUsageFlags inputBufferUsageFlags =
1287 (m_bufferType == BUFFER_TYPE_UNIFORM ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT : VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
1288 const VkDescriptorType inputBufferDescriptorType =
1289 (m_bufferType == BUFFER_TYPE_UNIFORM ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
1290 const uint32_t randomSeed = (m_bufferType == BUFFER_TYPE_UNIFORM ? 0x111223f : 0x124fef);
1291
1292 // Create an input buffer
1293
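// Each value occupies a full UVec4 (16 bytes) on the host; only the .x component
// carries data. This matches the 16-byte std140 array stride of the uint array
// declared in the shaders, so the same host layout works for both buffer types.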
1294 const VkDeviceSize bufferSizeBytes = sizeof(tcu::UVec4) * m_numValues;
1295 const BufferWithMemory inputBuffer(vk, device, allocator,
1296 makeBufferCreateInfo(bufferSizeBytes, inputBufferUsageFlags),
1297 MemoryRequirement::HostVisible);
1298
1299 // Fill the input buffer with data
1300 {
1301 de::Random rnd(randomSeed);
1302 const Allocation &inputBufferAllocation = inputBuffer.getAllocation();
1303 tcu::UVec4 *bufferPtr = static_cast<tcu::UVec4 *>(inputBufferAllocation.getHostPtr());
1304 for (uint32_t i = 0; i < m_numValues; ++i)
1305 bufferPtr[i].x() = rnd.getUint32();
1306
1307 flushAlloc(vk, device, inputBufferAllocation);
1308 }
1309
1310 // Create an output buffer
1311
1312 const BufferWithMemory outputBuffer(vk, device, allocator,
1313 makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
1314 MemoryRequirement::HostVisible);
1315
1316 // Create descriptor set
1317
1318 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
1319 DescriptorSetLayoutBuilder()
1320 .addSingleBinding(inputBufferDescriptorType, VK_SHADER_STAGE_COMPUTE_BIT)
1321 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1322 .build(vk, device));
1323
1324 const Unique<VkDescriptorPool> descriptorPool(
1325 DescriptorPoolBuilder()
1326 .addType(inputBufferDescriptorType)
1327 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
1328 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1329
1330 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1331
1332 const VkDescriptorBufferInfo inputBufferDescriptorInfo =
1333 makeDescriptorBufferInfo(*inputBuffer, 0ull, bufferSizeBytes);
1334 const VkDescriptorBufferInfo outputBufferDescriptorInfo =
1335 makeDescriptorBufferInfo(*outputBuffer, 0ull, bufferSizeBytes);
1336 DescriptorSetUpdateBuilder()
1337 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), inputBufferDescriptorType,
1338 &inputBufferDescriptorInfo)
1339 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
1340 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescriptorInfo)
1341 .update(vk, device);
1342
1343 // Perform the computation
1344
1345 ComputePipelineWrapper pipeline(vk, device, m_computePipelineConstructionType,
1346 m_context.getBinaryCollection().get("comp"));
1347 pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
1348 pipeline.buildPipeline();
1349
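// Two barriers frame the dispatch: the first makes the host-written input visible
// to shader reads, the second makes the shader's output visible to the host
// readback performed after submission.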
1350 const VkBufferMemoryBarrier hostWriteBarrier = makeBufferMemoryBarrier(
1351 VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *inputBuffer, 0ull, bufferSizeBytes);
1352
1353 const VkBufferMemoryBarrier shaderWriteBarrier = makeBufferMemoryBarrier(
1354 VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, bufferSizeBytes);
1355
1356 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1357 const Unique<VkCommandBuffer> cmdBuffer(
1358 allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
1359
1360 // Start recording commands
1361
1362 beginCommandBuffer(vk, *cmdBuffer);
1363
1364 pipeline.bind(*cmdBuffer);
1365 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getPipelineLayout(), 0u, 1u,
1366 &descriptorSet.get(), 0u, DE_NULL);
1367
1368 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1369 (VkDependencyFlags)0, 0, (const VkMemoryBarrier *)DE_NULL, 1, &hostWriteBarrier, 0,
1370 (const VkImageMemoryBarrier *)DE_NULL);
1371 vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1372 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
1373 (VkDependencyFlags)0, 0, (const VkMemoryBarrier *)DE_NULL, 1, &shaderWriteBarrier, 0,
1374 (const VkImageMemoryBarrier *)DE_NULL);
1375
1376 endCommandBuffer(vk, *cmdBuffer);
1377
1378 // Wait for completion
1379
1380 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1381
1382 // Validate the results
1383
1384 const Allocation &outputBufferAllocation = outputBuffer.getAllocation();
1385 invalidateAlloc(vk, device, outputBufferAllocation);
1386
1387 const tcu::UVec4 *bufferPtr = static_cast<tcu::UVec4 *>(outputBufferAllocation.getHostPtr());
1388 const tcu::UVec4 *refBufferPtr = static_cast<tcu::UVec4 *>(inputBuffer.getAllocation().getHostPtr());
1389
1390 for (uint32_t ndx = 0; ndx < m_numValues; ++ndx)
1391 {
1392 const uint32_t res = bufferPtr[ndx].x();
1393 const uint32_t ref = ~refBufferPtr[ndx].x();
1394
1395 if (res != ref)
1396 {
1397 std::ostringstream msg;
1398 msg << "Comparison failed for Output.values[" << ndx << "]";
1399 return tcu::TestStatus::fail(msg.str());
1400 }
1401 }
1402 return tcu::TestStatus::pass("Compute succeeded");
1403 }
1404
1405 class InvertSSBOInPlaceTest : public vkt::TestCase
1406 {
1407 public:
1408 InvertSSBOInPlaceTest(tcu::TestContext &testCtx, const std::string &name, const uint32_t numValues,
1409 const bool sized, const tcu::IVec3 &localSize, const tcu::IVec3 &workSize,
1410 const vk::ComputePipelineConstructionType computePipelineConstructionType);
1411
1412 virtual void checkSupport(Context &context) const;
1413 void initPrograms(SourceCollections &sourceCollections) const;
1414 TestInstance *createInstance(Context &context) const;
1415
1416 private:
1417 const uint32_t m_numValues;
1418 const bool m_sized;
1419 const tcu::IVec3 m_localSize;
1420 const tcu::IVec3 m_workSize;
1421 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
1422 };
1423
1424 class InvertSSBOInPlaceTestInstance : public vkt::TestInstance
1425 {
1426 public:
1427 InvertSSBOInPlaceTestInstance(Context &context, const uint32_t numValues, const tcu::IVec3 &localSize,
1428 const tcu::IVec3 &workSize,
1429 const vk::ComputePipelineConstructionType computePipelineConstructionType);
1430
1431 tcu::TestStatus iterate(void);
1432
1433 private:
1434 const uint32_t m_numValues;
1435 const tcu::IVec3 m_localSize;
1436 const tcu::IVec3 m_workSize;
1437 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
1438 };
1439
1440 InvertSSBOInPlaceTest::InvertSSBOInPlaceTest(tcu::TestContext &testCtx, const std::string &name,
1441 const uint32_t numValues, const bool sized, const tcu::IVec3 &localSize,
1442 const tcu::IVec3 &workSize,
1443 const vk::ComputePipelineConstructionType computePipelineConstructionType)
1444 : TestCase(testCtx, name)
1445 , m_numValues(numValues)
1446 , m_sized(sized)
1447 , m_localSize(localSize)
1448 , m_workSize(workSize)
1449 , m_computePipelineConstructionType(computePipelineConstructionType)
1450 {
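// The total invocation count must divide the value count evenly, so that every
// invocation processes the same number of elements (numValuesPerInv in the shader).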
1451 DE_ASSERT(m_numValues % (multiplyComponents(m_workSize) * multiplyComponents(m_localSize)) == 0);
1452 }
1453
1454 void InvertSSBOInPlaceTest::checkSupport(Context &context) const
1455 {
1456 checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
1457 m_computePipelineConstructionType);
1458 }
1459
1460 void InvertSSBOInPlaceTest::initPrograms(SourceCollections &sourceCollections) const
1461 {
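// When m_sized is false the array is declared without a size; values.length()
// is then derived at run time from the range of the buffer bound to the
// descriptor, which here equals m_numValues either way.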
1462 std::ostringstream src;
1463 src << "#version 310 es\n"
1464 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y()
1465 << ", local_size_z = " << m_localSize.z() << ") in;\n"
1466 << "layout(binding = 0) buffer InOut {\n"
1467 << " uint values[" << (m_sized ? de::toString(m_numValues) : "") << "];\n"
1468 << "} sb_inout;\n"
1469 << "void main (void) {\n"
1470 << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1471 << " uint numValuesPerInv = uint(sb_inout.values.length()) / (size.x*size.y*size.z);\n"
1472 << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + "
1473 "gl_GlobalInvocationID.x;\n"
1474 << " uint offset = numValuesPerInv*groupNdx;\n"
1475 << "\n"
1476 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1477 << " sb_inout.values[offset + ndx] = ~sb_inout.values[offset + ndx];\n"
1478 << "}\n";
1479
1480 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1481 }
1482
1483 TestInstance *InvertSSBOInPlaceTest::createInstance(Context &context) const
1484 {
1485 return new InvertSSBOInPlaceTestInstance(context, m_numValues, m_localSize, m_workSize,
1486 m_computePipelineConstructionType);
1487 }
1488
1489 InvertSSBOInPlaceTestInstance::InvertSSBOInPlaceTestInstance(
1490 Context &context, const uint32_t numValues, const tcu::IVec3 &localSize, const tcu::IVec3 &workSize,
1491 const vk::ComputePipelineConstructionType computePipelineConstructionType)
1492 : TestInstance(context)
1493 , m_numValues(numValues)
1494 , m_localSize(localSize)
1495 , m_workSize(workSize)
1496 , m_computePipelineConstructionType(computePipelineConstructionType)
1497 {
1498 }
1499
1500 tcu::TestStatus InvertSSBOInPlaceTestInstance::iterate(void)
1501 {
1502 const DeviceInterface &vk = m_context.getDeviceInterface();
1503 const VkDevice device = m_context.getDevice();
1504 const VkQueue queue = m_context.getUniversalQueue();
1505 const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
1506 Allocator &allocator = m_context.getDefaultAllocator();
1507
1508 // Create an input/output buffer
1509
1510 const VkDeviceSize bufferSizeBytes = sizeof(uint32_t) * m_numValues;
1511 const BufferWithMemory buffer(vk, device, allocator,
1512 makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
1513 MemoryRequirement::HostVisible);
1514
1515 // Fill the buffer with data
1516
1517 typedef std::vector<uint32_t> data_vector_t;
1518 data_vector_t inputData(m_numValues);
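// Keep a host-side copy of the input: the shader inverts the buffer in place,
// so the original values are gone from the buffer by validation time.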
1519
1520 {
1521 de::Random rnd(0x82ce7f);
1522 const Allocation &bufferAllocation = buffer.getAllocation();
1523 uint32_t *bufferPtr = static_cast<uint32_t *>(bufferAllocation.getHostPtr());
1524 for (uint32_t i = 0; i < m_numValues; ++i)
1525 inputData[i] = *bufferPtr++ = rnd.getUint32();
1526
1527 flushAlloc(vk, device, bufferAllocation);
1528 }
1529
1530 // Create descriptor set
1531
1532 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
1533 DescriptorSetLayoutBuilder()
1534 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1535 .build(vk, device));
1536
1537 const Unique<VkDescriptorPool> descriptorPool(
1538 DescriptorPoolBuilder()
1539 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
1540 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1541
1542 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1543
1544 const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
1545 DescriptorSetUpdateBuilder()
1546 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
1547 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
1548 .update(vk, device);
1549
1550 // Perform the computation
1551
1552 ComputePipelineWrapper pipeline(vk, device, m_computePipelineConstructionType,
1553 m_context.getBinaryCollection().get("comp"));
1554 pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
1555 pipeline.buildPipeline();
1556
1557 const VkBufferMemoryBarrier hostWriteBarrier =
1558 makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *buffer, 0ull, bufferSizeBytes);
1559
1560 const VkBufferMemoryBarrier shaderWriteBarrier =
1561 makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);
1562
1563 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1564 const Unique<VkCommandBuffer> cmdBuffer(
1565 allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
1566
1567 // Start recording commands
1568
1569 beginCommandBuffer(vk, *cmdBuffer);
1570
1571 pipeline.bind(*cmdBuffer);
1572 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getPipelineLayout(), 0u, 1u,
1573 &descriptorSet.get(), 0u, DE_NULL);
1574
1575 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
1576 (VkDependencyFlags)0, 0, (const VkMemoryBarrier *)DE_NULL, 1, &hostWriteBarrier, 0,
1577 (const VkImageMemoryBarrier *)DE_NULL);
1578 vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1579 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
1580 (VkDependencyFlags)0, 0, (const VkMemoryBarrier *)DE_NULL, 1, &shaderWriteBarrier, 0,
1581 (const VkImageMemoryBarrier *)DE_NULL);
1582
1583 endCommandBuffer(vk, *cmdBuffer);
1584
1585 // Wait for completion
1586
1587 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1588
1589 // Validate the results
1590
1591 const Allocation &bufferAllocation = buffer.getAllocation();
1592 invalidateAlloc(vk, device, bufferAllocation);
1593
1594 const uint32_t *bufferPtr = static_cast<uint32_t *>(bufferAllocation.getHostPtr());
1595
1596 for (uint32_t ndx = 0; ndx < m_numValues; ++ndx)
1597 {
1598 const uint32_t res = bufferPtr[ndx];
1599 const uint32_t ref = ~inputData[ndx];
1600
1601 if (res != ref)
1602 {
1603 std::ostringstream msg;
1604 msg << "Comparison failed for InOut.values[" << ndx << "]";
1605 return tcu::TestStatus::fail(msg.str());
1606 }
1607 }
1608 return tcu::TestStatus::pass("Compute succeeded");
1609 }
1610
1611 class WriteToMultipleSSBOTest : public vkt::TestCase
1612 {
1613 public:
1614 WriteToMultipleSSBOTest(tcu::TestContext &testCtx, const std::string &name, const uint32_t numValues,
1615 const bool sized, const tcu::IVec3 &localSize, const tcu::IVec3 &workSize,
1616 const vk::ComputePipelineConstructionType computePipelineConstructionType);
1617
1618 virtual void checkSupport(Context &context) const;
1619 void initPrograms(SourceCollections &sourceCollections) const;
1620 TestInstance *createInstance(Context &context) const;
1621
1622 private:
1623 const uint32_t m_numValues;
1624 const bool m_sized;
1625 const tcu::IVec3 m_localSize;
1626 const tcu::IVec3 m_workSize;
1627 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
1628 };
1629
1630 class WriteToMultipleSSBOTestInstance : public vkt::TestInstance
1631 {
1632 public:
1633 WriteToMultipleSSBOTestInstance(Context &context, const uint32_t numValues, const tcu::IVec3 &localSize,
1634 const tcu::IVec3 &workSize,
1635 const vk::ComputePipelineConstructionType computePipelineConstructionType);
1636
1637 tcu::TestStatus iterate(void);
1638
1639 private:
1640 const uint32_t m_numValues;
1641 const tcu::IVec3 m_localSize;
1642 const tcu::IVec3 m_workSize;
1643 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
1644 };
1645
1646 WriteToMultipleSSBOTest::WriteToMultipleSSBOTest(
1647 tcu::TestContext &testCtx, const std::string &name, const uint32_t numValues, const bool sized,
1648 const tcu::IVec3 &localSize, const tcu::IVec3 &workSize,
1649 const vk::ComputePipelineConstructionType computePipelineConstructionType)
1650 : TestCase(testCtx, name)
1651 , m_numValues(numValues)
1652 , m_sized(sized)
1653 , m_localSize(localSize)
1654 , m_workSize(workSize)
1655 , m_computePipelineConstructionType(computePipelineConstructionType)
1656 {
1657 DE_ASSERT(m_numValues % (multiplyComponents(m_workSize) * multiplyComponents(m_localSize)) == 0);
1658 }
1659
1660 void WriteToMultipleSSBOTest::checkSupport(Context &context) const
1661 {
1662 checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
1663 m_computePipelineConstructionType);
1664 }
1665
1666 void WriteToMultipleSSBOTest::initPrograms(SourceCollections &sourceCollections) const
1667 {
1668 std::ostringstream src;
1669 src << "#version 310 es\n"
1670 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y()
1671 << ", local_size_z = " << m_localSize.z() << ") in;\n"
1672 << "layout(binding = 0) writeonly buffer Out0 {\n"
1673 << " uint values[" << (m_sized ? de::toString(m_numValues) : "") << "];\n"
1674 << "} sb_out0;\n"
1675 << "layout(binding = 1) writeonly buffer Out1 {\n"
1676 << " uint values[" << (m_sized ? de::toString(m_numValues) : "") << "];\n"
1677 << "} sb_out1;\n"
1678 << "void main (void) {\n"
1679 << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1680 << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + "
1681 "gl_GlobalInvocationID.x;\n"
1682 << "\n"
1683 << " {\n"
1684 << " uint numValuesPerInv = uint(sb_out0.values.length()) / (size.x*size.y*size.z);\n"
1685 << " uint offset = numValuesPerInv*groupNdx;\n"
1686 << "\n"
1687 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1688 << " sb_out0.values[offset + ndx] = offset + ndx;\n"
1689 << " }\n"
1690 << " {\n"
1691 << " uint numValuesPerInv = uint(sb_out1.values.length()) / (size.x*size.y*size.z);\n"
1692 << " uint offset = numValuesPerInv*groupNdx;\n"
1693 << "\n"
1694 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
1695 << " sb_out1.values[offset + ndx] = uint(sb_out1.values.length()) - offset - ndx;\n"
1696 << " }\n"
1697 << "}\n";
1698
1699 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1700 }
1701
1702 TestInstance *WriteToMultipleSSBOTest::createInstance(Context &context) const
1703 {
1704 return new WriteToMultipleSSBOTestInstance(context, m_numValues, m_localSize, m_workSize,
1705 m_computePipelineConstructionType);
1706 }
1707
1708 WriteToMultipleSSBOTestInstance::WriteToMultipleSSBOTestInstance(
1709 Context &context, const uint32_t numValues, const tcu::IVec3 &localSize, const tcu::IVec3 &workSize,
1710 const vk::ComputePipelineConstructionType computePipelineConstructionType)
1711 : TestInstance(context)
1712 , m_numValues(numValues)
1713 , m_localSize(localSize)
1714 , m_workSize(workSize)
1715 , m_computePipelineConstructionType(computePipelineConstructionType)
1716 {
1717 }
1718
1719 tcu::TestStatus WriteToMultipleSSBOTestInstance::iterate(void)
1720 {
1721 const DeviceInterface &vk = m_context.getDeviceInterface();
1722 const VkDevice device = m_context.getDevice();
1723 const VkQueue queue = m_context.getUniversalQueue();
1724 const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
1725 Allocator &allocator = m_context.getDefaultAllocator();
1726
1727 // Create two output buffers
1728
1729 const VkDeviceSize bufferSizeBytes = sizeof(uint32_t) * m_numValues;
1730 const BufferWithMemory buffer0(vk, device, allocator,
1731 makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
1732 MemoryRequirement::HostVisible);
1733 const BufferWithMemory buffer1(vk, device, allocator,
1734 makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
1735 MemoryRequirement::HostVisible);
1736
1737 // Create descriptor set
1738
1739 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
1740 DescriptorSetLayoutBuilder()
1741 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1742 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1743 .build(vk, device));
1744
1745 const Unique<VkDescriptorPool> descriptorPool(
1746 DescriptorPoolBuilder()
1747 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u)
1748 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1749
1750 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1751
1752 const VkDescriptorBufferInfo buffer0DescriptorInfo = makeDescriptorBufferInfo(*buffer0, 0ull, bufferSizeBytes);
1753 const VkDescriptorBufferInfo buffer1DescriptorInfo = makeDescriptorBufferInfo(*buffer1, 0ull, bufferSizeBytes);
1754 DescriptorSetUpdateBuilder()
1755 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
1756 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &buffer0DescriptorInfo)
1757 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
1758 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &buffer1DescriptorInfo)
1759 .update(vk, device);
1760
1761 // Perform the computation
1762
1763 ComputePipelineWrapper pipeline(vk, device, m_computePipelineConstructionType,
1764 m_context.getBinaryCollection().get("comp"));
1765 pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
1766 pipeline.buildPipeline();
1767
1768 const VkBufferMemoryBarrier shaderWriteBarriers[] = {
1769 makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer0, 0ull, bufferSizeBytes),
1770 makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer1, 0ull, bufferSizeBytes)};
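// No host-write barrier is needed before the dispatch: both buffers are
// write-only outputs whose initial contents are never read by the shader.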
1771
1772 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
1773 const Unique<VkCommandBuffer> cmdBuffer(
1774 allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
1775
1776 // Start recording commands
1777
1778 beginCommandBuffer(vk, *cmdBuffer);
1779
1780 pipeline.bind(*cmdBuffer);
1781 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getPipelineLayout(), 0u, 1u,
1782 &descriptorSet.get(), 0u, DE_NULL);
1783
1784 vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
1785 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
1786 (VkDependencyFlags)0, 0, (const VkMemoryBarrier *)DE_NULL,
1787 DE_LENGTH_OF_ARRAY(shaderWriteBarriers), shaderWriteBarriers, 0,
1788 (const VkImageMemoryBarrier *)DE_NULL);
1789
1790 endCommandBuffer(vk, *cmdBuffer);
1791
1792 // Wait for completion
1793
1794 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
1795
1796 // Validate the results
1797 {
1798 const Allocation &buffer0Allocation = buffer0.getAllocation();
1799 invalidateAlloc(vk, device, buffer0Allocation);
1800 const uint32_t *buffer0Ptr = static_cast<uint32_t *>(buffer0Allocation.getHostPtr());
1801
1802 for (uint32_t ndx = 0; ndx < m_numValues; ++ndx)
1803 {
1804 const uint32_t res = buffer0Ptr[ndx];
1805 const uint32_t ref = ndx;
1806
1807 if (res != ref)
1808 {
1809 std::ostringstream msg;
1810 msg << "Comparison failed for Out0.values[" << ndx << "] res=" << res << " ref=" << ref;
1811 return tcu::TestStatus::fail(msg.str());
1812 }
1813 }
1814 }
1815 {
1816 const Allocation &buffer1Allocation = buffer1.getAllocation();
1817 invalidateAlloc(vk, device, buffer1Allocation);
1818 const uint32_t *buffer1Ptr = static_cast<uint32_t *>(buffer1Allocation.getHostPtr());
1819
1820 for (uint32_t ndx = 0; ndx < m_numValues; ++ndx)
1821 {
1822 const uint32_t res = buffer1Ptr[ndx];
1823 const uint32_t ref = m_numValues - ndx;
1824
1825 if (res != ref)
1826 {
1827 std::ostringstream msg;
1828 msg << "Comparison failed for Out1.values[" << ndx << "] res=" << res << " ref=" << ref;
1829 return tcu::TestStatus::fail(msg.str());
1830 }
1831 }
1832 }
1833 return tcu::TestStatus::pass("Compute succeeded");
1834 }
1835
1836 class SSBOBarrierTest : public vkt::TestCase
1837 {
1838 public:
1839 SSBOBarrierTest(tcu::TestContext &testCtx, const std::string &name, const tcu::IVec3 &workSize,
1840 const vk::ComputePipelineConstructionType computePipelineConstructionType);
1841
1842 virtual void checkSupport(Context &context) const;
1843 void initPrograms(SourceCollections &sourceCollections) const;
1844 TestInstance *createInstance(Context &context) const;
1845
1846 private:
1847 const tcu::IVec3 m_workSize;
1848 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
1849 };
1850
1851 class SSBOBarrierTestInstance : public vkt::TestInstance
1852 {
1853 public:
1854 SSBOBarrierTestInstance(Context &context, const tcu::IVec3 &workSize,
1855 const vk::ComputePipelineConstructionType computePipelineConstructionType);
1856
1857 tcu::TestStatus iterate(void);
1858
1859 private:
1860 const tcu::IVec3 m_workSize;
1861 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
1862 };
1863
1864 SSBOBarrierTest::SSBOBarrierTest(tcu::TestContext &testCtx, const std::string &name, const tcu::IVec3 &workSize,
1865 const vk::ComputePipelineConstructionType computePipelineConstructionType)
1866 : TestCase(testCtx, name)
1867 , m_workSize(workSize)
1868 , m_computePipelineConstructionType(computePipelineConstructionType)
1869 {
1870 }
1871
1872 void SSBOBarrierTest::checkSupport(Context &context) const
1873 {
1874 checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
1875 m_computePipelineConstructionType);
1876 }
1877
1878 void SSBOBarrierTest::initPrograms(SourceCollections &sourceCollections) const
1879 {
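// Two-pass reduction: comp0 writes one value per workgroup into the work buffer;
// after a compute-to-compute buffer barrier, comp1 reads those values back and
// accumulates them into a single sum with atomicAdd.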
1880 sourceCollections.glslSources.add("comp0")
1881 << glu::ComputeSource("#version 310 es\n"
1882 "layout (local_size_x = 1) in;\n"
1883 "layout(binding = 2) readonly uniform Constants {\n"
1884 " uint u_baseVal;\n"
1885 "};\n"
1886 "layout(binding = 1) writeonly buffer Output {\n"
1887 " uint values[];\n"
1888 "};\n"
1889 "void main (void) {\n"
1890 " uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + "
1891 "gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
1892 " values[offset] = u_baseVal + offset;\n"
1893 "}\n");
1894
1895 sourceCollections.glslSources.add("comp1")
1896 << glu::ComputeSource("#version 310 es\n"
1897 "layout (local_size_x = 1) in;\n"
1898 "layout(binding = 1) readonly buffer Input {\n"
1899 " uint values[];\n"
1900 "};\n"
1901 "layout(binding = 0) coherent buffer Output {\n"
1902 " uint sum;\n"
1903 "};\n"
1904 "void main (void) {\n"
1905 " uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + "
1906 "gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
1907 " uint value = values[offset];\n"
1908 " atomicAdd(sum, value);\n"
1909 "}\n");
1910 }
1911
1912 TestInstance *SSBOBarrierTest::createInstance(Context &context) const
1913 {
1914 return new SSBOBarrierTestInstance(context, m_workSize, m_computePipelineConstructionType);
1915 }
1916
1917 SSBOBarrierTestInstance::SSBOBarrierTestInstance(
1918 Context &context, const tcu::IVec3 &workSize,
1919 const vk::ComputePipelineConstructionType computePipelineConstructionType)
1920 : TestInstance(context)
1921 , m_workSize(workSize)
1922 , m_computePipelineConstructionType(computePipelineConstructionType)
1923 {
1924 }
1925
1926 tcu::TestStatus SSBOBarrierTestInstance::iterate(void)
1927 {
1928 const DeviceInterface &vk = m_context.getDeviceInterface();
1929 const VkDevice device = m_context.getDevice();
1930 const VkQueue queue = m_context.getUniversalQueue();
1931 const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
1932 Allocator &allocator = m_context.getDefaultAllocator();
1933
1934 // Create a work buffer used by both shaders
1935
1936 const int workGroupCount = multiplyComponents(m_workSize);
1937 const VkDeviceSize workBufferSizeBytes = sizeof(uint32_t) * workGroupCount;
1938 const BufferWithMemory workBuffer(vk, device, allocator,
1939 makeBufferCreateInfo(workBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
1940 MemoryRequirement::Any);
1941
1942 // Create an output buffer
1943
1944 const VkDeviceSize outputBufferSizeBytes = sizeof(uint32_t);
1945 const BufferWithMemory outputBuffer(vk, device, allocator,
1946 makeBufferCreateInfo(outputBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
1947 MemoryRequirement::HostVisible);
1948
1949 // Initialize atomic counter value to zero
1950 {
1951 const Allocation &outputBufferAllocation = outputBuffer.getAllocation();
1952 uint32_t *outputBufferPtr = static_cast<uint32_t *>(outputBufferAllocation.getHostPtr());
1953 *outputBufferPtr = 0;
1954 flushAlloc(vk, device, outputBufferAllocation);
1955 }
1956
1957 // Create a uniform buffer (to pass uniform constants)
1958
1959 const VkDeviceSize uniformBufferSizeBytes = sizeof(uint32_t);
1960 const BufferWithMemory uniformBuffer(
1961 vk, device, allocator, makeBufferCreateInfo(uniformBufferSizeBytes, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT),
1962 MemoryRequirement::HostVisible);
1963
1964 // Set the constants in the uniform buffer
1965
1966 const uint32_t baseValue = 127;
1967 {
1968 const Allocation &uniformBufferAllocation = uniformBuffer.getAllocation();
1969 uint32_t *uniformBufferPtr = static_cast<uint32_t *>(uniformBufferAllocation.getHostPtr());
1970 uniformBufferPtr[0] = baseValue;
1971
1972 flushAlloc(vk, device, uniformBufferAllocation);
1973 }
1974
1975 // Create descriptor set
1976
1977 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
1978 DescriptorSetLayoutBuilder()
1979 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1980 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1981 .addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1982 .build(vk, device));
1983
1984 const Unique<VkDescriptorPool> descriptorPool(
1985 DescriptorPoolBuilder()
1986 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u)
1987 .addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
1988 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1989
1990 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1991
1992 const VkDescriptorBufferInfo workBufferDescriptorInfo =
1993 makeDescriptorBufferInfo(*workBuffer, 0ull, workBufferSizeBytes);
1994 const VkDescriptorBufferInfo outputBufferDescriptorInfo =
1995 makeDescriptorBufferInfo(*outputBuffer, 0ull, outputBufferSizeBytes);
1996 const VkDescriptorBufferInfo uniformBufferDescriptorInfo =
1997 makeDescriptorBufferInfo(*uniformBuffer, 0ull, uniformBufferSizeBytes);
1998 DescriptorSetUpdateBuilder()
1999 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
2000 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescriptorInfo)
2001 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
2002 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &workBufferDescriptorInfo)
2003 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(2u),
2004 VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &uniformBufferDescriptorInfo)
2005 .update(vk, device);
2006
2007 // Perform the computation
2008
2009 ComputePipelineWrapper pipeline0(vk, device, m_computePipelineConstructionType,
2010 m_context.getBinaryCollection().get("comp0"));
2011 pipeline0.setDescriptorSetLayout(descriptorSetLayout.get());
2012 pipeline0.buildPipeline();
2013
2014 ComputePipelineWrapper pipeline1(vk, device, m_computePipelineConstructionType,
2015 m_context.getBinaryCollection().get("comp1"));
2016 pipeline1.setDescriptorSetLayout(descriptorSetLayout.get());
2017 pipeline1.buildPipeline();
2018
2019 const VkBufferMemoryBarrier writeUniformConstantsBarrier = makeBufferMemoryBarrier(
2020 VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_UNIFORM_READ_BIT, *uniformBuffer, 0ull, uniformBufferSizeBytes);
2021
2022 const VkBufferMemoryBarrier betweenShadersBarrier = makeBufferMemoryBarrier(
2023 VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *workBuffer, 0ull, workBufferSizeBytes);
2024
2025 const VkBufferMemoryBarrier afterComputeBarrier = makeBufferMemoryBarrier(
2026 VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, outputBufferSizeBytes);
2027
2028 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
2029 const Unique<VkCommandBuffer> cmdBuffer(
2030 allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
2031
2032 // Start recording commands
2033
2034 beginCommandBuffer(vk, *cmdBuffer);
2035
2036 pipeline0.bind(*cmdBuffer);
2037 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline0.getPipelineLayout(), 0u, 1u,
2038 &descriptorSet.get(), 0u, DE_NULL);
2039
2040 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
2041 (VkDependencyFlags)0, 0, (const VkMemoryBarrier *)DE_NULL, 1, &writeUniformConstantsBarrier,
2042 0, (const VkImageMemoryBarrier *)DE_NULL);
2043
2044 vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
2045 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
2046 (VkDependencyFlags)0, 0, (const VkMemoryBarrier *)DE_NULL, 1, &betweenShadersBarrier, 0,
2047 (const VkImageMemoryBarrier *)DE_NULL);
2048
2049 // Switch to the second shader program
2050 pipeline1.bind(*cmdBuffer);
2051
2052 vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
2053 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
2054 (VkDependencyFlags)0, 0, (const VkMemoryBarrier *)DE_NULL, 1, &afterComputeBarrier, 0,
2055 (const VkImageMemoryBarrier *)DE_NULL);
2056
2057 endCommandBuffer(vk, *cmdBuffer);
2058
2059 // Wait for completion
2060
2061 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2062
2063 // Validate the results
2064
2065 const Allocation &outputBufferAllocation = outputBuffer.getAllocation();
2066 invalidateAlloc(vk, device, outputBufferAllocation);
2067
2068 const uint32_t *bufferPtr = static_cast<uint32_t *>(outputBufferAllocation.getHostPtr());
2069 const uint32_t res = *bufferPtr;
2070 uint32_t ref = 0;
2071
2072 for (int ndx = 0; ndx < workGroupCount; ++ndx)
2073 ref += baseValue + ndx;
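// Closed form of the loop above:
// ref = workGroupCount * baseValue + workGroupCount * (workGroupCount - 1) / 2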
2074
2075 if (res != ref)
2076 {
2077 std::ostringstream msg;
2078 msg << "ERROR: comparison failed, expected " << ref << ", got " << res;
2079 return tcu::TestStatus::fail(msg.str());
2080 }
2081 return tcu::TestStatus::pass("Compute succeeded");
2082 }
2083
2084 class ImageAtomicOpTest : public vkt::TestCase
2085 {
2086 public:
2087 ImageAtomicOpTest(tcu::TestContext &testCtx, const std::string &name, const uint32_t localSize,
2088 const tcu::IVec2 &imageSize,
2089 const vk::ComputePipelineConstructionType computePipelineConstructionType);
2090
2091 virtual void checkSupport(Context &context) const;
2092 void initPrograms(SourceCollections &sourceCollections) const;
2093 TestInstance *createInstance(Context &context) const;
2094
2095 private:
2096 const uint32_t m_localSize;
2097 const tcu::IVec2 m_imageSize;
2098 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
2099 };
2100
2101 class ImageAtomicOpTestInstance : public vkt::TestInstance
2102 {
2103 public:
2104 ImageAtomicOpTestInstance(Context &context, const uint32_t localSize, const tcu::IVec2 &imageSize,
2105 const vk::ComputePipelineConstructionType computePipelineConstructionType);
2106
2107 tcu::TestStatus iterate(void);
2108
2109 private:
2110 const uint32_t m_localSize;
2111 const tcu::IVec2 m_imageSize;
2112 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
2113 };
2114
2115 ImageAtomicOpTest::ImageAtomicOpTest(tcu::TestContext &testCtx, const std::string &name, const uint32_t localSize,
2116 const tcu::IVec2 &imageSize,
2117 const vk::ComputePipelineConstructionType computePipelineConstructionType)
2118 : TestCase(testCtx, name)
2119 , m_localSize(localSize)
2120 , m_imageSize(imageSize)
2121 , m_computePipelineConstructionType(computePipelineConstructionType)
2122 {
2123 }
2124
2125 void ImageAtomicOpTest::checkSupport(Context &context) const
2126 {
2127 checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
2128 m_computePipelineConstructionType);
2129 }
2130
2131 void ImageAtomicOpTest::initPrograms(SourceCollections &sourceCollections) const
2132 {
2133 std::ostringstream src;
2134 src << "#version 310 es\n"
2135 << "#extension GL_OES_shader_image_atomic : require\n"
2136 << "layout (local_size_x = " << m_localSize << ") in;\n"
2137 << "layout(binding = 1, r32ui) coherent uniform highp uimage2D u_dstImg;\n"
2138 << "layout(binding = 0) readonly buffer Input {\n"
2139 << " uint values[" << (multiplyComponents(m_imageSize) * m_localSize) << "];\n"
2140 << "} sb_in;\n\n"
2141 << "void main (void) {\n"
2142 << " uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
2143 << " uint value = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n"
2144 << "\n"
2145 << " if (gl_LocalInvocationIndex == 0u)\n"
2146 << " imageStore(u_dstImg, ivec2(gl_WorkGroupID.xy), uvec4(0));\n"
2147 << " memoryBarrierImage();\n"
2148 << " barrier();\n"
2149 << " imageAtomicAdd(u_dstImg, ivec2(gl_WorkGroupID.xy), value);\n"
2150 << "}\n";
2151
2152 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
2153 }
2154
2155 TestInstance *ImageAtomicOpTest::createInstance(Context &context) const
2156 {
2157 return new ImageAtomicOpTestInstance(context, m_localSize, m_imageSize, m_computePipelineConstructionType);
2158 }
2159
2160 ImageAtomicOpTestInstance::ImageAtomicOpTestInstance(
2161 Context &context, const uint32_t localSize, const tcu::IVec2 &imageSize,
2162 const vk::ComputePipelineConstructionType computePipelineConstructionType)
2163 : TestInstance(context)
2164 , m_localSize(localSize)
2165 , m_imageSize(imageSize)
2166 , m_computePipelineConstructionType(computePipelineConstructionType)
2167 {
2168 }
2169
2170 tcu::TestStatus ImageAtomicOpTestInstance::iterate(void)
2171 {
2172 const DeviceInterface &vk = m_context.getDeviceInterface();
2173 const VkDevice device = m_context.getDevice();
2174 const VkQueue queue = m_context.getUniversalQueue();
2175 const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
2176 Allocator &allocator = m_context.getDefaultAllocator();
2177
2178 // Create an image
2179
2180 const VkImageCreateInfo imageParams =
2181 make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_STORAGE_BIT);
2182 const ImageWithMemory image(vk, device, allocator, imageParams, MemoryRequirement::Any);
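// VK_IMAGE_USAGE_TRANSFER_SRC_BIT above is required so the image contents can
// later be copied into the host-visible output buffer for validation.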
2183
2184 const VkImageSubresourceRange subresourceRange =
2185 makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
2186 const Unique<VkImageView> imageView(
2187 makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));
2188
2189 // Input buffer
2190
2191 const uint32_t numInputValues = multiplyComponents(m_imageSize) * m_localSize;
2192 const VkDeviceSize inputBufferSizeBytes = sizeof(uint32_t) * numInputValues;
2193
2194 const BufferWithMemory inputBuffer(vk, device, allocator,
2195 makeBufferCreateInfo(inputBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
2196 MemoryRequirement::HostVisible);
2197
2198 // Populate the input buffer with test data
2199 {
2200 de::Random rnd(0x77238ac2);
2201 const Allocation &inputBufferAllocation = inputBuffer.getAllocation();
2202 uint32_t *bufferPtr = static_cast<uint32_t *>(inputBufferAllocation.getHostPtr());
2203 for (uint32_t i = 0; i < numInputValues; ++i)
2204 *bufferPtr++ = rnd.getUint32();
2205
2206 flushAlloc(vk, device, inputBufferAllocation);
2207 }
2208
2209 // Create a buffer to store shader output (copied from image data)
2210
2211 const uint32_t imageArea = multiplyComponents(m_imageSize);
2212 const VkDeviceSize outputBufferSizeBytes = sizeof(uint32_t) * imageArea;
2213 const BufferWithMemory outputBuffer(vk, device, allocator,
2214 makeBufferCreateInfo(outputBufferSizeBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT),
2215 MemoryRequirement::HostVisible);
2216
2217 // Create descriptor set
2218
2219 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
2220 DescriptorSetLayoutBuilder()
2221 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
2222 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
2223 .build(vk, device));
2224
2225 const Unique<VkDescriptorPool> descriptorPool(
2226 DescriptorPoolBuilder()
2227 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
2228 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
2229 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
2230
2231 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
2232
2233 // Set the bindings
2234
2235 const VkDescriptorImageInfo imageDescriptorInfo =
2236 makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
2237 const VkDescriptorBufferInfo bufferDescriptorInfo =
2238 makeDescriptorBufferInfo(*inputBuffer, 0ull, inputBufferSizeBytes);
2239
2240 DescriptorSetUpdateBuilder()
2241 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
2242 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
2243 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
2244 VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
2245 .update(vk, device);
2246
2247 // Perform the computation
2248 {
2249 ComputePipelineWrapper pipeline(vk, device, m_computePipelineConstructionType,
2250 m_context.getBinaryCollection().get("comp"));
2251 pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
2252 pipeline.buildPipeline();
2253
2254 const VkBufferMemoryBarrier inputBufferPostHostWriteBarrier = makeBufferMemoryBarrier(
2255 VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *inputBuffer, 0ull, inputBufferSizeBytes);
2256
2257 const VkImageMemoryBarrier imageLayoutBarrier =
2258 makeImageMemoryBarrier((VkAccessFlags)0, VK_ACCESS_SHADER_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
2259 VK_IMAGE_LAYOUT_GENERAL, *image, subresourceRange);
2260
2261 // Prepare the command buffer
2262
2263 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
2264 const Unique<VkCommandBuffer> cmdBuffer(
2265 allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
2266
2267 // Start recording commands
2268
2269 beginCommandBuffer(vk, *cmdBuffer);
2270
2271 pipeline.bind(*cmdBuffer);
2272 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getPipelineLayout(), 0u, 1u,
2273 &descriptorSet.get(), 0u, DE_NULL);
2274
2275 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
2276 (VkDependencyFlags)0, 0, (const VkMemoryBarrier *)DE_NULL, 1,
2277 &inputBufferPostHostWriteBarrier, 1, &imageLayoutBarrier);
2278 vk.cmdDispatch(*cmdBuffer, m_imageSize.x(), m_imageSize.y(), 1u);
2279
2280 copyImageToBuffer(vk, *cmdBuffer, *image, *outputBuffer, m_imageSize, VK_ACCESS_SHADER_WRITE_BIT,
2281 VK_IMAGE_LAYOUT_GENERAL);
2282
2283 endCommandBuffer(vk, *cmdBuffer);
2284
2285 // Wait for completion
2286
2287 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2288 }
2289
2290 // Validate the results
2291
2292 const Allocation &outputBufferAllocation = outputBuffer.getAllocation();
2293 invalidateAlloc(vk, device, outputBufferAllocation);
2294
2295 const uint32_t *bufferPtr = static_cast<uint32_t *>(outputBufferAllocation.getHostPtr());
2296 const uint32_t *refBufferPtr = static_cast<uint32_t *>(inputBuffer.getAllocation().getHostPtr());
2297
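// Workgroup (x, y) maps to pixel (x, y): each pixel must equal the sum of the
// m_localSize input values consumed by that workgroup's invocations.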
2298 for (uint32_t pixelNdx = 0; pixelNdx < imageArea; ++pixelNdx)
2299 {
2300 const uint32_t res = bufferPtr[pixelNdx];
2301 uint32_t ref = 0;
2302
2303 for (uint32_t offs = 0; offs < m_localSize; ++offs)
2304 ref += refBufferPtr[pixelNdx * m_localSize + offs];
2305
2306 if (res != ref)
2307 {
2308 std::ostringstream msg;
2309 msg << "Comparison failed for pixel " << pixelNdx;
2310 return tcu::TestStatus::fail(msg.str());
2311 }
2312 }
2313 return tcu::TestStatus::pass("Compute succeeded");
2314 }
2315
2316 class ImageBarrierTest : public vkt::TestCase
2317 {
2318 public:
2319 ImageBarrierTest(tcu::TestContext &testCtx, const std::string &name, const tcu::IVec2 &imageSize,
2320 const vk::ComputePipelineConstructionType computePipelineConstructionType);
2321
2322 virtual void checkSupport(Context &context) const;
2323 void initPrograms(SourceCollections &sourceCollections) const;
2324 TestInstance *createInstance(Context &context) const;
2325
2326 private:
2327 const tcu::IVec2 m_imageSize;
2328 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
2329 };
2330
2331 class ImageBarrierTestInstance : public vkt::TestInstance
2332 {
2333 public:
2334 ImageBarrierTestInstance(Context &context, const tcu::IVec2 &imageSize,
2335 const vk::ComputePipelineConstructionType computePipelineConstructionType);
2336
2337 tcu::TestStatus iterate(void);
2338
2339 private:
2340 const tcu::IVec2 m_imageSize;
2341 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
2342 };
2343
2344 ImageBarrierTest::ImageBarrierTest(tcu::TestContext &testCtx, const std::string &name, const tcu::IVec2 &imageSize,
2345 const vk::ComputePipelineConstructionType computePipelineConstructionType)
2346 : TestCase(testCtx, name)
2347 , m_imageSize(imageSize)
2348 , m_computePipelineConstructionType(computePipelineConstructionType)
2349 {
2350 }
2351
2352 void ImageBarrierTest::checkSupport(Context &context) const
2353 {
2354 checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
2355 m_computePipelineConstructionType);
2356 }
2357
2358 void ImageBarrierTest::initPrograms(SourceCollections &sourceCollections) const
2359 {
2360 sourceCollections.glslSources.add("comp0")
2361 << glu::ComputeSource("#version 310 es\n"
2362 "layout (local_size_x = 1) in;\n"
2363 "layout(binding = 2) readonly uniform Constants {\n"
2364 " uint u_baseVal;\n"
2365 "};\n"
2366 "layout(binding = 1, r32ui) writeonly uniform highp uimage2D u_img;\n"
2367 "void main (void) {\n"
2368 " uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + "
2369 "gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
2370 " imageStore(u_img, ivec2(gl_WorkGroupID.xy), uvec4(offset + u_baseVal, 0, 0, 0));\n"
2371 "}\n");
2372
2373 sourceCollections.glslSources.add("comp1")
2374 << glu::ComputeSource("#version 310 es\n"
2375 "layout (local_size_x = 1) in;\n"
2376 "layout(binding = 1, r32ui) readonly uniform highp uimage2D u_img;\n"
2377 "layout(binding = 0) coherent buffer Output {\n"
2378 " uint sum;\n"
2379 "};\n"
2380 "void main (void) {\n"
2381 " uint value = imageLoad(u_img, ivec2(gl_WorkGroupID.xy)).x;\n"
2382 " atomicAdd(sum, value);\n"
2383 "}\n");
2384 }
2385
2386 TestInstance *ImageBarrierTest::createInstance(Context &context) const
2387 {
2388 return new ImageBarrierTestInstance(context, m_imageSize, m_computePipelineConstructionType);
2389 }
2390
2391 ImageBarrierTestInstance::ImageBarrierTestInstance(
2392 Context &context, const tcu::IVec2 &imageSize,
2393 const vk::ComputePipelineConstructionType computePipelineConstructionType)
2394 : TestInstance(context)
2395 , m_imageSize(imageSize)
2396 , m_computePipelineConstructionType(computePipelineConstructionType)
2397 {
2398 }
2399
2400 tcu::TestStatus ImageBarrierTestInstance::iterate(void)
2401 {
2402 const DeviceInterface &vk = m_context.getDeviceInterface();
2403 const VkDevice device = m_context.getDevice();
2404 const VkQueue queue = m_context.getUniversalQueue();
2405 const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
2406 Allocator &allocator = m_context.getDefaultAllocator();
2407
2408 // Create an image used by both shaders
2409
2410 const VkImageCreateInfo imageParams = make2DImageCreateInfo(m_imageSize, VK_IMAGE_USAGE_STORAGE_BIT);
2411 const ImageWithMemory image(vk, device, allocator, imageParams, MemoryRequirement::Any);
2412
2413 const VkImageSubresourceRange subresourceRange =
2414 makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
2415 const Unique<VkImageView> imageView(
2416 makeImageView(vk, device, *image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32_UINT, subresourceRange));
2417
2418 // Create an output buffer
2419
2420 const VkDeviceSize outputBufferSizeBytes = sizeof(uint32_t);
2421 const BufferWithMemory outputBuffer(vk, device, allocator,
2422 makeBufferCreateInfo(outputBufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
2423 MemoryRequirement::HostVisible);
2424
2425 // Initialize atomic counter value to zero
2426 {
2427 const Allocation &outputBufferAllocation = outputBuffer.getAllocation();
2428 uint32_t *outputBufferPtr = static_cast<uint32_t *>(outputBufferAllocation.getHostPtr());
2429 *outputBufferPtr = 0;
2430 flushAlloc(vk, device, outputBufferAllocation);
2431 }
2432
2433 // Create a uniform buffer (to pass uniform constants)
2434
2435 const VkDeviceSize uniformBufferSizeBytes = sizeof(uint32_t);
2436 const BufferWithMemory uniformBuffer(
2437 vk, device, allocator, makeBufferCreateInfo(uniformBufferSizeBytes, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT),
2438 MemoryRequirement::HostVisible);
2439
2440 // Set the constants in the uniform buffer
2441
2442 const uint32_t baseValue = 127;
2443 {
2444 const Allocation &uniformBufferAllocation = uniformBuffer.getAllocation();
2445 uint32_t *uniformBufferPtr = static_cast<uint32_t *>(uniformBufferAllocation.getHostPtr());
2446 uniformBufferPtr[0] = baseValue;
2447
2448 flushAlloc(vk, device, uniformBufferAllocation);
2449 }
2450
2451 // Create descriptor set
2452
2453 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
2454 DescriptorSetLayoutBuilder()
2455 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
2456 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
2457 .addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
2458 .build(vk, device));
2459
2460 const Unique<VkDescriptorPool> descriptorPool(
2461 DescriptorPoolBuilder()
2462 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
2463 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
2464 .addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
2465 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
2466
2467 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
2468
2469 const VkDescriptorImageInfo imageDescriptorInfo =
2470 makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
2471 const VkDescriptorBufferInfo outputBufferDescriptorInfo =
2472 makeDescriptorBufferInfo(*outputBuffer, 0ull, outputBufferSizeBytes);
2473 const VkDescriptorBufferInfo uniformBufferDescriptorInfo =
2474 makeDescriptorBufferInfo(*uniformBuffer, 0ull, uniformBufferSizeBytes);
2475 DescriptorSetUpdateBuilder()
2476 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
2477 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescriptorInfo)
2478 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
2479 VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptorInfo)
2480 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(2u),
2481 VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &uniformBufferDescriptorInfo)
2482 .update(vk, device);
2483
2484 // Perform the computation
2485
2486 ComputePipelineWrapper pipeline0(vk, device, m_computePipelineConstructionType,
2487 m_context.getBinaryCollection().get("comp0"));
2488 pipeline0.setDescriptorSetLayout(descriptorSetLayout.get());
2489 pipeline0.buildPipeline();
2490 ComputePipelineWrapper pipeline1(vk, device, m_computePipelineConstructionType,
2491 m_context.getBinaryCollection().get("comp1"));
2492 pipeline1.setDescriptorSetLayout(descriptorSetLayout.get());
2493 pipeline1.buildPipeline();
2494
2495 const VkBufferMemoryBarrier writeUniformConstantsBarrier = makeBufferMemoryBarrier(
2496 VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_UNIFORM_READ_BIT, *uniformBuffer, 0ull, uniformBufferSizeBytes);
2497
2498 const VkImageMemoryBarrier imageLayoutBarrier = makeImageMemoryBarrier(
2499 (VkAccessFlags)0, VK_ACCESS_SHADER_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, *image, subresourceRange);
2500
2501 const VkImageMemoryBarrier imageBarrierBetweenShaders =
2502 makeImageMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_GENERAL,
2503 VK_IMAGE_LAYOUT_GENERAL, *image, subresourceRange);
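// This barrier both orders the two dispatches and makes comp0's imageStore
// results visible to comp1's imageLoad; the layout stays GENERAL throughout.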
2504
2505 const VkBufferMemoryBarrier afterComputeBarrier = makeBufferMemoryBarrier(
2506 VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, outputBufferSizeBytes);
2507
2508 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
2509 const Unique<VkCommandBuffer> cmdBuffer(
2510 allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
2511
2512 // Start recording commands
2513
2514 beginCommandBuffer(vk, *cmdBuffer);
2515
2516 pipeline0.bind(*cmdBuffer);
2517 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline0.getPipelineLayout(), 0u, 1u,
2518 &descriptorSet.get(), 0u, DE_NULL);
2519
2520 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
2521 (VkDependencyFlags)0, 0, (const VkMemoryBarrier *)DE_NULL, 1, &writeUniformConstantsBarrier,
2522 1, &imageLayoutBarrier);
2523
2524 vk.cmdDispatch(*cmdBuffer, m_imageSize.x(), m_imageSize.y(), 1u);
2525 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
2526 (VkDependencyFlags)0, 0, (const VkMemoryBarrier *)DE_NULL, 0,
2527 (const VkBufferMemoryBarrier *)DE_NULL, 1, &imageBarrierBetweenShaders);
2528
2529 // Switch to the second shader program
2530 pipeline1.bind(*cmdBuffer);
2531
2532 vk.cmdDispatch(*cmdBuffer, m_imageSize.x(), m_imageSize.y(), 1u);
2533 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
2534 (VkDependencyFlags)0, 0, (const VkMemoryBarrier *)DE_NULL, 1, &afterComputeBarrier, 0,
2535 (const VkImageMemoryBarrier *)DE_NULL);
2536
2537 endCommandBuffer(vk, *cmdBuffer);
2538
2539 // Wait for completion
2540
2541 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2542
2543 // Validate the results
2544
2545 const Allocation &outputBufferAllocation = outputBuffer.getAllocation();
2546 invalidateAlloc(vk, device, outputBufferAllocation);
2547
2548 const int numValues = multiplyComponents(m_imageSize);
2549 const uint32_t *bufferPtr = static_cast<uint32_t *>(outputBufferAllocation.getHostPtr());
2550 const uint32_t res = *bufferPtr;
2551 uint32_t ref = 0;
2552
2553 for (int ndx = 0; ndx < numValues; ++ndx)
2554 ref += baseValue + ndx;
2555
2556 if (res != ref)
2557 {
2558 std::ostringstream msg;
2559 msg << "ERROR: comparison failed, expected " << ref << ", got " << res;
2560 return tcu::TestStatus::fail(msg.str());
2561 }
2562 return tcu::TestStatus::pass("Compute succeeded");
2563 }
2564
2565 class ComputeTestInstance : public vkt::TestInstance
2566 {
2567 public:
2568 ComputeTestInstance(Context &context, vk::ComputePipelineConstructionType computePipelineConstructionType,
2569 bool useMaintenance5)
2570 : TestInstance(context)
2571 , m_numPhysDevices(1)
2572 , m_queueFamilyIndex(0)
2573 , m_computePipelineConstructionType(computePipelineConstructionType)
2574 , m_maintenance5(useMaintenance5)
2575 {
2576 createDeviceGroup();
2577 }
2578
2579 ~ComputeTestInstance()
2580 {
2581 }
2582
2583 void createDeviceGroup(void);
2584 const vk::DeviceInterface &getDeviceInterface(void)
2585 {
2586 return *m_deviceDriver;
2587 }
2588 vk::VkInstance getInstance(void)
2589 {
2590 return m_deviceGroupInstance;
2591 }
2592 vk::VkDevice getDevice(void)
2593 {
2594 return *m_logicalDevice;
2595 }
2596 vk::VkPhysicalDevice getPhysicalDevice(uint32_t i = 0)
2597 {
2598 return m_physicalDevices[i];
2599 }
2600
2601 protected:
2602 uint32_t m_numPhysDevices;
2603 uint32_t m_queueFamilyIndex;
2604 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
2605 bool m_maintenance5;
2606
2607 private:
2608 CustomInstance m_deviceGroupInstance;
2609 vk::Move<vk::VkDevice> m_logicalDevice;
2610 std::vector<vk::VkPhysicalDevice> m_physicalDevices;
2611 #ifndef CTS_USES_VULKANSC
2612 de::MovePtr<vk::DeviceDriver> m_deviceDriver;
2613 #else
2614 de::MovePtr<vk::DeviceDriverSC, vk::DeinitDeviceDeleter> m_deviceDriver;
2615 #endif // CTS_USES_VULKANSC
2616 };
2617
2618 void ComputeTestInstance::createDeviceGroup(void)
2619 {
2620 const tcu::CommandLine &cmdLine = m_context.getTestContext().getCommandLine();
2621 const uint32_t devGroupIdx = cmdLine.getVKDeviceGroupId() - 1;
2622 const uint32_t physDeviceIdx = cmdLine.getVKDeviceId() - 1;
2623 const float queuePriority = 1.0f;
2624 const std::vector<std::string> requiredExtensions(1, "VK_KHR_device_group_creation");
2625 m_deviceGroupInstance = createCustomInstanceWithExtensions(m_context, requiredExtensions);
2626 std::vector<VkPhysicalDeviceGroupProperties> devGroupProperties =
2627 enumeratePhysicalDeviceGroups(m_context.getInstanceInterface(), m_deviceGroupInstance);
2628 m_numPhysDevices = devGroupProperties[devGroupIdx].physicalDeviceCount;
2629 std::vector<const char *> deviceExtensions;
2630
2631 if (!isCoreDeviceExtension(m_context.getUsedApiVersion(), "VK_KHR_device_group"))
2632 deviceExtensions.push_back("VK_KHR_device_group");
2633
2634 if (m_maintenance5)
2635 deviceExtensions.push_back("VK_KHR_maintenance5");
2636
2639 VkDeviceGroupDeviceCreateInfo deviceGroupInfo = {
2640 VK_STRUCTURE_TYPE_DEVICE_GROUP_DEVICE_CREATE_INFO, // sType
2641 DE_NULL, // pNext
2642 devGroupProperties[devGroupIdx].physicalDeviceCount, // physicalDeviceCount
2643 devGroupProperties[devGroupIdx].physicalDevices // physicalDevices
2644 };
2645 const InstanceDriver &instance(m_deviceGroupInstance.getDriver());
2646 VkPhysicalDeviceFeatures2 deviceFeatures2 = initVulkanStructure();
2647 const VkPhysicalDeviceFeatures deviceFeatures =
2648 getPhysicalDeviceFeatures(instance, deviceGroupInfo.pPhysicalDevices[physDeviceIdx]);
2649 const std::vector<VkQueueFamilyProperties> queueProps = getPhysicalDeviceQueueFamilyProperties(
2650 instance, devGroupProperties[devGroupIdx].physicalDevices[physDeviceIdx]);
2651
2652 deviceFeatures2.features = deviceFeatures;
2653
2654 #ifndef CTS_USES_VULKANSC
2655 VkPhysicalDeviceDynamicRenderingFeaturesKHR dynamicRenderingFeatures = initVulkanStructure();
2656 dynamicRenderingFeatures.dynamicRendering = VK_TRUE;
2657 VkPhysicalDeviceShaderObjectFeaturesEXT shaderObjectFeatures = initVulkanStructure(&dynamicRenderingFeatures);
2658 shaderObjectFeatures.shaderObject = VK_TRUE;
2659 if (m_computePipelineConstructionType != COMPUTE_PIPELINE_CONSTRUCTION_TYPE_PIPELINE)
2660 {
2661 deviceExtensions.push_back("VK_EXT_shader_object");
2662 deviceFeatures2.pNext = &shaderObjectFeatures;
2663 }
2664 #endif
2665
2666 m_physicalDevices.resize(m_numPhysDevices);
2667 for (uint32_t physDevIdx = 0; physDevIdx < m_numPhysDevices; physDevIdx++)
2668 m_physicalDevices[physDevIdx] = devGroupProperties[devGroupIdx].physicalDevices[physDevIdx];
2669
2670 for (size_t queueNdx = 0; queueNdx < queueProps.size(); queueNdx++)
2671 {
2672 if (queueProps[queueNdx].queueFlags & VK_QUEUE_COMPUTE_BIT)
2673 m_queueFamilyIndex = (uint32_t)queueNdx;
2674 }
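// Note: the loop above has no break, so the last compute-capable family is selected;
// any family advertising VK_QUEUE_COMPUTE_BIT works here.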
2675
2676 VkDeviceQueueCreateInfo queueInfo = {
2677 VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, // VkStructureType sType;
2678 DE_NULL, // const void* pNext;
2679 (VkDeviceQueueCreateFlags)0u, // VkDeviceQueueCreateFlags flags;
2680 m_queueFamilyIndex, // uint32_t queueFamilyIndex;
2681 1u, // uint32_t queueCount;
2682 &queuePriority // const float* pQueuePriorities;
2683 };
2684
2685 void *pNext = &deviceGroupInfo;
2686 if (deviceFeatures2.pNext != DE_NULL)
2687 deviceGroupInfo.pNext = &deviceFeatures2;
2688
2689 #ifdef CTS_USES_VULKANSC
2690 VkDeviceObjectReservationCreateInfo memReservationInfo = cmdLine.isSubProcess() ?
2691 m_context.getResourceInterface()->getStatMax() :
2692 resetDeviceObjectReservationCreateInfo();
2693 memReservationInfo.pNext = pNext;
2694 pNext = &memReservationInfo;
2695
2696 VkPhysicalDeviceVulkanSC10Features sc10Features = createDefaultSC10Features();
2697 sc10Features.pNext = pNext;
2698 pNext = &sc10Features;
2699 VkPipelineCacheCreateInfo pcCI;
2700 std::vector<VkPipelinePoolSize> poolSizes;
2701 if (cmdLine.isSubProcess())
2702 {
2703 if (m_context.getResourceInterface()->getCacheDataSize() > 0)
2704 {
2705 pcCI = {
2706 VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, // VkStructureType sType;
2707 DE_NULL, // const void* pNext;
2708 VK_PIPELINE_CACHE_CREATE_READ_ONLY_BIT |
2709 VK_PIPELINE_CACHE_CREATE_USE_APPLICATION_STORAGE_BIT, // VkPipelineCacheCreateFlags flags;
2710 m_context.getResourceInterface()->getCacheDataSize(), // uintptr_t initialDataSize;
2711 m_context.getResourceInterface()->getCacheData() // const void* pInitialData;
2712 };
2713 memReservationInfo.pipelineCacheCreateInfoCount = 1;
2714 memReservationInfo.pPipelineCacheCreateInfos = &pcCI;
2715 }
2716
2717 poolSizes = m_context.getResourceInterface()->getPipelinePoolSizes();
2718 if (!poolSizes.empty())
2719 {
2720 memReservationInfo.pipelinePoolSizeCount = uint32_t(poolSizes.size());
2721 memReservationInfo.pPipelinePoolSizes = poolSizes.data();
2722 }
2723 }
2724
2725 #endif // CTS_USES_VULKANSC
2726
2727 const VkDeviceCreateInfo deviceInfo = {
2728 VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, // VkStructureType sType;
2729 pNext, // const void* pNext;
2730 (VkDeviceCreateFlags)0, // VkDeviceCreateFlags flags;
2731 1u, // uint32_t queueCreateInfoCount;
2732 &queueInfo, // const VkDeviceQueueCreateInfo* pQueueCreateInfos;
2733 0u, // uint32_t enabledLayerCount;
2734 DE_NULL, // const char* const* ppEnabledLayerNames;
2735 uint32_t(deviceExtensions.size()), // uint32_t enabledExtensionCount;
2736 (deviceExtensions.empty() ? DE_NULL : &deviceExtensions[0]), // const char* const* ppEnabledExtensionNames;
2737 deviceFeatures2.pNext == DE_NULL ? &deviceFeatures :
2738 DE_NULL, // const VkPhysicalDeviceFeatures* pEnabledFeatures;
2739 };
2740
2741 m_logicalDevice = createCustomDevice(m_context.getTestContext().getCommandLine().isValidationEnabled(),
2742 m_context.getPlatformInterface(), m_deviceGroupInstance, instance,
2743 deviceGroupInfo.pPhysicalDevices[physDeviceIdx], &deviceInfo);
2744 #ifndef CTS_USES_VULKANSC
2745 m_deviceDriver = de::MovePtr<DeviceDriver>(new DeviceDriver(m_context.getPlatformInterface(), m_deviceGroupInstance,
2746 *m_logicalDevice, m_context.getUsedApiVersion(),
2747 m_context.getTestContext().getCommandLine()));
2748 #else
2749 m_deviceDriver = de::MovePtr<DeviceDriverSC, DeinitDeviceDeleter>(
2750 new DeviceDriverSC(m_context.getPlatformInterface(), m_context.getInstance(), *m_logicalDevice,
2751 m_context.getTestContext().getCommandLine(), m_context.getResourceInterface(),
2752 m_context.getDeviceVulkanSC10Properties(), m_context.getDeviceProperties(),
2753 m_context.getUsedApiVersion()),
2754 vk::DeinitDeviceDeleter(m_context.getResourceInterface().get(), *m_logicalDevice));
2755 #endif // CTS_USES_VULKANSC
2756 }
2757
2758 class DispatchBaseTest : public vkt::TestCase
2759 {
2760 public:
2761 DispatchBaseTest(tcu::TestContext &testCtx, const std::string &name, const uint32_t numValues,
2762 const tcu::IVec3 &localsize, const tcu::IVec3 &worksize, const tcu::IVec3 &splitsize,
2763 const vk::ComputePipelineConstructionType computePipelineConstructionType,
2764 const bool useMaintenance5);
2765
2766 virtual void checkSupport(Context &context) const;
2767 void initPrograms(SourceCollections &sourceCollections) const;
2768 TestInstance *createInstance(Context &context) const;
2769
2770 private:
2771 const uint32_t m_numValues;
2772 const tcu::IVec3 m_localSize;
2773 const tcu::IVec3 m_workSize;
2774 const tcu::IVec3 m_splitSize;
2775 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
2776 const bool m_useMaintenance5;
2777 };
2778
2779 class DispatchBaseTestInstance : public ComputeTestInstance
2780 {
2781 public:
2782 DispatchBaseTestInstance(Context &context, const uint32_t numValues, const tcu::IVec3 &localsize,
2783 const tcu::IVec3 &worksize, const tcu::IVec3 &splitsize,
2784 const vk::ComputePipelineConstructionType computePipelineConstructionType,
2785 const bool useMaintenance5);
2786
2787 bool isInputVectorValid(const tcu::IVec3 &small, const tcu::IVec3 &big);
2788 tcu::TestStatus iterate(void);
2789
2790 private:
2791 const uint32_t m_numValues;
2792 const tcu::IVec3 m_localSize;
2793 const tcu::IVec3 m_workSize;
2794 const tcu::IVec3 m_splitWorkSize;
2795 const bool m_useMaintenance5;
2796 };
2797
2798 DispatchBaseTest::DispatchBaseTest(tcu::TestContext &testCtx, const std::string &name, const uint32_t numValues,
2799 const tcu::IVec3 &localsize, const tcu::IVec3 &worksize, const tcu::IVec3 &splitsize,
2800 const vk::ComputePipelineConstructionType computePipelineConstructionType,
2801 const bool useMaintenance5)
2802 : TestCase(testCtx, name)
2803 , m_numValues(numValues)
2804 , m_localSize(localsize)
2805 , m_workSize(worksize)
2806 , m_splitSize(splitsize)
2807 , m_computePipelineConstructionType(computePipelineConstructionType)
2808 , m_useMaintenance5(useMaintenance5)
2809 {
2810 }
2811
2812 void DispatchBaseTest::checkSupport(Context &context) const
2813 {
2814 checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
2815 m_computePipelineConstructionType);
2816 if (m_useMaintenance5)
2817 context.requireDeviceFunctionality("VK_KHR_maintenance5");
2818 }
2819
2820 void DispatchBaseTest::initPrograms(SourceCollections &sourceCollections) const
2821 {
2822 std::ostringstream src;
2823 src << "#version 310 es\n"
2824 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y()
2825 << ", local_size_z = " << m_localSize.z() << ") in;\n"
2826
2827 << "layout(binding = 0) buffer InOut {\n"
2828 << " uint values[" << de::toString(m_numValues) << "];\n"
2829 << "} sb_inout;\n"
2830
2831 << "layout(binding = 1) readonly uniform uniformInput {\n"
2832 << " uvec3 gridSize;\n"
2833 << "} ubo_in;\n"
2834
2835 << "void main (void) {\n"
2836 << " uvec3 size = ubo_in.gridSize * gl_WorkGroupSize;\n"
2837 << " uint numValuesPerInv = uint(sb_inout.values.length()) / (size.x*size.y*size.z);\n"
2838 << " uint index = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + "
2839 "gl_GlobalInvocationID.x;\n"
2840 << " uint offset = numValuesPerInv*index;\n"
2841 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
2842 << " sb_inout.values[offset + ndx] = ~sb_inout.values[offset + ndx];\n"
2843 << "}\n";
2844
2845 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
2846 }
2847
2848 TestInstance *DispatchBaseTest::createInstance(Context &context) const
2849 {
2850 return new DispatchBaseTestInstance(context, m_numValues, m_localSize, m_workSize, m_splitSize,
2851 m_computePipelineConstructionType, m_useMaintenance5);
2852 }
2853
2854 DispatchBaseTestInstance::DispatchBaseTestInstance(
2855 Context &context, const uint32_t numValues, const tcu::IVec3 &localsize, const tcu::IVec3 &worksize,
2856 const tcu::IVec3 &splitsize, const vk::ComputePipelineConstructionType computePipelineConstructionType,
2857 const bool useMaintenance5)
2858
2859 : ComputeTestInstance(context, computePipelineConstructionType, useMaintenance5)
2860 , m_numValues(numValues)
2861 , m_localSize(localsize)
2862 , m_workSize(worksize)
2863 , m_splitWorkSize(splitsize)
2864 , m_useMaintenance5(useMaintenance5)
2865 {
2866 // For easy work distribution across physical devices:
2867 // workSize must be an integer multiple of splitWorkSize and may differ from it only in X (workSize.x() > splitWorkSize.x())
2868 if ((!isInputVectorValid(m_splitWorkSize, m_workSize)) || (m_workSize.x() <= m_splitWorkSize.x()) ||
2869 (m_workSize.y() != m_splitWorkSize.y()) || (m_workSize.z() != m_splitWorkSize.z()))
2870 TCU_THROW(TestError, "Invalid Input.");
2871
2872 // For easy work distribution within the same physical device:
2873 // splitWorkSize must be an integer multiple of localSize, equal to it in X and strictly larger in both Y and Z
2874 if ((!isInputVectorValid(m_localSize, m_splitWorkSize)) || (m_localSize.x() != m_splitWorkSize.x()) ||
2875 (m_localSize.y() >= m_splitWorkSize.y()) || (m_localSize.z() >= m_splitWorkSize.z()))
2876 TCU_THROW(TestError, "Invalid Input.");
2877
2878 if ((multiplyComponents(m_workSize) / multiplyComponents(m_splitWorkSize)) < (int32_t)m_numPhysDevices)
2879 TCU_THROW(TestError, "Not enough work to distribute across all physical devices.");
2880
2881 uint32_t totalWork = multiplyComponents(m_workSize) * multiplyComponents(m_localSize);
2882 if ((totalWork > numValues) || (numValues % totalWork != 0))
2883 TCU_THROW(TestError, "Buffer too small/not aligned to cover all values.");
2884 }
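// For illustration only, one hypothetical parameter set that passes all of the checks
// above (assuming a device group of at most two physical devices):
//   localSize     = (2, 2, 2)
//   splitWorkSize = (2, 4, 4)  // multiple of localSize; equal in X, strictly larger in Y and Z
//   workSize      = (4, 4, 4)  // multiple of splitWorkSize; differs only in X
//   numValues     = 1024       // a multiple of the 4*4*4 * 2*2*2 = 512 total invocations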
2885
2886 bool DispatchBaseTestInstance::isInputVectorValid(const tcu::IVec3 &small, const tcu::IVec3 &big)
2887 {
2888 if (((big.x() < small.x()) || (big.y() < small.y()) || (big.z() < small.z())) ||
2889 ((big.x() % small.x() != 0) || (big.y() % small.y() != 0) || (big.z() % small.z() != 0)))
2890 return false;
2891 return true;
2892 }
2893
2894 tcu::TestStatus DispatchBaseTestInstance::iterate(void)
2895 {
2896 const DeviceInterface &vk = getDeviceInterface();
2897 const VkDevice device = getDevice();
2898 const VkQueue queue = getDeviceQueue(vk, device, m_queueFamilyIndex, 0);
2899 SimpleAllocator allocator(vk, device,
2900 getPhysicalDeviceMemoryProperties(m_context.getInstanceInterface(), getPhysicalDevice()));
2901 uint32_t totalWorkloadSize = 0;
2902
2903 // Create a uniform buffer and an input/output buffer
2904 const uint32_t uniformBufSize = 3; // Pass the compute grid size
2905 const VkDeviceSize uniformBufferSizeBytes = sizeof(uint32_t) * uniformBufSize;
2906 const BufferWithMemory uniformBuffer(
2907 vk, device, allocator, makeBufferCreateInfo(uniformBufferSizeBytes, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT),
2908 MemoryRequirement::HostVisible);
2909
2910 const VkDeviceSize bufferSizeBytes = sizeof(uint32_t) * m_numValues;
2911 const BufferWithMemory buffer(vk, device, allocator,
2912 makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
2913 MemoryRequirement::HostVisible);
2914
2915 // Fill the buffers with data
2916 typedef std::vector<uint32_t> data_vector_t;
2917 data_vector_t uniformInputData(uniformBufSize);
2918 data_vector_t inputData(m_numValues);
2919
2920 {
2921 const Allocation &bufferAllocation = uniformBuffer.getAllocation();
2922 uint32_t *bufferPtr = static_cast<uint32_t *>(bufferAllocation.getHostPtr());
2923 uniformInputData[0] = *bufferPtr++ = m_workSize.x();
2924 uniformInputData[1] = *bufferPtr++ = m_workSize.y();
2925 uniformInputData[2] = *bufferPtr++ = m_workSize.z();
2926 flushAlloc(vk, device, bufferAllocation);
2927 }
2928
2929 {
2930 de::Random rnd(0x82ce7f);
2931 const Allocation &bufferAllocation = buffer.getAllocation();
2932 uint32_t *bufferPtr = static_cast<uint32_t *>(bufferAllocation.getHostPtr());
2933 for (uint32_t i = 0; i < m_numValues; ++i)
2934 inputData[i] = *bufferPtr++ = rnd.getUint32();
2935
2936 flushAlloc(vk, device, bufferAllocation);
2937 }
2938
2939 // Create descriptor set
2940 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
2941 DescriptorSetLayoutBuilder()
2942 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
2943 .addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
2944 .build(vk, device));
2945
2946 const Unique<VkDescriptorPool> descriptorPool(
2947 DescriptorPoolBuilder()
2948 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
2949 .addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
2950 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
2951
2952 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
2953
2954 const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
2955 const VkDescriptorBufferInfo uniformBufferDescriptorInfo =
2956 makeDescriptorBufferInfo(*uniformBuffer, 0ull, uniformBufferSizeBytes);
2957
2958 DescriptorSetUpdateBuilder()
2959 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
2960 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
2961 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
2962 VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &uniformBufferDescriptorInfo)
2963 .update(vk, device);
2964
2965 ComputePipelineWrapper pipeline(vk, device, m_computePipelineConstructionType,
2966 m_context.getBinaryCollection().get("comp"));
2967 pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
2968 pipeline.setPipelineCreateFlags(VK_PIPELINE_CREATE_DISPATCH_BASE);
2969
2970 #ifndef CTS_USES_VULKANSC
2971 if (m_useMaintenance5)
2972 {
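// With maintenance5 the dispatch-base flag moves into VkPipelineCreateFlags2CreateInfoKHR
// chained through pNext; the legacy flags field is cleared so the flag is not set twice.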
2973 VkPipelineCreateFlags2CreateInfoKHR pipelineFlags2CreateInfo = initVulkanStructure();
2974 pipelineFlags2CreateInfo.flags = VK_PIPELINE_CREATE_2_DISPATCH_BASE_BIT_KHR;
2975 pipeline.setPipelineCreatePNext(&pipelineFlags2CreateInfo);
2976 pipeline.setPipelineCreateFlags(0);
2977 }
2978 #else
2979 DE_UNREF(m_useMaintenance5);
2980 #endif // CTS_USES_VULKANSC
2981
2982 pipeline.buildPipeline();
2983
2984 const VkBufferMemoryBarrier hostWriteBarrier =
2985 makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *buffer, 0ull, bufferSizeBytes);
2986 const VkBufferMemoryBarrier hostUniformWriteBarrier = makeBufferMemoryBarrier(
2987 VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_UNIFORM_READ_BIT, *uniformBuffer, 0ull, uniformBufferSizeBytes);
2988
2989 const VkBufferMemoryBarrier shaderWriteBarrier =
2990 makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);
2991
2992 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, m_queueFamilyIndex));
2993 const Unique<VkCommandBuffer> cmdBuffer(
2994 allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
2995
2996 // Start recording commands
2997 beginCommandBuffer(vk, *cmdBuffer);
2998
2999 pipeline.bind(*cmdBuffer);
3000 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getPipelineLayout(), 0u, 1u,
3001 &descriptorSet.get(), 0u, DE_NULL);
3002
3003 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
3004 (VkDependencyFlags)0, 0, (const VkMemoryBarrier *)DE_NULL, 1, &hostUniformWriteBarrier, 0,
3005 (const VkImageMemoryBarrier *)DE_NULL);
3006
3007 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
3008 (VkDependencyFlags)0, 0, (const VkMemoryBarrier *)DE_NULL, 1, &hostWriteBarrier, 0,
3009 (const VkImageMemoryBarrier *)DE_NULL);
3010
3011 // Split the workload across all physical devices based on m_splitWorkSize.x()
3012 for (uint32_t physDevIdx = 0; physDevIdx < m_numPhysDevices; physDevIdx++)
3013 {
3014 uint32_t baseGroupX = physDevIdx * m_splitWorkSize.x();
3015 uint32_t baseGroupY = 0;
3016 uint32_t baseGroupZ = 0;
3017
3018 // Split the workload within the physical device based on m_localSize.y() and m_localSize.z()
3019 for (int32_t localIdxY = 0; localIdxY < (m_splitWorkSize.y() / m_localSize.y()); localIdxY++)
3020 {
3021 for (int32_t localIdxZ = 0; localIdxZ < (m_splitWorkSize.z() / m_localSize.z()); localIdxZ++)
3022 {
3023 uint32_t offsetX = baseGroupX;
3024 uint32_t offsetY = baseGroupY + localIdxY * m_localSize.y();
3025 uint32_t offsetZ = baseGroupZ + localIdxZ * m_localSize.z();
3026
3027 uint32_t localSizeX =
3028 (physDevIdx == (m_numPhysDevices - 1)) ? m_workSize.x() - baseGroupX : m_localSize.x();
3029 uint32_t localSizeY = m_localSize.y();
3030 uint32_t localSizeZ = m_localSize.z();
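// Despite their names, localSizeX/Y/Z are the workgroup counts passed to vkCmdDispatchBase,
// not shader local sizes; the last physical device picks up all remaining groups in X.
// vkCmdDispatchBase(cmd, bx, by, bz, cx, cy, cz) behaves like vkCmdDispatch(cmd, cx, cy, cz)
// except that gl_WorkGroupID ranges over [bx, bx+cx) x [by, by+cy) x [bz, bz+cz).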
3031
3032 totalWorkloadSize += (localSizeX * localSizeY * localSizeZ);
3033 vk.cmdDispatchBase(*cmdBuffer, offsetX, offsetY, offsetZ, localSizeX, localSizeY, localSizeZ);
3034 }
3035 }
3036 }
3037
3038 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
3039 (VkDependencyFlags)0, 0, (const VkMemoryBarrier *)DE_NULL, 1, &shaderWriteBarrier, 0,
3040 (const VkImageMemoryBarrier *)DE_NULL);
3041
3042 endCommandBuffer(vk, *cmdBuffer);
3043 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3044
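// Sanity check: the dispatched group counts must add up to exactly
// workSize.x * workSize.y * workSize.z workgroups.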
3045 if (totalWorkloadSize != uint32_t(multiplyComponents(m_workSize)))
3046 TCU_THROW(TestError, "Not covering the entire workload.");
3047
3048 // Validate the results
3049 const Allocation &bufferAllocation = buffer.getAllocation();
3050 invalidateAlloc(vk, device, bufferAllocation);
3051 const uint32_t *bufferPtr = static_cast<uint32_t *>(bufferAllocation.getHostPtr());
3052
3053 for (uint32_t ndx = 0; ndx < m_numValues; ++ndx)
3054 {
3055 const uint32_t res = bufferPtr[ndx];
3056 const uint32_t ref = ~inputData[ndx];
3057
3058 if (res != ref)
3059 {
3060 std::ostringstream msg;
3061 msg << "Comparison failed for InOut.values[" << ndx << "]";
3062 return tcu::TestStatus::fail(msg.str());
3063 }
3064 }
3065 return tcu::TestStatus::pass("Compute succeeded");
3066 }
3067
3068 class DeviceIndexTest : public vkt::TestCase
3069 {
3070 public:
3071 DeviceIndexTest(tcu::TestContext &testCtx, const std::string &name, const uint32_t numValues,
3072 const tcu::IVec3 &localsize, const tcu::IVec3 &worksize,
3073 const vk::ComputePipelineConstructionType computePipelineConstructionType);
3074
3075 virtual void checkSupport(Context &context) const;
3076 void initPrograms(SourceCollections &sourceCollections) const;
3077 TestInstance *createInstance(Context &context) const;
3078
3079 private:
3080 const uint32_t m_numValues;
3081 const tcu::IVec3 m_localSize;
3082 const tcu::IVec3 m_workSize;
3084 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
3085 };
3086
3087 class DeviceIndexTestInstance : public ComputeTestInstance
3088 {
3089 public:
3090 DeviceIndexTestInstance(Context &context, const uint32_t numValues, const tcu::IVec3 &localsize,
3091 const tcu::IVec3 &worksize,
3092 const vk::ComputePipelineConstructionType computePipelineConstructionType);
3093 tcu::TestStatus iterate(void);
3094
3095 private:
3096 const uint32_t m_numValues;
3097 const tcu::IVec3 m_localSize;
3098 tcu::IVec3 m_workSize;
3099 };
3100
3101 DeviceIndexTest::DeviceIndexTest(tcu::TestContext &testCtx, const std::string &name, const uint32_t numValues,
3102 const tcu::IVec3 &localsize, const tcu::IVec3 &worksize,
3103 const vk::ComputePipelineConstructionType computePipelineConstructionType)
3104 : TestCase(testCtx, name)
3105 , m_numValues(numValues)
3106 , m_localSize(localsize)
3107 , m_workSize(worksize)
3108 , m_computePipelineConstructionType(computePipelineConstructionType)
3109 {
3110 }
3111
3112 void DeviceIndexTest::checkSupport(Context &context) const
3113 {
3114 checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
3115 m_computePipelineConstructionType);
3116 }
3117
3118 void DeviceIndexTest::initPrograms(SourceCollections &sourceCollections) const
3119 {
3120 std::ostringstream src;
3121 src << "#version 310 es\n"
3122 << "#extension GL_EXT_device_group : require\n"
3123 << "layout (local_size_x = " << m_localSize.x() << ", local_size_y = " << m_localSize.y()
3124 << ", local_size_z = " << m_localSize.z() << ") in;\n"
3125
3126 << "layout(binding = 0) buffer InOut {\n"
3127 << " uint values[" << de::toString(m_numValues) << "];\n"
3128 << "} sb_inout;\n"
3129
3130 << "layout(binding = 1) readonly uniform uniformInput {\n"
3131 << " uint baseOffset[1+" << VK_MAX_DEVICE_GROUP_SIZE << "];\n"
3132 << "} ubo_in;\n"
3133
3134 << "void main (void) {\n"
3135 << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
3136 << " uint numValuesPerInv = uint(sb_inout.values.length()) / (size.x*size.y*size.z);\n"
3137 << " uint index = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + "
3138 "gl_GlobalInvocationID.x;\n"
3139 << " uint offset = numValuesPerInv*index;\n"
3140 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
3141 << " sb_inout.values[offset + ndx] = ubo_in.baseOffset[0] + ubo_in.baseOffset[gl_DeviceIndex + 1];\n"
3142 << "}\n";
3143
3144 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
3145 }
3146
3147 TestInstance *DeviceIndexTest::createInstance(Context &context) const
3148 {
3149 return new DeviceIndexTestInstance(context, m_numValues, m_localSize, m_workSize,
3150 m_computePipelineConstructionType);
3151 }
3152
3153 DeviceIndexTestInstance::DeviceIndexTestInstance(
3154 Context &context, const uint32_t numValues, const tcu::IVec3 &localsize, const tcu::IVec3 &worksize,
3155 const vk::ComputePipelineConstructionType computePipelineConstructionType)
3156
3157 : ComputeTestInstance(context, computePipelineConstructionType, false)
3158 , m_numValues(numValues)
3159 , m_localSize(localsize)
3160 , m_workSize(worksize)
3161 {
3162 }
3163
3164 tcu::TestStatus DeviceIndexTestInstance::iterate(void)
3165 {
3166 const DeviceInterface &vk = getDeviceInterface();
3167 const VkDevice device = getDevice();
3168 const VkQueue queue = getDeviceQueue(vk, device, m_queueFamilyIndex, 0);
3169 SimpleAllocator allocator(vk, device,
3170 getPhysicalDeviceMemoryProperties(m_context.getInstanceInterface(), getPhysicalDevice()));
3171 const uint32_t allocDeviceMask = (1 << m_numPhysDevices) - 1;
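// One bit per physical device in the group, e.g. 0b11 for a two-device group.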
3172 de::Random rnd(0x82ce7f);
3173 Move<VkBuffer> sboBuffer;
3174 vk::Move<vk::VkDeviceMemory> sboBufferMemory;
3175
3176 // Create a uniform buffer and an output (check) buffer
3177 const uint32_t uniformBufSize = 4 * (1 + VK_MAX_DEVICE_GROUP_SIZE);
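// In a std140 uniform block a uint array has an element stride of 16 bytes, so four host
// uints back every baseOffset[] element; only every fourth value is read by the shader
// (hence the uniformInputData[4 * (physDevIdx + 1)] indexing in the validation below).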
3178 const VkDeviceSize uniformBufferSizeBytes = sizeof(uint32_t) * uniformBufSize;
3179 const BufferWithMemory uniformBuffer(
3180 vk, device, allocator, makeBufferCreateInfo(uniformBufferSizeBytes, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT),
3181 MemoryRequirement::HostVisible);
3182
3183 const VkDeviceSize bufferSizeBytes = sizeof(uint32_t) * m_numValues;
3184 const BufferWithMemory checkBuffer(vk, device, allocator,
3185 makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT),
3186 MemoryRequirement::HostVisible);
3187
3188 // Create the storage buffer (SSBO)
3189 {
3190 const VkBufferCreateInfo sboBufferParams = {
3191 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // sType
3192 DE_NULL, // pNext
3193 0u, // flags
3194 (VkDeviceSize)bufferSizeBytes, // size
3195 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT, // usage
3196 VK_SHARING_MODE_EXCLUSIVE, // sharingMode
3197 1u, // queueFamilyIndexCount
3198 &m_queueFamilyIndex, // pQueueFamilyIndices
3199 };
3200 sboBuffer = createBuffer(vk, device, &sboBufferParams);
3201
3202 VkMemoryRequirements memReqs = getBufferMemoryRequirements(vk, device, sboBuffer.get());
3203 uint32_t memoryTypeNdx = 0;
3204 const VkPhysicalDeviceMemoryProperties deviceMemProps =
3205 getPhysicalDeviceMemoryProperties(m_context.getInstanceInterface(), getPhysicalDevice());
3206 for (memoryTypeNdx = 0; memoryTypeNdx < deviceMemProps.memoryTypeCount; memoryTypeNdx++)
3207 {
3208 if ((memReqs.memoryTypeBits & (1u << memoryTypeNdx)) != 0 &&
3209 (deviceMemProps.memoryTypes[memoryTypeNdx].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) ==
3210 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
3211 break;
3212 }
3213 if (memoryTypeNdx == deviceMemProps.memoryTypeCount)
3214 TCU_THROW(NotSupportedError, "No compatible memory type found");
3215
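// VK_MEMORY_ALLOCATE_DEVICE_MASK_BIT requests a separate instance of the allocation on
// each physical device set in deviceMask, so every GPU in the group gets its own copy
// of the storage buffer contents.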
3216 const VkMemoryAllocateFlagsInfo allocDeviceMaskInfo = {
3217 VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO, // sType
3218 DE_NULL, // pNext
3219 VK_MEMORY_ALLOCATE_DEVICE_MASK_BIT, // flags
3220 allocDeviceMask, // deviceMask
3221 };
3222
3223 VkMemoryAllocateInfo allocInfo = {
3224 VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, // sType
3225 &allocDeviceMaskInfo, // pNext
3226 memReqs.size, // allocationSize
3227 memoryTypeNdx, // memoryTypeIndex
3228 };
3229
3230 sboBufferMemory = allocateMemory(vk, device, &allocInfo);
3231 VK_CHECK(vk.bindBufferMemory(device, *sboBuffer, sboBufferMemory.get(), 0));
3232 }
3233
3234 // Fill the buffers with data
3235 typedef std::vector<uint32_t> data_vector_t;
3236 data_vector_t uniformInputData(uniformBufSize, 0);
3237
3238 {
3239 const Allocation &bufferAllocation = uniformBuffer.getAllocation();
3240 uint32_t *bufferPtr = static_cast<uint32_t *>(bufferAllocation.getHostPtr());
3241 for (uint32_t i = 0; i < uniformBufSize; ++i)
3242 uniformInputData[i] = *bufferPtr++ = rnd.getUint32() / 10; // divide to prevent overflow in addition
3243
3244 flushAlloc(vk, device, bufferAllocation);
3245 }
3246
3247 // Create descriptor set
3248 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
3249 DescriptorSetLayoutBuilder()
3250 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
3251 .addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
3252 .build(vk, device));
3253
3254 const Unique<VkDescriptorPool> descriptorPool(
3255 DescriptorPoolBuilder()
3256 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
3257 .addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
3258 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
3259
3260 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
3261
3262 const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*sboBuffer, 0ull, bufferSizeBytes);
3263 const VkDescriptorBufferInfo uniformBufferDescriptorInfo =
3264 makeDescriptorBufferInfo(*uniformBuffer, 0ull, uniformBufferSizeBytes);
3265
3266 DescriptorSetUpdateBuilder()
3267 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
3268 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
3269 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
3270 VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &uniformBufferDescriptorInfo)
3271 .update(vk, device);
3272
3273 ComputePipelineWrapper pipeline(vk, device, m_computePipelineConstructionType,
3274 m_context.getBinaryCollection().get("comp"));
3275 pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
3276 pipeline.buildPipeline();
3277
3278 const VkBufferMemoryBarrier hostUniformWriteBarrier = makeBufferMemoryBarrier(
3279 VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_UNIFORM_READ_BIT, *uniformBuffer, 0ull, uniformBufferSizeBytes);
3280 const VkBufferMemoryBarrier shaderWriteBarrier = makeBufferMemoryBarrier(
3281 VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, *sboBuffer, 0ull, bufferSizeBytes);
3282
3283 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, m_queueFamilyIndex));
3284 const Unique<VkCommandBuffer> cmdBuffer(
3285 allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
3286
3287 // Verify results for every non-empty device mask, i.e. every subset of physical devices in the group
3288 for (uint32_t physDevMask = 1; physDevMask < (1u << m_numPhysDevices); physDevMask++)
3289 {
3290 uint32_t constantValPerLoop = 0;
3291 {
3292 const Allocation &bufferAllocation = uniformBuffer.getAllocation();
3293 uint32_t *bufferPtr = static_cast<uint32_t *>(bufferAllocation.getHostPtr());
3294 constantValPerLoop = *bufferPtr = rnd.getUint32() / 10; // divide to prevent overflow in addition
3295 flushAlloc(vk, device, bufferAllocation);
3296 }
3297 beginCommandBuffer(vk, *cmdBuffer);
3298
3299 pipeline.bind(*cmdBuffer);
3300 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getPipelineLayout(), 0u, 1u,
3301 &descriptorSet.get(), 0u, DE_NULL);
3302 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
3303 (VkDependencyFlags)0, 0, (const VkMemoryBarrier *)DE_NULL, 1, &hostUniformWriteBarrier, 0,
3304 (const VkImageMemoryBarrier *)DE_NULL);
3305
3306 vk.cmdSetDeviceMask(*cmdBuffer, physDevMask);
3307 vk.cmdDispatch(*cmdBuffer, m_workSize.x(), m_workSize.y(), m_workSize.z());
3308
3309 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
3310 (VkDependencyFlags)0, 0, (const VkMemoryBarrier *)DE_NULL, 1, &shaderWriteBarrier, 0,
3311 (const VkImageMemoryBarrier *)DE_NULL);
3312
3313 endCommandBuffer(vk, *cmdBuffer);
3314 submitCommandsAndWait(vk, device, queue, *cmdBuffer, true, physDevMask);
3315 m_context.resetCommandPoolForVKSC(device, *cmdPool);
3316
3317 // Validate the results on all physical devices where compute shader was launched
3318 const VkBufferMemoryBarrier srcBufferBarrier = makeBufferMemoryBarrier(
3319 VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, *sboBuffer, 0ull, bufferSizeBytes);
3320 const VkBufferMemoryBarrier dstBufferBarrier = makeBufferMemoryBarrier(
3321 VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *checkBuffer, 0ull, bufferSizeBytes);
3322 const VkBufferCopy copyParams = {
3323 (VkDeviceSize)0u, // srcOffset
3324 (VkDeviceSize)0u, // dstOffset
3325 bufferSizeBytes // size
3326 };
3327
3328 for (uint32_t physDevIdx = 0; physDevIdx < m_numPhysDevices; physDevIdx++)
3329 {
3330 if (((1u << physDevIdx) & physDevMask) == 0u)
3331 continue;
3332
3333 const uint32_t deviceMask = 1 << physDevIdx;
3334
3335 beginCommandBuffer(vk, *cmdBuffer);
3336 vk.cmdSetDeviceMask(*cmdBuffer, deviceMask);
3337 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
3338 (VkDependencyFlags)0, 0, (const VkMemoryBarrier *)DE_NULL, 1, &srcBufferBarrier, 0,
3339 (const VkImageMemoryBarrier *)DE_NULL);
3340 vk.cmdCopyBuffer(*cmdBuffer, *sboBuffer, *checkBuffer, 1, &copyParams);
3341 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
3342 (VkDependencyFlags)0, 0, (const VkMemoryBarrier *)DE_NULL, 1, &dstBufferBarrier, 0,
3343 (const VkImageMemoryBarrier *)DE_NULL);
3344
3345 endCommandBuffer(vk, *cmdBuffer);
3346 submitCommandsAndWait(vk, device, queue, *cmdBuffer, true, deviceMask);
3347
3348 const Allocation &bufferAllocation = checkBuffer.getAllocation();
3349 invalidateAlloc(vk, device, bufferAllocation);
3350 const uint32_t *bufferPtr = static_cast<uint32_t *>(bufferAllocation.getHostPtr());
3351
3352 for (uint32_t ndx = 0; ndx < m_numValues; ++ndx)
3353 {
3354 const uint32_t res = bufferPtr[ndx];
3355 const uint32_t ref = constantValPerLoop + uniformInputData[4 * (physDevIdx + 1)];
3356
3357 if (res != ref)
3358 {
3359 std::ostringstream msg;
3360 msg << "Comparison failed on physical device " << getPhysicalDevice(physDevIdx) << " ( deviceMask "
3361 << deviceMask << " ) for InOut.values[" << ndx << "]";
3362 return tcu::TestStatus::fail(msg.str());
3363 }
3364 }
3365 }
3366 }
3367
3368 return tcu::TestStatus::pass("Compute succeeded");
3369 }
3370
3371 class ConcurrentCompute : public vkt::TestCase
3372 {
3373 public:
3374 ConcurrentCompute(tcu::TestContext &testCtx, const std::string &name,
3375 const vk::ComputePipelineConstructionType computePipelineConstructionType);
3376
3377 virtual void checkSupport(Context &context) const;
3378 void initPrograms(SourceCollections &sourceCollections) const;
3379 TestInstance *createInstance(Context &context) const;
3380
3381 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
3382 };
3383
3384 class ConcurrentComputeInstance : public vkt::TestInstance
3385 {
3386 public:
3387 ConcurrentComputeInstance(Context &context,
3388 const vk::ComputePipelineConstructionType computePipelineConstructionType);
3389
3390 tcu::TestStatus iterate(void);
3391
3392 private:
3393 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
3394 };
3395
3396 ConcurrentCompute::ConcurrentCompute(tcu::TestContext &testCtx, const std::string &name,
3397 const vk::ComputePipelineConstructionType computePipelineConstructionType)
3398 : TestCase(testCtx, name)
3399 , m_computePipelineConstructionType(computePipelineConstructionType)
3400 {
3401 }
3402
3403 void ConcurrentCompute::checkSupport(Context &context) const
3404 {
3405 checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
3406 m_computePipelineConstructionType);
3407 }
3408
3409 void ConcurrentCompute::initPrograms(SourceCollections &sourceCollections) const
3410 {
3411 std::ostringstream src;
3412 src << "#version 310 es\n"
3413 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
3414 << "layout(binding = 0) buffer InOut {\n"
3415 << " uint values[1024];\n"
3416 << "} sb_inout;\n"
3417 << "void main (void) {\n"
3418 << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
3419 << " uint numValuesPerInv = uint(sb_inout.values.length()) / (size.x*size.y*size.z);\n"
3420 << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + "
3421 "gl_GlobalInvocationID.x;\n"
3422 << " uint offset = numValuesPerInv*groupNdx;\n"
3423 << "\n"
3424 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
3425 << " sb_inout.values[offset + ndx] = ~sb_inout.values[offset + ndx];\n"
3426 << "}\n";
3427
3428 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
3429 }
3430
3431 TestInstance *ConcurrentCompute::createInstance(Context &context) const
3432 {
3433 return new ConcurrentComputeInstance(context, m_computePipelineConstructionType);
3434 }
3435
3436 ConcurrentComputeInstance::ConcurrentComputeInstance(
3437 Context &context, const vk::ComputePipelineConstructionType computePipelineConstructionType)
3438 : TestInstance(context)
3439 , m_computePipelineConstructionType(computePipelineConstructionType)
3440 {
3441 }
3442
3443 tcu::TestStatus ConcurrentComputeInstance::iterate(void)
3444 {
3445 enum
3446 {
3447 NO_MATCH_FOUND = ~((uint32_t)0),
3448 ERROR_NONE = 0,
3449 ERROR_WAIT = 1,
3450 ERROR_ORDER = 2
3451 };
3452
3453 struct Queues
3454 {
3455 VkQueue queue;
3456 uint32_t queueFamilyIndex;
3457 };
3458
3460 const uint32_t numValues = 1024;
3461 const CustomInstance instance(createCustomInstanceFromContext(m_context));
3462 const InstanceDriver &instanceDriver(instance.getDriver());
3463 const VkPhysicalDevice physicalDevice =
3464 chooseDevice(instanceDriver, instance, m_context.getTestContext().getCommandLine());
3465 tcu::TestLog &log = m_context.getTestContext().getLog();
3466 vk::Move<vk::VkDevice> logicalDevice;
3467 std::vector<VkQueueFamilyProperties> queueFamilyProperties;
3468 VkDeviceCreateInfo deviceInfo;
3469 VkPhysicalDeviceFeatures2 deviceFeatures2 = initVulkanStructure();
3470 VkPhysicalDeviceFeatures deviceFeatures;
3471 const float queuePriorities[2] = {1.0f, 0.0f};
3472 VkDeviceQueueCreateInfo queueInfos[2];
3473 Queues queues[2] = {{DE_NULL, (uint32_t)NO_MATCH_FOUND}, {DE_NULL, (uint32_t)NO_MATCH_FOUND}};
3474
3475 queueFamilyProperties = getPhysicalDeviceQueueFamilyProperties(instanceDriver, physicalDevice);
3476
3477 for (uint32_t queueNdx = 0; queueNdx < queueFamilyProperties.size(); ++queueNdx)
3478 {
3479 if (queueFamilyProperties[queueNdx].queueFlags & VK_QUEUE_COMPUTE_BIT)
3480 {
3481 if (NO_MATCH_FOUND == queues[0].queueFamilyIndex)
3482 queues[0].queueFamilyIndex = queueNdx;
3483
3484 if (queues[0].queueFamilyIndex != queueNdx || queueFamilyProperties[queueNdx].queueCount > 1u)
3485 {
3486 queues[1].queueFamilyIndex = queueNdx;
3487 break;
3488 }
3489 }
3490 }
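// The search above accepts either two distinct compute-capable families, or a single
// compute-capable family exposing at least two queues.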
3491
3492 if (queues[0].queueFamilyIndex == NO_MATCH_FOUND || queues[1].queueFamilyIndex == NO_MATCH_FOUND)
3493 TCU_THROW(NotSupportedError, "Couldn't find two compute queues");
3494
3495 for (int queueNdx = 0; queueNdx < 2; ++queueNdx)
3496 {
3497 VkDeviceQueueCreateInfo queueInfo;
3498 deMemset(&queueInfo, 0, sizeof(queueInfo));
3499
3500 queueInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
3501 queueInfo.pNext = DE_NULL;
3502 queueInfo.flags = (VkDeviceQueueCreateFlags)0u;
3503 queueInfo.queueFamilyIndex = queues[queueNdx].queueFamilyIndex;
3504 queueInfo.queueCount = (queues[0].queueFamilyIndex == queues[1].queueFamilyIndex) ? 2 : 1;
3505 queueInfo.pQueuePriorities = (queueInfo.queueCount == 2) ? queuePriorities : &queuePriorities[queueNdx];
3506
3507 queueInfos[queueNdx] = queueInfo;
3508
3509 if (queues[0].queueFamilyIndex == queues[1].queueFamilyIndex)
3510 break;
3511 }
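// Result: if both queues come from the same family, a single create-info requests two
// queues with priorities {1.0, 0.0}; otherwise two create-infos request one queue each,
// the first high-priority and the second low-priority.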
3512
3513 void *pNext = DE_NULL;
3514
3515 deMemset(&deviceInfo, 0, sizeof(deviceInfo));
3516 instanceDriver.getPhysicalDeviceFeatures(physicalDevice, &deviceFeatures);
3517
3518 deviceFeatures2.features = deviceFeatures;
3519
3520 std::vector<const char *> deviceExtensions;
3521
3522 #ifndef CTS_USES_VULKANSC
3523 VkPhysicalDeviceDynamicRenderingFeaturesKHR dynamicRenderingFeatures = initVulkanStructure();
3524 dynamicRenderingFeatures.dynamicRendering = VK_TRUE;
3525 VkPhysicalDeviceShaderObjectFeaturesEXT shaderObjectFeatures = initVulkanStructure(&dynamicRenderingFeatures);
3526 shaderObjectFeatures.shaderObject = VK_TRUE;
3527
3528 if (m_computePipelineConstructionType != COMPUTE_PIPELINE_CONSTRUCTION_TYPE_PIPELINE)
3529 {
3530 deviceExtensions.push_back("VK_EXT_shader_object");
3531 deviceFeatures2.pNext = &shaderObjectFeatures;
3532 pNext = &deviceFeatures2;
3533 }
3534 #endif
3535
3536 #ifdef CTS_USES_VULKANSC
3537 VkDeviceObjectReservationCreateInfo memReservationInfo =
3538 m_context.getTestContext().getCommandLine().isSubProcess() ? m_context.getResourceInterface()->getStatMax() :
3539 resetDeviceObjectReservationCreateInfo();
3540 memReservationInfo.pNext = pNext;
3541 pNext = &memReservationInfo;
3542
3543 VkPhysicalDeviceVulkanSC10Features sc10Features = createDefaultSC10Features();
3544 sc10Features.pNext = pNext;
3545 pNext = &sc10Features;
3546
3547 VkPipelineCacheCreateInfo pcCI;
3548 std::vector<VkPipelinePoolSize> poolSizes;
3549 if (m_context.getTestContext().getCommandLine().isSubProcess())
3550 {
3551 if (m_context.getResourceInterface()->getCacheDataSize() > 0)
3552 {
3553 pcCI = {
3554 VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, // VkStructureType sType;
3555 DE_NULL, // const void* pNext;
3556 VK_PIPELINE_CACHE_CREATE_READ_ONLY_BIT |
3557 VK_PIPELINE_CACHE_CREATE_USE_APPLICATION_STORAGE_BIT, // VkPipelineCacheCreateFlags flags;
3558 m_context.getResourceInterface()->getCacheDataSize(), // uintptr_t initialDataSize;
3559 m_context.getResourceInterface()->getCacheData() // const void* pInitialData;
3560 };
3561 memReservationInfo.pipelineCacheCreateInfoCount = 1;
3562 memReservationInfo.pPipelineCacheCreateInfos = &pcCI;
3563 }
3564
3565 poolSizes = m_context.getResourceInterface()->getPipelinePoolSizes();
3566 if (!poolSizes.empty())
3567 {
3568 memReservationInfo.pipelinePoolSizeCount = uint32_t(poolSizes.size());
3569 memReservationInfo.pPipelinePoolSizes = poolSizes.data();
3570 }
3571 }
3572 #endif // CTS_USES_VULKANSC
3573
3574 deviceInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
3575 deviceInfo.pNext = pNext;
3576 deviceInfo.enabledExtensionCount = (uint32_t)deviceExtensions.size();
3577 deviceInfo.ppEnabledExtensionNames = deviceExtensions.data();
3578 deviceInfo.enabledLayerCount = 0u;
3579 deviceInfo.ppEnabledLayerNames = DE_NULL;
3580 deviceInfo.pEnabledFeatures = (deviceFeatures2.pNext == DE_NULL) ? &deviceFeatures : DE_NULL;
3581 deviceInfo.queueCreateInfoCount = (queues[0].queueFamilyIndex == queues[1].queueFamilyIndex) ? 1 : 2;
3582 deviceInfo.pQueueCreateInfos = queueInfos;
3583
3584 logicalDevice =
3585 createCustomDevice(m_context.getTestContext().getCommandLine().isValidationEnabled(),
3586 m_context.getPlatformInterface(), instance, instanceDriver, physicalDevice, &deviceInfo);
3587
3588 #ifndef CTS_USES_VULKANSC
3589 de::MovePtr<vk::DeviceDriver> deviceDriver = de::MovePtr<DeviceDriver>(
3590 new DeviceDriver(m_context.getPlatformInterface(), instance, *logicalDevice, m_context.getUsedApiVersion(),
3591 m_context.getTestContext().getCommandLine()));
3592 #else
3593 de::MovePtr<vk::DeviceDriverSC, vk::DeinitDeviceDeleter> deviceDriver =
3594 de::MovePtr<DeviceDriverSC, DeinitDeviceDeleter>(
3595 new DeviceDriverSC(m_context.getPlatformInterface(), instance, *logicalDevice,
3596 m_context.getTestContext().getCommandLine(), m_context.getResourceInterface(),
3597 m_context.getDeviceVulkanSC10Properties(), m_context.getDeviceProperties(),
3598 m_context.getUsedApiVersion()),
3599 vk::DeinitDeviceDeleter(m_context.getResourceInterface().get(), *logicalDevice));
3600 #endif // CTS_USES_VULKANSC
3601 vk::DeviceInterface &vk = *deviceDriver;
3602
3603 for (uint32_t queueReqNdx = 0; queueReqNdx < 2; ++queueReqNdx)
3604 {
3605 if (queues[0].queueFamilyIndex == queues[1].queueFamilyIndex)
3606 vk.getDeviceQueue(*logicalDevice, queues[queueReqNdx].queueFamilyIndex, queueReqNdx,
3607 &queues[queueReqNdx].queue);
3608 else
3609 vk.getDeviceQueue(*logicalDevice, queues[queueReqNdx].queueFamilyIndex, 0u, &queues[queueReqNdx].queue);
3610 }
3611
3612 // Create the input/output buffers
3613 const VkPhysicalDeviceMemoryProperties memoryProperties =
3614 vk::getPhysicalDeviceMemoryProperties(instanceDriver, physicalDevice);
3615
3616 de::MovePtr<SimpleAllocator> allocator =
3617 de::MovePtr<SimpleAllocator>(new SimpleAllocator(vk, *logicalDevice, memoryProperties));
3618 const VkDeviceSize bufferSizeBytes = sizeof(uint32_t) * numValues;
3619 const BufferWithMemory buffer1(vk, *logicalDevice, *allocator,
3620 makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
3621 MemoryRequirement::HostVisible);
3622 const BufferWithMemory buffer2(vk, *logicalDevice, *allocator,
3623 makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
3624 MemoryRequirement::HostVisible);
3625
3626 // Fill the buffers with data
3627
3628 typedef std::vector<uint32_t> data_vector_t;
3629 data_vector_t inputData(numValues);
3630
3631 {
3632 de::Random rnd(0x82ce7f);
3633 const Allocation &bufferAllocation1 = buffer1.getAllocation();
3634 const Allocation &bufferAllocation2 = buffer2.getAllocation();
3635 uint32_t *bufferPtr1 = static_cast<uint32_t *>(bufferAllocation1.getHostPtr());
3636 uint32_t *bufferPtr2 = static_cast<uint32_t *>(bufferAllocation2.getHostPtr());
3637
3638 for (uint32_t i = 0; i < numValues; ++i)
3639 {
3640 uint32_t val = rnd.getUint32();
3641 inputData[i] = val;
3642 *bufferPtr1++ = val;
3643 *bufferPtr2++ = val;
3644 }
3645
3646 flushAlloc(vk, *logicalDevice, bufferAllocation1);
3647 flushAlloc(vk, *logicalDevice, bufferAllocation2);
3648 }
3649
3650 // Create descriptor sets
3651
3652 const Unique<VkDescriptorSetLayout> descriptorSetLayout1(
3653 DescriptorSetLayoutBuilder()
3654 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
3655 .build(vk, *logicalDevice));
3656
3657 const Unique<VkDescriptorPool> descriptorPool1(
3658 DescriptorPoolBuilder()
3659 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
3660 .build(vk, *logicalDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
3661
3662 const Unique<VkDescriptorSet> descriptorSet1(
3663 makeDescriptorSet(vk, *logicalDevice, *descriptorPool1, *descriptorSetLayout1));
3664
3665 const VkDescriptorBufferInfo bufferDescriptorInfo1 = makeDescriptorBufferInfo(*buffer1, 0ull, bufferSizeBytes);
3666 DescriptorSetUpdateBuilder()
3667 .writeSingle(*descriptorSet1, DescriptorSetUpdateBuilder::Location::binding(0u),
3668 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo1)
3669 .update(vk, *logicalDevice);
3670
3671 const Unique<VkDescriptorSetLayout> descriptorSetLayout2(
3672 DescriptorSetLayoutBuilder()
3673 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
3674 .build(vk, *logicalDevice));
3675
3676 const Unique<VkDescriptorPool> descriptorPool2(
3677 DescriptorPoolBuilder()
3678 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
3679 .build(vk, *logicalDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
3680
3681 const Unique<VkDescriptorSet> descriptorSet2(
3682 makeDescriptorSet(vk, *logicalDevice, *descriptorPool2, *descriptorSetLayout2));
3683
3684 const VkDescriptorBufferInfo bufferDescriptorInfo2 = makeDescriptorBufferInfo(*buffer2, 0ull, bufferSizeBytes);
3685 DescriptorSetUpdateBuilder()
3686 .writeSingle(*descriptorSet2, DescriptorSetUpdateBuilder::Location::binding(0u),
3687 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo2)
3688 .update(vk, *logicalDevice);
3689
3690 // Perform the computation
3691
3694
3695 ComputePipelineWrapper pipeline1(vk, *logicalDevice, m_computePipelineConstructionType,
3696 m_context.getBinaryCollection().get("comp"));
3697 pipeline1.setDescriptorSetLayout(*descriptorSetLayout1);
3698 pipeline1.buildPipeline();
3699 const VkBufferMemoryBarrier hostWriteBarrier1 =
3700 makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *buffer1, 0ull, bufferSizeBytes);
3701 const VkBufferMemoryBarrier shaderWriteBarrier1 =
3702 makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer1, 0ull, bufferSizeBytes);
3703 const Unique<VkCommandPool> cmdPool1(makeCommandPool(vk, *logicalDevice, queues[0].queueFamilyIndex));
3704 const Unique<VkCommandBuffer> cmdBuffer1(
3705 allocateCommandBuffer(vk, *logicalDevice, *cmdPool1, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
3706
3707 ComputePipelineWrapper pipeline2(vk, *logicalDevice, m_computePipelineConstructionType,
3708 m_context.getBinaryCollection().get("comp"));
3709 pipeline2.setDescriptorSetLayout(*descriptorSetLayout2);
3710 pipeline2.buildPipeline();
3711 const VkBufferMemoryBarrier hostWriteBarrier2 =
3712 makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *buffer2, 0ull, bufferSizeBytes);
3713 const VkBufferMemoryBarrier shaderWriteBarrier2 =
3714 makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer2, 0ull, bufferSizeBytes);
3715 const Unique<VkCommandPool> cmdPool2(makeCommandPool(vk, *logicalDevice, queues[1].queueFamilyIndex));
3716 const Unique<VkCommandBuffer> cmdBuffer2(
3717 allocateCommandBuffer(vk, *logicalDevice, *cmdPool2, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
3718
3719 // Command buffer 1
3720
3721 beginCommandBuffer(vk, *cmdBuffer1);
3722 pipeline1.bind(*cmdBuffer1);
3723 vk.cmdBindDescriptorSets(*cmdBuffer1, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline1.getPipelineLayout(), 0u, 1u,
3724 &descriptorSet1.get(), 0u, DE_NULL);
3725 vk.cmdPipelineBarrier(*cmdBuffer1, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
3726 (VkDependencyFlags)0, 0, (const VkMemoryBarrier *)DE_NULL, 1, &hostWriteBarrier1, 0,
3727 (const VkImageMemoryBarrier *)DE_NULL);
3728 vk.cmdDispatch(*cmdBuffer1, 1, 1, 1);
3729 vk.cmdPipelineBarrier(*cmdBuffer1, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
3730 (VkDependencyFlags)0, 0, (const VkMemoryBarrier *)DE_NULL, 1, &shaderWriteBarrier1, 0,
3731 (const VkImageMemoryBarrier *)DE_NULL);
3732 endCommandBuffer(vk, *cmdBuffer1);
3733
3734 // Command buffer 2
3735
3736 beginCommandBuffer(vk, *cmdBuffer2);
3737 pipeline2.bind(*cmdBuffer2);
3738 vk.cmdBindDescriptorSets(*cmdBuffer2, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline2.getPipelineLayout(), 0u, 1u,
3739 &descriptorSet2.get(), 0u, DE_NULL);
3740 vk.cmdPipelineBarrier(*cmdBuffer2, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
3741 (VkDependencyFlags)0, 0, (const VkMemoryBarrier *)DE_NULL, 1, &hostWriteBarrier2, 0,
3742 (const VkImageMemoryBarrier *)DE_NULL);
3743 vk.cmdDispatch(*cmdBuffer2, 1, 1, 1);
3744 vk.cmdPipelineBarrier(*cmdBuffer2, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
3745 (VkDependencyFlags)0, 0, (const VkMemoryBarrier *)DE_NULL, 1, &shaderWriteBarrier2, 0,
3746 (const VkImageMemoryBarrier *)DE_NULL);
3747 endCommandBuffer(vk, *cmdBuffer2);
3748
3749 VkSubmitInfo submitInfo1 = {
3750 VK_STRUCTURE_TYPE_SUBMIT_INFO, // sType
3751 DE_NULL, // pNext
3752 0u, // waitSemaphoreCount
3753 DE_NULL, // pWaitSemaphores
3754 (const VkPipelineStageFlags *)DE_NULL, // pWaitDstStageMask
3755 1u, // commandBufferCount
3756 &cmdBuffer1.get(), // pCommandBuffers
3757 0u, // signalSemaphoreCount
3758 DE_NULL // pSignalSemaphores
3759 };
3760
3761 VkSubmitInfo submitInfo2 = {
3762 VK_STRUCTURE_TYPE_SUBMIT_INFO, // sType
3763 DE_NULL, // pNext
3764 0u, // waitSemaphoreCount
3765 DE_NULL, // pWaitSemaphores
3766 (const VkPipelineStageFlags *)DE_NULL, // pWaitDstStageMask
3767 1u, // commandBufferCount
3768 &cmdBuffer2.get(), // pCommandBuffers
3769 0u, // signalSemaphoreCount
3770 DE_NULL // pSignalSemaphores
3771 };
3772
3773 // Wait for completion
3774 const Unique<VkFence> fence1(createFence(vk, *logicalDevice));
3775 const Unique<VkFence> fence2(createFence(vk, *logicalDevice));
3776
3777 VK_CHECK(vk.queueSubmit(queues[0].queue, 1u, &submitInfo1, *fence1));
3778 VK_CHECK(vk.queueSubmit(queues[1].queue, 1u, &submitInfo2, *fence2));
3779
3780 int err = ERROR_NONE;
3781
3782 // First wait for the low-priority queue
3783 if (VK_SUCCESS != vk.waitForFences(*logicalDevice, 1u, &fence2.get(), true, ~0ull))
3784 err = ERROR_WAIT;
3785
3786 // If the high-priority queue hasn't finished, we have a problem.
3787 if (VK_SUCCESS != vk.getFenceStatus(*logicalDevice, fence1.get()))
3788 if (err == ERROR_NONE)
3789 err = ERROR_ORDER;
3790
3791 // Wait for the high-priority fence so we don't get errors on teardown.
3792 vk.waitForFences(*logicalDevice, 1u, &fence1.get(), true, ~0ull);
3793
3794 // If we fail() before waiting for all of the fences, the reported error would come
3795 // from teardown instead of the failure we actually want to surface.
3796
3797 if (err == ERROR_WAIT)
3798 {
3799 return tcu::TestStatus::fail("Failed waiting for low-priority queue fence.");
3800 }
3801
3802 // Validate the results
3803
3804 const Allocation &bufferAllocation1 = buffer1.getAllocation();
3805 invalidateAlloc(vk, *logicalDevice, bufferAllocation1);
3806 const uint32_t *bufferPtr1 = static_cast<uint32_t *>(bufferAllocation1.getHostPtr());
3807
3808 const Allocation &bufferAllocation2 = buffer2.getAllocation();
3809 invalidateAlloc(vk, *logicalDevice, bufferAllocation2);
3810 const uint32_t *bufferPtr2 = static_cast<uint32_t *>(bufferAllocation2.getHostPtr());
3811
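// Each shader is expected to write the bitwise inverse of its input, so both
// result buffers must match ~inp exactly.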
3812 for (uint32_t ndx = 0; ndx < numValues; ++ndx)
3813 {
3814 const uint32_t res1 = bufferPtr1[ndx];
3815 const uint32_t res2 = bufferPtr2[ndx];
3816 const uint32_t inp = inputData[ndx];
3817 const uint32_t ref = ~inp;
3818
3819 if (res1 != ref || res1 != res2)
3820 {
3821 std::ostringstream msg;
3822 msg << "Comparison failed for InOut.values[" << ndx << "] ref:" << ref << " res1:" << res1
3823 << " res2:" << res2 << " inp:" << inp;
3824 return tcu::TestStatus::fail(msg.str());
3825 }
3826 }
3827
3828 if (err == ERROR_ORDER)
3829 log << tcu::TestLog::Message
3830 << "Note: Low-priority queue was faster than high-priority one. This is not an error, but priorities may "
3831 "be inverted."
3832 << tcu::TestLog::EndMessage;
3833
3834 return tcu::TestStatus::pass("Test passed");
3835 }
3836
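// Verifies that vkCmdDispatch with one or more zero workgroup counts is valid and
// executes no invocations: a follow-up 1x1x1 dispatch must leave the counter at
// exactly 1.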
3837 class EmptyWorkGroupCase : public vkt::TestCase
3838 {
3839 public:
3840 EmptyWorkGroupCase(tcu::TestContext &testCtx, const std::string &name, const tcu::UVec3 &dispatchSize,
3841 const vk::ComputePipelineConstructionType computePipelineConstructionType);
3842 virtual ~EmptyWorkGroupCase(void)
3843 {
3844 }
3845
3846 virtual void checkSupport(Context &context) const override;
3847 TestInstance *createInstance(Context &context) const override;
3848 void initPrograms(vk::SourceCollections &programCollection) const override;
3849
3850 protected:
3851 const tcu::UVec3 m_dispatchSize;
3852 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
3853 };
3854
3855 class EmptyWorkGroupInstance : public vkt::TestInstance
3856 {
3857 public:
3858 EmptyWorkGroupInstance(Context &context, const tcu::UVec3 &dispatchSize,
3859 const vk::ComputePipelineConstructionType computePipelineConstructionType)
3860 : vkt::TestInstance(context)
3861 , m_dispatchSize(dispatchSize)
3862 , m_computePipelineConstructionType(computePipelineConstructionType)
3863 {
3864 }
3865 virtual ~EmptyWorkGroupInstance(void)
3866 {
3867 }
3868
3869 tcu::TestStatus iterate(void) override;
3870
3871 protected:
3872 const tcu::UVec3 m_dispatchSize;
3873 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
3874 };
3875
3876 EmptyWorkGroupCase::EmptyWorkGroupCase(tcu::TestContext &testCtx, const std::string &name,
3877 const tcu::UVec3 &dispatchSize,
3878 const vk::ComputePipelineConstructionType computePipelineConstructionType)
3879 : vkt::TestCase(testCtx, name)
3880 , m_dispatchSize(dispatchSize)
3881 , m_computePipelineConstructionType(computePipelineConstructionType)
3882 {
3883 DE_ASSERT(m_dispatchSize.x() == 0u || m_dispatchSize.y() == 0u || m_dispatchSize.z() == 0u);
3884 }
3885
3886 void EmptyWorkGroupCase::checkSupport(Context &context) const
3887 {
3888 checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
3889 m_computePipelineConstructionType);
3890 }
3891
3892 TestInstance *EmptyWorkGroupCase::createInstance(Context &context) const
3893 {
3894 return new EmptyWorkGroupInstance(context, m_dispatchSize, m_computePipelineConstructionType);
3895 }
3896
3897 void EmptyWorkGroupCase::initPrograms(vk::SourceCollections &programCollection) const
3898 {
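// Each executed invocation atomically increments a single counter, letting the
// test detect whether the empty dispatch executed any invocations at all.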
3899 std::ostringstream comp;
3900 comp << "#version 450\n"
3901 << "layout (local_size_x=1, local_size_y=1, local_size_z=1) in;\n"
3902 << "layout (set=0, binding=0) buffer VerificationBlock { uint value; } verif;\n"
3903 << "void main () { atomicAdd(verif.value, 1u); }\n";
3904 programCollection.glslSources.add("comp") << glu::ComputeSource(comp.str());
3905 }
3906
3907 tcu::TestStatus EmptyWorkGroupInstance::iterate(void)
3908 {
3909 const auto &vkd = m_context.getDeviceInterface();
3910 const auto device = m_context.getDevice();
3911 auto &alloc = m_context.getDefaultAllocator();
3912 const auto queueIndex = m_context.getUniversalQueueFamilyIndex();
3913 const auto queue = m_context.getUniversalQueue();
3914
3915 const auto verifBufferSize = static_cast<VkDeviceSize>(sizeof(uint32_t));
3916 const auto verifBufferInfo = makeBufferCreateInfo(verifBufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
3917 BufferWithMemory verifBuffer(vkd, device, alloc, verifBufferInfo, MemoryRequirement::HostVisible);
3918 auto &verifBufferAlloc = verifBuffer.getAllocation();
3919 void *verifBufferPtr = verifBufferAlloc.getHostPtr();
3920
3921 deMemset(verifBufferPtr, 0, static_cast<size_t>(verifBufferSize));
3922 flushAlloc(vkd, device, verifBufferAlloc);
3923
3924 DescriptorSetLayoutBuilder layoutBuilder;
3925 layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
3926 const auto descriptorSetLayout = layoutBuilder.build(vkd, device);
3927
3928 ComputePipelineWrapper pipeline(vkd, device, m_computePipelineConstructionType,
3929 m_context.getBinaryCollection().get("comp"));
3930 pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
3931 pipeline.buildPipeline();
3932
3933 DescriptorPoolBuilder poolBuilder;
3934 poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
3935 const auto descriptorPool = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3936 const auto descriptorSet = makeDescriptorSet(vkd, device, descriptorPool.get(), descriptorSetLayout.get());
3937
3938 DescriptorSetUpdateBuilder updateBuilder;
3939 const auto verifBufferDescInfo = makeDescriptorBufferInfo(verifBuffer.get(), 0ull, verifBufferSize);
3940 updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(0u),
3941 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &verifBufferDescInfo);
3942 updateBuilder.update(vkd, device);
3943
3944 const auto cmdPool = makeCommandPool(vkd, device, queueIndex);
3945 const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
3946 const auto cmdBuffer = cmdBufferPtr.get();
3947
3948 beginCommandBuffer(vkd, cmdBuffer);
3949 pipeline.bind(cmdBuffer);
3950 vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.getPipelineLayout(), 0u, 1u,
3951 &descriptorSet.get(), 0u, nullptr);
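// This dispatch has at least one zero workgroup count (enforced by the case
// constructor), so it must launch no invocations.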
3952 vkd.cmdDispatch(cmdBuffer, m_dispatchSize.x(), m_dispatchSize.y(), m_dispatchSize.z());
3953
3954 const auto readWriteAccess = (VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT);
3955 const auto computeToCompute = makeMemoryBarrier(readWriteAccess, readWriteAccess);
3956 vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0U,
3957 1u, &computeToCompute, 0u, nullptr, 0u, nullptr);
3958
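// Reference dispatch: one 1x1x1 workgroup performs exactly one atomicAdd, so
// the expected final counter value is 1.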
3959 vkd.cmdDispatch(cmdBuffer, 1u, 1u, 1u);
3960
3961 const auto computeToHost = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
3962 vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u,
3963 &computeToHost, 0u, nullptr, 0u, nullptr);
3964
3965 endCommandBuffer(vkd, cmdBuffer);
3966 submitCommandsAndWait(vkd, device, queue, cmdBuffer);
3967
3968 uint32_t value;
3969 invalidateAlloc(vkd, device, verifBufferAlloc);
3970 deMemcpy(&value, verifBufferPtr, sizeof(value));
3971
3972 if (value != 1u)
3973 {
3974 std::ostringstream msg;
3975 msg << "Unexpected value found in buffer: " << value << " while expecting 1";
3976 TCU_FAIL(msg.str());
3977 }
3978
3979 return tcu::TestStatus::pass("Pass");
3980 }
3981
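// Dispatches a single workgroup using the maximum reported size along one axis
// (the other axes kept at 1) and checks that every invocation wrote its SSBO slot.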
3982 class MaxWorkGroupSizeTest : public vkt::TestCase
3983 {
3984 public:
3985 enum class Axis
3986 {
3987 X = 0,
3988 Y = 1,
3989 Z = 2
3990 };
3991
3992 struct Params
3993 {
3994 // Which axis to maximize.
3995 Axis axis;
3996 };
3997
3998 MaxWorkGroupSizeTest(tcu::TestContext &testCtx, const std::string &name, const Params ¶ms,
3999 const vk::ComputePipelineConstructionType computePipelineConstructionType);
4000 virtual ~MaxWorkGroupSizeTest(void)
4001 {
4002 }
4003
4004 virtual void initPrograms(vk::SourceCollections &programCollection) const;
4005 virtual TestInstance *createInstance(Context &context) const;
4006 virtual void checkSupport(Context &context) const;
4007
4008 // Helper to transform the axis value to an index.
4009 static int getIndex(Axis axis);
4010
4011 // Helper returning the number of invocations according to the test parameters.
4012 static uint32_t getInvocations(const Params ¶ms, const vk::InstanceInterface &vki,
4013 vk::VkPhysicalDevice physicalDevice,
4014 const vk::VkPhysicalDeviceProperties *devProperties = nullptr);
4015
4016 // Helper returning the buffer size needed by this test.
4017 static uint32_t getSSBOSize(uint32_t invocations);
4018
4019 private:
4020 Params m_params;
4021 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
4022 };
4023
4024 class MaxWorkGroupSizeInstance : public vkt::TestInstance
4025 {
4026 public:
4027 MaxWorkGroupSizeInstance(Context &context, const MaxWorkGroupSizeTest::Params ¶ms,
4028 const vk::ComputePipelineConstructionType computePipelineConstructionType);
4029 virtual ~MaxWorkGroupSizeInstance(void)
4030 {
4031 }
4032
4033 virtual tcu::TestStatus iterate(void);
4034
4035 private:
4036 MaxWorkGroupSizeTest::Params m_params;
4037 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
4038 };
4039
4040 int MaxWorkGroupSizeTest::getIndex(Axis axis)
4041 {
4042 const int ret = static_cast<int>(axis);
4043 DE_ASSERT(ret >= static_cast<int>(Axis::X) && ret <= static_cast<int>(Axis::Z));
4044 return ret;
4045 }
4046
4047 uint32_t MaxWorkGroupSizeTest::getInvocations(const Params &params, const vk::InstanceInterface &vki,
4048 vk::VkPhysicalDevice physicalDevice,
4049 const vk::VkPhysicalDeviceProperties *devProperties)
4050 {
4051 const auto axis = getIndex(params.axis);
4052
4053 if (devProperties)
4054 return devProperties->limits.maxComputeWorkGroupSize[axis];
4055 return vk::getPhysicalDeviceProperties(vki, physicalDevice).limits.maxComputeWorkGroupSize[axis];
4056 }
4057
4058 uint32_t MaxWorkGroupSizeTest::getSSBOSize(uint32_t invocations)
4059 {
4060 return invocations * static_cast<uint32_t>(sizeof(uint32_t));
4061 }
4062
4063 MaxWorkGroupSizeTest::MaxWorkGroupSizeTest(tcu::TestContext &testCtx, const std::string &name, const Params &params,
4064 const vk::ComputePipelineConstructionType computePipelineConstructionType)
4065 : vkt::TestCase(testCtx, name)
4066 , m_params(params)
4067 , m_computePipelineConstructionType(computePipelineConstructionType)
4068 {
4069 }
4070
4071 void MaxWorkGroupSizeTest::initPrograms(vk::SourceCollections &programCollection) const
4072 {
4073 std::ostringstream shader;
4074
4075 // The actual local sizes will be set using spec constants when running the test instance.
4076 shader << "#version 450\n"
4077 << "\n"
4078 << "layout(constant_id=0) const int local_size_x_val = 1;\n"
4079 << "layout(constant_id=1) const int local_size_y_val = 1;\n"
4080 << "layout(constant_id=2) const int local_size_z_val = 1;\n"
4081 << "\n"
4082 << "layout(local_size_x_id=0, local_size_y_id=1, local_size_z_id=2) in;\n"
4083 << "\n"
4084 << "layout(set=0, binding=0) buffer StorageBuffer {\n"
4085 << " uint values[];\n"
4086 << "} ssbo;\n"
4087 << "\n"
4088 << "void main() {\n"
4089 << " ssbo.values[gl_LocalInvocationIndex] = 1u;\n"
4090 << "}\n";
4091
4092 programCollection.glslSources.add("comp") << glu::ComputeSource(shader.str());
4093 }
4094
4095 TestInstance *MaxWorkGroupSizeTest::createInstance(Context &context) const
4096 {
4097 return new MaxWorkGroupSizeInstance(context, m_params, m_computePipelineConstructionType);
4098 }
4099
4100 void MaxWorkGroupSizeTest::checkSupport(Context &context) const
4101 {
4102 const auto &vki = context.getInstanceInterface();
4103 const auto physicalDevice = context.getPhysicalDevice();
4104
4105 const auto properties = vk::getPhysicalDeviceProperties(vki, physicalDevice);
4106 const auto invocations = getInvocations(m_params, vki, physicalDevice, &properties);
4107
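// A (limit, 1, 1) workgroup must be usable, so a per-axis size limit larger
// than maxComputeWorkGroupInvocations means the reported limits are
// inconsistent: treat it as a failure rather than missing support.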
4108 if (invocations > properties.limits.maxComputeWorkGroupInvocations)
4109 TCU_FAIL("Reported workgroup size limit in the axis is greater than the global invocation limit");
4110
4111 if (properties.limits.maxStorageBufferRange / static_cast<uint32_t>(sizeof(uint32_t)) < invocations)
4112 TCU_THROW(NotSupportedError, "Maximum supported storage buffer range too small");
4113
4114 checkShaderObjectRequirements(vki, physicalDevice, m_computePipelineConstructionType);
4115 }
4116
4117 MaxWorkGroupSizeInstance::MaxWorkGroupSizeInstance(
4118 Context &context, const MaxWorkGroupSizeTest::Params ¶ms,
4119 const vk::ComputePipelineConstructionType computePipelineConstructionType)
4120 : vkt::TestInstance(context)
4121 , m_params(params)
4122 , m_computePipelineConstructionType(computePipelineConstructionType)
4123 {
4124 }
4125
4126 tcu::TestStatus MaxWorkGroupSizeInstance::iterate(void)
4127 {
4128 const auto &vki = m_context.getInstanceInterface();
4129 const auto &vkd = m_context.getDeviceInterface();
4130 const auto physicalDevice = m_context.getPhysicalDevice();
4131 const auto device = m_context.getDevice();
4132 auto &alloc = m_context.getDefaultAllocator();
4133 const auto queueIndex = m_context.getUniversalQueueFamilyIndex();
4134 const auto queue = m_context.getUniversalQueue();
4135 auto &log = m_context.getTestContext().getLog();
4136
4137 const auto axis = MaxWorkGroupSizeTest::getIndex(m_params.axis);
4138 const auto invocations = MaxWorkGroupSizeTest::getInvocations(m_params, vki, physicalDevice);
4139 const auto ssboSize = static_cast<vk::VkDeviceSize>(MaxWorkGroupSizeTest::getSSBOSize(invocations));
4140
4141 log << tcu::TestLog::Message << "Running test with " << invocations << " invocations on axis " << axis
4142 << " using a storage buffer size of " << ssboSize << " bytes" << tcu::TestLog::EndMessage;
4143
4144 // Main SSBO buffer.
4145 const auto ssboInfo = vk::makeBufferCreateInfo(ssboSize, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
4146 vk::BufferWithMemory ssbo(vkd, device, alloc, ssboInfo, vk::MemoryRequirement::HostVisible);
4147
4148 // Descriptor set layouts.
4149 vk::DescriptorSetLayoutBuilder layoutBuilder;
4150 layoutBuilder.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT);
4151 const auto descriptorSetLayout = layoutBuilder.build(vkd, device);
4152
4153 // Specialization constants: set the number of invocations in the appropriate local size id.
4154 const auto entrySize = static_cast<uintptr_t>(sizeof(int32_t));
4155 int32_t specializationData[3] = {1, 1, 1};
4156 specializationData[axis] = static_cast<int32_t>(invocations);
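// Example: for Axis::Y and a reported limit of 1024, the data becomes
// {1, 1024, 1}. The map entries below bind offsets 0/4/8 to constant IDs 0-2,
// matching local_size_{x,y,z}_id in the shader.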
4157
4158 const vk::VkSpecializationMapEntry specializationMaps[3] = {
4159 {
4160 0u, // uint32_t constantID;
4161 0u, // uint32_t offset;
4162 entrySize, // uintptr_t size;
4163 },
4164 {
4165 1u, // uint32_t constantID;
4166 static_cast<uint32_t>(entrySize), // uint32_t offset;
4167 entrySize, // uintptr_t size;
4168 },
4169 {
4170 2u, // uint32_t constantID;
4171 static_cast<uint32_t>(entrySize * 2u), // uint32_t offset;
4172 entrySize, // uintptr_t size;
4173 },
4174 };
4175
4176 const vk::VkSpecializationInfo specializationInfo = {
4177 3u, // uint32_t mapEntryCount;
4178 specializationMaps, // const VkSpecializationMapEntry* pMapEntries;
4179 static_cast<uintptr_t>(sizeof(specializationData)), // uintptr_t dataSize;
4180 specializationData, // const void* pData;
4181 };
4182
4183 ComputePipelineWrapper testPipeline(vkd, device, m_computePipelineConstructionType,
4184 m_context.getBinaryCollection().get("comp"));
4185 testPipeline.setDescriptorSetLayout(descriptorSetLayout.get());
4186 testPipeline.setSpecializationInfo(specializationInfo);
4187 testPipeline.buildPipeline();
4188
4189 // Create descriptor pool and set.
4190 vk::DescriptorPoolBuilder poolBuilder;
4191 poolBuilder.addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
4192 const auto descriptorPool =
4193 poolBuilder.build(vkd, device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
4194 const auto descriptorSet = vk::makeDescriptorSet(vkd, device, descriptorPool.get(), descriptorSetLayout.get());
4195
4196 // Update descriptor set.
4197 const vk::VkDescriptorBufferInfo ssboBufferInfo = {
4198 ssbo.get(), // VkBuffer buffer;
4199 0u, // VkDeviceSize offset;
4200 VK_WHOLE_SIZE, // VkDeviceSize range;
4201 };
4202
4203 vk::DescriptorSetUpdateBuilder updateBuilder;
4204 updateBuilder.writeSingle(descriptorSet.get(), vk::DescriptorSetUpdateBuilder::Location::binding(0u),
4205 vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &ssboBufferInfo);
4206 updateBuilder.update(vkd, device);
4207
4208 // Clear buffer.
4209 auto &ssboAlloc = ssbo.getAllocation();
4210 void *ssboPtr = ssboAlloc.getHostPtr();
4211 deMemset(ssboPtr, 0, static_cast<size_t>(ssboSize));
4212 vk::flushAlloc(vkd, device, ssboAlloc);
4213
4214 // Run pipelines.
4215 const auto cmdPool = vk::makeCommandPool(vkd, device, queueIndex);
4216 const auto cmdBufferPtr =
4217 vk::allocateCommandBuffer(vkd, device, cmdPool.get(), vk::VK_COMMAND_BUFFER_LEVEL_PRIMARY);
4218 const auto cmdBuffer = cmdBufferPtr.get();
4219
4220 vk::beginCommandBuffer(vkd, cmdBuffer);
4221
4222 // Run the main test shader.
4223 const auto hostToComputeBarrier = vk::makeBufferMemoryBarrier(
4224 vk::VK_ACCESS_HOST_WRITE_BIT, vk::VK_ACCESS_SHADER_WRITE_BIT, ssbo.get(), 0ull, VK_WHOLE_SIZE);
4225 vkd.cmdPipelineBarrier(cmdBuffer, vk::VK_PIPELINE_STAGE_HOST_BIT, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u,
4226 nullptr, 1u, &hostToComputeBarrier, 0u, nullptr);
4227
4228 testPipeline.bind(cmdBuffer);
4229 vkd.cmdBindDescriptorSets(cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, testPipeline.getPipelineLayout(), 0u, 1u,
4230 &descriptorSet.get(), 0u, nullptr);
4231 vkd.cmdDispatch(cmdBuffer, 1u, 1u, 1u);
4232
4233 const auto computeToHostBarrier = vk::makeBufferMemoryBarrier(
4234 vk::VK_ACCESS_SHADER_WRITE_BIT, vk::VK_ACCESS_HOST_READ_BIT, ssbo.get(), 0ull, VK_WHOLE_SIZE);
4235 vkd.cmdPipelineBarrier(cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u,
4236 nullptr, 1u, &computeToHostBarrier, 0u, nullptr);
4237
4238 vk::endCommandBuffer(vkd, cmdBuffer);
4239 vk::submitCommandsAndWait(vkd, device, queue, cmdBuffer);
4240
4241 // Verify buffer contents.
4242 vk::invalidateAlloc(vkd, device, ssboAlloc);
4243 std::unique_ptr<uint32_t[]> valuesArray(new uint32_t[invocations]);
4244 uint32_t *valuesPtr = valuesArray.get();
4245 deMemcpy(valuesPtr, ssboPtr, static_cast<size_t>(ssboSize));
4246
4247 std::string errorMsg;
4248 bool ok = true;
4249
4250 for (size_t i = 0; i < invocations; ++i)
4251 {
4252 if (valuesPtr[i] != 1u)
4253 {
4254 ok = false;
4255 errorMsg = "Found invalid value for invocation index " + de::toString(i) + ": expected 1u and found " +
4256 de::toString(valuesPtr[i]);
4257 break;
4258 }
4259 }
4260
4261 if (!ok)
4262 return tcu::TestStatus::fail(errorMsg);
4263 return tcu::TestStatus::pass("Pass");
4264 }
4265
4266 namespace EmptyShaderTest
4267 {
4268
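// Sanity check: a dispatch of a shader with an empty body must record, submit
// and complete without error.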
4269 void checkSupport(Context &context, vk::ComputePipelineConstructionType computePipelineConstructionType)
4270 {
4271 checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
4272 computePipelineConstructionType);
4273 }
4274
4275 void createProgram(SourceCollections &dst, vk::ComputePipelineConstructionType)
4276 {
4277 dst.glslSources.add("comp") << glu::ComputeSource("#version 310 es\n"
4278 "layout (local_size_x = 1) in;\n"
4279 "void main (void) {}\n");
4280 }
4281
4282 tcu::TestStatus createTest(Context &context, vk::ComputePipelineConstructionType computePipelineConstructionType)
4283 {
4284 const DeviceInterface &vk = context.getDeviceInterface();
4285 const VkDevice device = context.getDevice();
4286 const VkQueue queue = context.getUniversalQueue();
4287 const uint32_t queueFamilyIndex = context.getUniversalQueueFamilyIndex();
4288
4289 ComputePipelineWrapper pipeline(vk, device, computePipelineConstructionType,
4290 context.getBinaryCollection().get("comp"));
4291 pipeline.buildPipeline();
4292
4293 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
4294 const Unique<VkCommandBuffer> cmdBuffer(
4295 allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
4296
4297 // Start recording commands
4298
4299 beginCommandBuffer(vk, *cmdBuffer);
4300
4301 pipeline.bind(*cmdBuffer);
4302
4303 const tcu::IVec3 workGroups(1, 1, 1);
4304 vk.cmdDispatch(*cmdBuffer, workGroups.x(), workGroups.y(), workGroups.z());
4305
4306 endCommandBuffer(vk, *cmdBuffer);
4307
4308 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
4309
4310 return tcu::TestStatus::pass("Compute succeeded");
4311 }
4312
4313 } // namespace EmptyShaderTest
4314
4315 namespace ComputeOnlyQueueTests
4316 {
4317
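// Returns the index of the first queue family that advertises compute but not
// graphics support, or nothing if no such family exists. Compute support implies
// that transfer operations are also available on the family.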
4318 tcu::Maybe<uint32_t> getComputeOnlyQueueFamily(Context &context)
4319 {
4320 bool foundQueue = false;
4321 uint32_t index = 0;
4322
4323 auto queueFamilies =
4324 getPhysicalDeviceQueueFamilyProperties(context.getInstanceInterface(), context.getPhysicalDevice());
4325
4326 for (const auto &queueFamily : queueFamilies)
4327 {
4328 if ((queueFamily.queueFlags & VK_QUEUE_COMPUTE_BIT) && !(queueFamily.queueFlags & VK_QUEUE_GRAPHICS_BIT))
4329 {
4330 foundQueue = true;
4331 break;
4332 }
4333 else
4334 {
4335 index++;
4336 }
4337 }
4338 if (!foundQueue)
4339 {
4340 return tcu::Maybe<uint32_t>();
4341 }
4342 else
4343 {
4344 return index;
4345 }
4346 }
4347
4348 // Creates a device that has a queue for compute capabilities without graphics.
4349 Move<VkDevice> createComputeOnlyDevice(vk::VkInstance instance, const InstanceInterface &instanceDriver,
4350 const VkPhysicalDevice physicalDevice, Context &context,
4351 uint32_t &queueFamilyIndex)
4352 {
4354
4355 // One queue family without a graphics bit should be found, since this is checked in checkSupport.
4356 queueFamilyIndex = getComputeOnlyQueueFamily(context).get();
4357
4358 const float queuePriority = 1.0f;
4359 const VkDeviceQueueCreateInfo deviceQueueCreateInfos = {
4360 VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, // VkStructureType sType;
4361 nullptr, // const void* pNext;
4362 (VkDeviceQueueCreateFlags)0u, // VkDeviceQueueCreateFlags flags;
4363 queueFamilyIndex, // uint32_t queueFamilyIndex;
4364 1u, // uint32_t queueCount;
4365 &queuePriority, // const float* pQueuePriorities;
4366 };
4367
4368 void *pNext = nullptr;
4369 #ifdef CTS_USES_VULKANSC
4370 VkDeviceObjectReservationCreateInfo memReservationInfo = context.getTestContext().getCommandLine().isSubProcess() ?
4371 context.getResourceInterface()->getStatMax() :
4372 resetDeviceObjectReservationCreateInfo();
4373 pNext = &memReservationInfo;
4374
4375 VkPhysicalDeviceVulkanSC10Features sc10Features = createDefaultSC10Features();
4376 sc10Features.pNext = pNext;
4377 pNext = &sc10Features;
4378
4379 VkPipelineCacheCreateInfo pcCI;
4380 std::vector<VkPipelinePoolSize> poolSizes;
4381 if (context.getTestContext().getCommandLine().isSubProcess())
4382 {
4383 if (context.getResourceInterface()->getCacheDataSize() > 0)
4384 {
4385 pcCI = {
4386 VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, // VkStructureType sType;
4387 nullptr, // const void* pNext;
4388 VK_PIPELINE_CACHE_CREATE_READ_ONLY_BIT |
4389 VK_PIPELINE_CACHE_CREATE_USE_APPLICATION_STORAGE_BIT, // VkPipelineCacheCreateFlags flags;
4390 context.getResourceInterface()->getCacheDataSize(), // uintptr_t initialDataSize;
4391 context.getResourceInterface()->getCacheData() // const void* pInitialData;
4392 };
4393 memReservationInfo.pipelineCacheCreateInfoCount = 1;
4394 memReservationInfo.pPipelineCacheCreateInfos = &pcCI;
4395 }
4396 poolSizes = context.getResourceInterface()->getPipelinePoolSizes();
4397 if (!poolSizes.empty())
4398 {
4399 memReservationInfo.pipelinePoolSizeCount = uint32_t(poolSizes.size());
4400 memReservationInfo.pPipelinePoolSizes = poolSizes.data();
4401 }
4402 }
4403 #endif // CTS_USES_VULKANSC
4404 const VkDeviceCreateInfo deviceCreateInfo = {
4405 VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, // VkStructureType sType;
4406 pNext, // const void* pNext;
4407 (VkDeviceCreateFlags)0u, // VkDeviceCreateFlags flags;
4408 1, // uint32_t queueCreateInfoCount;
4409 &deviceQueueCreateInfos, // const VkDeviceQueueCreateInfo* pQueueCreateInfos;
4410 0u, // uint32_t enabledLayerCount;
4411 nullptr, // const char* const* ppEnabledLayerNames;
4412 0, // uint32_t enabledExtensionCount;
4413 nullptr, // const char* const* ppEnabledExtensionNames;
4414 nullptr, // const VkPhysicalDeviceFeatures* pEnabledFeatures;
4415 };
4416
4417 return vkt::createCustomDevice(context.getTestContext().getCommandLine().isValidationEnabled(),
4418 context.getPlatformInterface(), instance, instanceDriver, physicalDevice,
4419 &deviceCreateInfo);
4420 }
4421
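// Verifies that compute work recorded into a secondary command buffer executes
// correctly when submitted to a compute-only queue.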
4422 class SecondaryCommandBufferComputeOnlyTest : public vkt::TestCase
4423 {
4424 public:
4425 SecondaryCommandBufferComputeOnlyTest(tcu::TestContext &context, const std::string &name)
4426 : vkt::TestCase(context, name) {}
4427
4428 void initPrograms(SourceCollections &programCollection) const override;
4429 TestInstance *createInstance(Context &context) const override;
4430 void checkSupport(Context &context) const override;
4431 };
4432
4433 class SecondaryCommandBufferComputeOnlyTestInstance : public vkt::TestInstance
4434 {
4435 public:
4436 SecondaryCommandBufferComputeOnlyTestInstance(Context &context)
4437 : vkt::TestInstance(context)
4438 #ifdef CTS_USES_VULKANSC
4439 , m_customInstance(createCustomInstanceFromContext(context))
4440 #endif // CTS_USES_VULKANSC
4441 {}
4442 virtual tcu::TestStatus iterate(void);
4443
4444 protected:
4445 #ifdef CTS_USES_VULKANSC
4446 const CustomInstance m_customInstance;
4447 #endif // CTS_USES_VULKANSC
4448 };
4449
4450 void SecondaryCommandBufferComputeOnlyTest::initPrograms(SourceCollections &collection) const
4451 {
4452 {
4453 std::ostringstream src;
4454 src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
4455 << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
4456 << "layout(set = 0, binding = 0, std430) buffer Out\n"
4457 << "{\n"
4458 << " uint data[];\n"
4459 << "};\n"
4460 << "void main (void)\n"
4461 << "{\n"
4462 << "data[0] = 1;"
4463 << "}\n";
4464 collection.glslSources.add("comp") << glu::ComputeSource(src.str());
4465 }
4466 }
4467
4468 TestInstance *SecondaryCommandBufferComputeOnlyTest::createInstance(Context &context) const
4469 {
4470 return new SecondaryCommandBufferComputeOnlyTestInstance(context);
4471 }
4472
4473 void SecondaryCommandBufferComputeOnlyTest::checkSupport(Context &context) const
4474 {
4475 // Require at least one queue family that supports compute but not graphics.
4476 if (!getComputeOnlyQueueFamily(context))
4477 TCU_THROW(NotSupportedError, "No compute-only queue family found");
4478 }
4479
4480 tcu::TestStatus SecondaryCommandBufferComputeOnlyTestInstance::iterate()
4481 {
4482 VkDevice device;
4483 uint32_t queueFamilyIndex;
4484 #ifdef CTS_USES_VULKANSC
4485 const vk::InstanceInterface &vki = m_customInstance.getDriver();
4486 const VkPhysicalDevice physDevice =
4487 chooseDevice(vki, m_customInstance, m_context.getTestContext().getCommandLine());
4488 auto customDevice = createComputeOnlyDevice(m_customInstance, vki, physDevice, m_context, queueFamilyIndex);
4489 de::MovePtr<DeviceDriverSC, DeinitDeviceDeleter> deviceDriver;
4490 #else
4491 const InstanceInterface &vki = m_context.getInstanceInterface();
4492 const VkPhysicalDevice physDevice = m_context.getPhysicalDevice();
4493 auto customDevice = createComputeOnlyDevice(m_context.getInstance(), vki, physDevice, m_context, queueFamilyIndex);
4494 de::MovePtr<DeviceDriver> deviceDriver;
4495 #endif // CTS_USES_VULKANSC
4496
4497 device = customDevice.get();
4498
4499 #ifndef CTS_USES_VULKANSC
4500 deviceDriver = de::MovePtr<DeviceDriver>(new DeviceDriver(m_context.getPlatformInterface(), m_context.getInstance(),
4501 device, m_context.getUsedApiVersion(),
4502 m_context.getTestContext().getCommandLine()));
4503 #else
4504 deviceDriver = de::MovePtr<DeviceDriverSC, DeinitDeviceDeleter>(
4505 new DeviceDriverSC(m_context.getPlatformInterface(), m_customInstance, device,
4506 m_context.getTestContext().getCommandLine(), m_context.getResourceInterface(),
4507 m_context.getDeviceVulkanSC10Properties(), m_context.getDeviceProperties(),
4508 m_context.getUsedApiVersion()),
4509 DeinitDeviceDeleter(m_context.getResourceInterface().get(), device));
4510 #endif // CTS_USES_VULKANSC
4511
4512 const DeviceInterface &vkdi = *deviceDriver;
4513
4514 auto queue = getDeviceQueue(vkdi, device, queueFamilyIndex, 0u);
4515 auto allocator =
4516 de::MovePtr<Allocator>(new SimpleAllocator(vkdi, device, getPhysicalDeviceMemoryProperties(vki, physDevice)));
4517
4518 const auto bufferSize = static_cast<VkDeviceSize>(sizeof(uint32_t));
4519 BufferWithMemory buffer(vkdi, device, *allocator.get(),
4520 makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
4521 MemoryRequirement::HostVisible);
4522 auto &bufferAlloc = buffer.getAllocation();
4523 void *bufferData = bufferAlloc.getHostPtr();
4524 deMemset(bufferData, 0, sizeof(uint32_t));
4525 flushAlloc(vkdi, device, bufferAlloc);
4526
4527 DescriptorSetLayoutBuilder layoutBuilder;
4528 layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
4529 Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vkdi, device));
4530
4531 DescriptorPoolBuilder poolBuilder;
4532 poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
4533 const auto descriptorPool = poolBuilder.build(vkdi, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1);
4534 const auto descriptorSetBuffer = makeDescriptorSet(vkdi, device, descriptorPool.get(), descriptorSetLayout.get());
4535
4536 // Update descriptor sets.
4537 DescriptorSetUpdateBuilder updater;
4538
4539 const auto bufferInfo = makeDescriptorBufferInfo(buffer.get(), 0ull, bufferSize);
4540 updater.writeSingle(descriptorSetBuffer.get(), DescriptorSetUpdateBuilder::Location::binding(0u),
4541 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferInfo);
4542
4543 updater.update(vkdi, device);
4544
4545 auto shader = createShaderModule(vkdi, device, m_context.getBinaryCollection().get("comp"));
4546 // Create compute pipeline
4547 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vkdi, device, *descriptorSetLayout));
4548 const Unique<VkPipeline> computePipeline(makeComputePipeline(vkdi, device, *pipelineLayout, *shader));
4549
4550 // Create command buffer
4551 const Unique<VkCommandPool> cmdPool(makeCommandPool(vkdi, device, queueFamilyIndex));
4552 const Unique<VkCommandBuffer> cmdBuffer(
4553 allocateCommandBuffer(vkdi, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
4554 const Unique<VkCommandBuffer> cmdBuffer2(
4555 allocateCommandBuffer(vkdi, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_SECONDARY));
4556
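// Secondary command buffers require inheritance info at begin time; for pure
// compute work the render pass and framebuffer fields are unused and left null.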
4557 const VkCommandBufferInheritanceInfo bufferInheritanceInfo{
4558 VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO, // VkStructureType sType;
4559 nullptr, // const void* pNext;
4560 VK_NULL_HANDLE, // VkRenderPass renderPass;
4561 0u, // uint32_t subpass;
4562 VK_NULL_HANDLE, // VkFramebuffer framebuffer;
4563 VK_FALSE, // VkBool32 occlusionQueryEnable;
4564 (VkQueryControlFlags)0u, // VkQueryControlFlags queryFlags;
4565 (VkQueryPipelineStatisticFlags)0u // VkQueryPipelineStatisticFlags pipelineStatistics;
4566 };
4567
4568 VkCommandBufferUsageFlags usageFlags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
4569 const VkCommandBufferBeginInfo commandBufBeginParams{
4570 VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, // VkStructureType sType;
4571 nullptr, // const void* pNext;
4572 usageFlags, // VkCommandBufferUsageFlags flags;
4573 &bufferInheritanceInfo};
4574
4575 beginCommandBuffer(vkdi, cmdBuffer.get());
4576 vkdi.beginCommandBuffer(cmdBuffer2.get(), &commandBufBeginParams);
4577 vkdi.cmdBindPipeline(cmdBuffer2.get(), VK_PIPELINE_BIND_POINT_COMPUTE, computePipeline.get());
4578 vkdi.cmdBindDescriptorSets(cmdBuffer2.get(), VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout.get(), 0u, 1,
4579 &descriptorSetBuffer.get(), 0u, nullptr);
4580 vkdi.cmdDispatch(cmdBuffer2.get(), 1, 1, 1);
4581 endCommandBuffer(vkdi, cmdBuffer2.get());
4582 vkdi.cmdExecuteCommands(cmdBuffer.get(), 1, &cmdBuffer2.get());
4583 const VkBufferMemoryBarrier renderBufferBarrier =
4584 makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, buffer.get(), 0ull, bufferSize);
4585 cmdPipelineBufferMemoryBarrier(vkdi, cmdBuffer.get(), VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
4586 VK_PIPELINE_STAGE_HOST_BIT, &renderBufferBarrier);
4587 endCommandBuffer(vkdi, cmdBuffer.get());
4588 submitCommandsAndWait(vkdi, device, queue, cmdBuffer.get());
4589
4590 invalidateAlloc(vkdi, device, bufferAlloc);
4591
4592 uint32_t result = 0;
4593 deMemcpy(&result, bufferData, sizeof(uint32_t));
4594 if (result != 1)
4595 {
4596 return tcu::TestStatus::pass("value of buffer unexpected");
4597 }
4598
4599 return tcu::TestStatus::pass("passed");
4600 }
4601
4602 } // namespace ComputeOnlyQueueTests
4603
4604 } // namespace
4605
4606 tcu::TestCaseGroup *createBasicComputeShaderTests(tcu::TestContext &testCtx,
4607 vk::ComputePipelineConstructionType computePipelineConstructionType)
4608 {
4609 // Basic compute tests
4610 de::MovePtr<tcu::TestCaseGroup> basicComputeTests(new tcu::TestCaseGroup(testCtx, "basic"));
4611
4612 // Shader that does nothing
4613 addFunctionCaseWithPrograms(basicComputeTests.get(), "empty_shader", EmptyShaderTest::checkSupport,
4614 EmptyShaderTest::createProgram, EmptyShaderTest::createTest,
4615 computePipelineConstructionType);
4616
4617 // Concurrent compute test
4618 basicComputeTests->addChild(new ConcurrentCompute(testCtx, "concurrent_compute", computePipelineConstructionType));
4619
4620 // Use an empty workgroup with size 0 on the X axis
4621 basicComputeTests->addChild(
4622 new EmptyWorkGroupCase(testCtx, "empty_workgroup_x", tcu::UVec3(0u, 2u, 3u), computePipelineConstructionType));
4623 // Use an empty workgroup with size 0 on the Y axis
4624 basicComputeTests->addChild(
4625 new EmptyWorkGroupCase(testCtx, "empty_workgroup_y", tcu::UVec3(2u, 0u, 3u), computePipelineConstructionType));
4626 // Use an empty workgroup with size 0 on the Z axis
4627 basicComputeTests->addChild(
4628 new EmptyWorkGroupCase(testCtx, "empty_workgroup_z", tcu::UVec3(2u, 3u, 0u), computePipelineConstructionType));
4629 // Use an empty workgroup with size 0 on the X, Y and Z axes
4630 basicComputeTests->addChild(new EmptyWorkGroupCase(testCtx, "empty_workgroup_all", tcu::UVec3(0u, 0u, 0u),
4631 computePipelineConstructionType));
4632
4633 // Use the maximum work group size on the X axis
4634 basicComputeTests->addChild(new MaxWorkGroupSizeTest(testCtx, "max_local_size_x",
4635 MaxWorkGroupSizeTest::Params{MaxWorkGroupSizeTest::Axis::X},
4636 computePipelineConstructionType));
4637 // Use the maximum work group size on the Y axis
4638 basicComputeTests->addChild(new MaxWorkGroupSizeTest(testCtx, "max_local_size_y",
4639 MaxWorkGroupSizeTest::Params{MaxWorkGroupSizeTest::Axis::Y},
4640 computePipelineConstructionType));
4641 // Use the maximum work group size on the Z axis
4642 basicComputeTests->addChild(new MaxWorkGroupSizeTest(testCtx, "max_local_size_z",
4643 MaxWorkGroupSizeTest::Params{MaxWorkGroupSizeTest::Axis::Z},
4644 computePipelineConstructionType));
4645
4646 // UBO to SSBO invert tests
4647 basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(
4648 testCtx, "ubo_to_ssbo_single_invocation", 256, tcu::IVec3(1, 1, 1), tcu::IVec3(1, 1, 1),
4649 computePipelineConstructionType));
4650 basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx, "ubo_to_ssbo_single_group", 1024,
4651 tcu::IVec3(2, 1, 4), tcu::IVec3(1, 1, 1),
4652 computePipelineConstructionType));
4653 basicComputeTests->addChild(BufferToBufferInvertTest::UBOToSSBOInvertCase(
4654 testCtx, "ubo_to_ssbo_multiple_invocations", 1024, tcu::IVec3(1, 1, 1), tcu::IVec3(2, 4, 1),
4655 computePipelineConstructionType));
4656 basicComputeTests->addChild(
4657 BufferToBufferInvertTest::UBOToSSBOInvertCase(testCtx, "ubo_to_ssbo_multiple_groups", 1024, tcu::IVec3(1, 4, 2),
4658 tcu::IVec3(2, 2, 4), computePipelineConstructionType));
4659
4660 // SSBO copy-and-invert tests
4661 basicComputeTests->addChild(
4662 BufferToBufferInvertTest::CopyInvertSSBOCase(testCtx, "copy_ssbo_single_invocation", 256, tcu::IVec3(1, 1, 1),
4663 tcu::IVec3(1, 1, 1), computePipelineConstructionType));
4664 basicComputeTests->addChild(BufferToBufferInvertTest::CopyInvertSSBOCase(
4665 testCtx, "copy_ssbo_multiple_invocations", 1024, tcu::IVec3(1, 1, 1), tcu::IVec3(2, 4, 1),
4666 computePipelineConstructionType));
4667 basicComputeTests->addChild(BufferToBufferInvertTest::CopyInvertSSBOCase(testCtx, "copy_ssbo_multiple_groups", 1024,
4668 tcu::IVec3(1, 4, 2), tcu::IVec3(2, 2, 4),
4669 computePipelineConstructionType));
4670
4671 // Read and write same SSBO
4672 basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx, "ssbo_rw_single_invocation", 256, true,
4673 tcu::IVec3(1, 1, 1), tcu::IVec3(1, 1, 1),
4674 computePipelineConstructionType));
4675 basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx, "ssbo_rw_multiple_groups", 1024, true,
4676 tcu::IVec3(1, 4, 2), tcu::IVec3(2, 2, 4),
4677 computePipelineConstructionType));
4678 basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx, "ssbo_unsized_arr_single_invocation", 256, false,
4679 tcu::IVec3(1, 1, 1), tcu::IVec3(1, 1, 1),
4680 computePipelineConstructionType));
4681 basicComputeTests->addChild(new InvertSSBOInPlaceTest(testCtx, "ssbo_unsized_arr_multiple_groups", 1024, false,
4682 tcu::IVec3(1, 4, 2), tcu::IVec3(2, 2, 4),
4683 computePipelineConstructionType));
4684
4685 // Write to multiple SSBOs
4686 basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx, "write_multiple_arr_single_invocation", 256, true,
4687 tcu::IVec3(1, 1, 1), tcu::IVec3(1, 1, 1),
4688 computePipelineConstructionType));
4689 basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx, "write_multiple_arr_multiple_groups", 1024, true,
4690 tcu::IVec3(1, 4, 2), tcu::IVec3(2, 2, 4),
4691 computePipelineConstructionType));
4692 basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx, "write_multiple_unsized_arr_single_invocation",
4693 256, false, tcu::IVec3(1, 1, 1), tcu::IVec3(1, 1, 1),
4694 computePipelineConstructionType));
4695 basicComputeTests->addChild(new WriteToMultipleSSBOTest(testCtx, "write_multiple_unsized_arr_multiple_groups", 1024,
4696 false, tcu::IVec3(1, 4, 2), tcu::IVec3(2, 2, 4),
4697 computePipelineConstructionType));
4698
4699 // SSBO local barrier usage
4700 basicComputeTests->addChild(new SSBOLocalBarrierTest(testCtx, "ssbo_local_barrier_single_invocation",
4701 tcu::IVec3(1, 1, 1), tcu::IVec3(1, 1, 1),
4702 computePipelineConstructionType));
4703 basicComputeTests->addChild(new SSBOLocalBarrierTest(testCtx, "ssbo_local_barrier_single_group",
4704 tcu::IVec3(3, 2, 5), tcu::IVec3(1, 1, 1),
4705 computePipelineConstructionType));
4706 basicComputeTests->addChild(new SSBOLocalBarrierTest(testCtx, "ssbo_local_barrier_multiple_groups",
4707 tcu::IVec3(3, 4, 1), tcu::IVec3(2, 7, 3),
4708 computePipelineConstructionType));
4709
4710 // SSBO memory barrier usage
4711 basicComputeTests->addChild(
4712 new SSBOBarrierTest(testCtx, "ssbo_cmd_barrier_single", tcu::IVec3(1, 1, 1), computePipelineConstructionType));
4713 basicComputeTests->addChild(new SSBOBarrierTest(testCtx, "ssbo_cmd_barrier_multiple", tcu::IVec3(11, 5, 7),
4714 computePipelineConstructionType));
4715
4716 // Basic shared variable usage
4717 basicComputeTests->addChild(new SharedVarTest(testCtx, "shared_var_single_invocation", tcu::IVec3(1, 1, 1),
4718 tcu::IVec3(1, 1, 1), computePipelineConstructionType));
4719 basicComputeTests->addChild(new SharedVarTest(testCtx, "shared_var_single_group", tcu::IVec3(3, 2, 5),
4720 tcu::IVec3(1, 1, 1), computePipelineConstructionType));
4721 basicComputeTests->addChild(new SharedVarTest(testCtx, "shared_var_multiple_invocations", tcu::IVec3(1, 1, 1),
4722 tcu::IVec3(2, 5, 4), computePipelineConstructionType));
4723 basicComputeTests->addChild(new SharedVarTest(testCtx, "shared_var_multiple_groups", tcu::IVec3(3, 4, 1),
4724 tcu::IVec3(2, 7, 3), computePipelineConstructionType));
4725
4726 // Atomic operation with shared var
4727 basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx, "shared_atomic_op_single_invocation",
4728 tcu::IVec3(1, 1, 1), tcu::IVec3(1, 1, 1),
4729 computePipelineConstructionType));
4730 basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx, "shared_atomic_op_single_group", tcu::IVec3(3, 2, 5),
4731 tcu::IVec3(1, 1, 1), computePipelineConstructionType));
4732 basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx, "shared_atomic_op_multiple_invocations",
4733 tcu::IVec3(1, 1, 1), tcu::IVec3(2, 5, 4),
4734 computePipelineConstructionType));
4735 basicComputeTests->addChild(new SharedVarAtomicOpTest(testCtx, "shared_atomic_op_multiple_groups",
4736 tcu::IVec3(3, 4, 1), tcu::IVec3(2, 7, 3),
4737 computePipelineConstructionType));
4738
4739 // Image to SSBO copy
4740 basicComputeTests->addChild(new CopyImageToSSBOTest(testCtx, "copy_image_to_ssbo_small", tcu::IVec2(1, 1),
4741 tcu::IVec2(64, 64), computePipelineConstructionType));
4742 basicComputeTests->addChild(new CopyImageToSSBOTest(testCtx, "copy_image_to_ssbo_large", tcu::IVec2(2, 4),
4743 tcu::IVec2(512, 512), computePipelineConstructionType));
4744
4745 // SSBO to image copy
4746 basicComputeTests->addChild(new CopySSBOToImageTest(testCtx, "copy_ssbo_to_image_small", tcu::IVec2(1, 1),
4747 tcu::IVec2(64, 64), computePipelineConstructionType));
4748 basicComputeTests->addChild(new CopySSBOToImageTest(testCtx, "copy_ssbo_to_image_large", tcu::IVec2(2, 4),
4749 tcu::IVec2(512, 512), computePipelineConstructionType));
4750
4751 // Atomic operation with image
4752 basicComputeTests->addChild(new ImageAtomicOpTest(testCtx, "image_atomic_op_local_size_1", 1, tcu::IVec2(64, 64),
4753 computePipelineConstructionType));
4754 basicComputeTests->addChild(new ImageAtomicOpTest(testCtx, "image_atomic_op_local_size_8", 8, tcu::IVec2(64, 64),
4755 computePipelineConstructionType));
4756
4757 // Image barrier
4758 basicComputeTests->addChild(
4759 new ImageBarrierTest(testCtx, "image_barrier_single", tcu::IVec2(1, 1), computePipelineConstructionType));
4760 basicComputeTests->addChild(
4761 new ImageBarrierTest(testCtx, "image_barrier_multiple", tcu::IVec2(64, 64), computePipelineConstructionType));
4762
4763 // Test secondary command buffers in compute only queues
4764 basicComputeTests->addChild(
4765 new ComputeOnlyQueueTests::SecondaryCommandBufferComputeOnlyTest(testCtx, "secondary_compute_only_queue"));
4766
4767 #ifndef CTS_USES_VULKANSC
4768 if (!isComputePipelineConstructionTypeShaderObject(computePipelineConstructionType))
4769 {
4770 basicComputeTests->addChild(
4771 cts_amber::createAmberTestCase(testCtx, "write_ssbo_array", "", "compute", "write_ssbo_array.amber"));
4772 basicComputeTests->addChild(
4773 cts_amber::createAmberTestCase(testCtx, "branch_past_barrier", "", "compute", "branch_past_barrier.amber"));
4774 basicComputeTests->addChild(cts_amber::createAmberTestCase(
4775 testCtx, "webgl_spirv_loop",
4776 "Simple SPIR-V loop from a WebGL example that caused problems in some implementations", "compute",
4777 "webgl_spirv_loop.amber"));
4778 }
4779 #endif
4780
4781 return basicComputeTests.release();
4782 }
4783
4784 tcu::TestCaseGroup *createBasicDeviceGroupComputeShaderTests(
4785 tcu::TestContext &testCtx, vk::ComputePipelineConstructionType computePipelineConstructionType)
4786 {
4787 de::MovePtr<tcu::TestCaseGroup> deviceGroupComputeTests(new tcu::TestCaseGroup(testCtx, "device_group"));
4788
4789 deviceGroupComputeTests->addChild(new DispatchBaseTest(testCtx, "dispatch_base", 32768, tcu::IVec3(4, 2, 4),
4790 tcu::IVec3(16, 8, 8), tcu::IVec3(4, 8, 8),
4791 computePipelineConstructionType, false));
4792 #ifndef CTS_USES_VULKANSC
4793 deviceGroupComputeTests->addChild(new DispatchBaseTest(testCtx, "dispatch_base_maintenance5", 32768,
4794 tcu::IVec3(4, 2, 4), tcu::IVec3(16, 8, 8),
4795 tcu::IVec3(4, 8, 8), computePipelineConstructionType, true));
4796 #endif
4797 deviceGroupComputeTests->addChild(new DeviceIndexTest(testCtx, "device_index", 96, tcu::IVec3(3, 2, 1),
4798 tcu::IVec3(2, 4, 1), computePipelineConstructionType));
4799
4800 return deviceGroupComputeTests.release();
4801 }
4802 } // namespace compute
4803 } // namespace vkt
4804