/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2021 Google LLC.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Tests that compute shaders have a subgroup size that is uniform in
 * command scope.
 *//*--------------------------------------------------------------------*/

#include "deUniquePtr.hpp"

#include "vkRef.hpp"
#include "vkRefUtil.hpp"
#include "vkPrograms.hpp"
#include "vkMemUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkObjUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkBufferWithMemory.hpp"
#include "vkBarrierUtil.hpp"
#include "vktTestCaseUtil.hpp"
#include "tcuTestLog.hpp"
#include <sstream>

using namespace vk;

namespace vkt
{
namespace subgroups
{
namespace
{

class MultipleDispatchesUniformSubgroupSizeInstance : public TestInstance
{
public:
    MultipleDispatchesUniformSubgroupSizeInstance(Context &context);
    tcu::TestStatus iterate(void);
};

MultipleDispatchesUniformSubgroupSizeInstance::MultipleDispatchesUniformSubgroupSizeInstance(Context &context)
    : TestInstance(context)
{
}

tcu::TestStatus MultipleDispatchesUniformSubgroupSizeInstance::iterate(void)
{
    const DeviceInterface &vk       = m_context.getDeviceInterface();
    const VkDevice device           = m_context.getDevice();
    Allocator &allocator            = m_context.getDefaultAllocator();
    const VkQueue queue             = m_context.getUniversalQueue();
    const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();

    const Move<VkCommandPool> cmdPool =
        createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex);
    const Move<VkCommandBuffer> cmdBuffer =
        allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);

    Move<VkShaderModule> computeShader =
        createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u);

    // The maximum supported workgroup size in the X dimension (the shader only sets local_size_x).
    const uint32_t maxLocalSize = m_context.getDeviceProperties().limits.maxComputeWorkGroupSize[0];
#ifndef CTS_USES_VULKANSC
    const uint32_t minSubgroupSize = m_context.getSubgroupSizeControlProperties().minSubgroupSize;
#else
    const uint32_t minSubgroupSize = m_context.getSubgroupSizeControlPropertiesEXT().minSubgroupSize;
#endif // CTS_USES_VULKANSC

    // Create a storage buffer to hold the sizes of subgroups.
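    // Worst case there are ceil(maxLocalSize / minSubgroupSize) subgroups; the +1 covers the rounding.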
    const VkDeviceSize bufferSize = (maxLocalSize / minSubgroupSize + 1u) * sizeof(uint32_t);

    const VkBufferCreateInfo resultBufferCreateInfo =
        makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
    BufferWithMemory resultBuffer(vk, device, allocator, resultBufferCreateInfo, MemoryRequirement::HostVisible);
    auto &resultBufferAlloc = resultBuffer.getAllocation();

    // Build descriptors for the storage buffer
    const Unique<VkDescriptorPool> descriptorPool(
        DescriptorPoolBuilder()
            .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
            .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
    const auto descriptorSetLayout1(
        DescriptorSetLayoutBuilder()
            .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
            .build(vk, device));
    const VkDescriptorBufferInfo resultInfo = makeDescriptorBufferInfo(*resultBuffer, 0u, bufferSize);

    const VkDescriptorSetAllocateInfo allocInfo = {
        VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, // sType
        DE_NULL,                                        // pNext
        *descriptorPool,                                // descriptorPool
        1u,                                             // descriptorSetCount
        &(*descriptorSetLayout1)                        // pSetLayouts
    };

    Move<VkDescriptorSet> descriptorSet = allocateDescriptorSet(vk, device, &allocInfo);
    DescriptorSetUpdateBuilder builder;

    builder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
                        VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultInfo);
    builder.update(vk, device);

    // Compute pipeline
    const Move<VkPipelineLayout> computePipelineLayout = makePipelineLayout(vk, device, *descriptorSetLayout1);

    for (uint32_t localSize = 1u; localSize <= maxLocalSize; localSize *= 2u)
    {
        // On each iteration, change the workgroup size, which may affect the subgroup
        // size chosen by the implementation.
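        // Specialization constant 0 supplies the value of local_size_x (local_size_x_id = 0 in the shader).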
        const VkSpecializationMapEntry entries = {
            0u,               // uint32_t constantID;
            0u,               // uint32_t offset;
            sizeof(localSize) // size_t size;
        };

        const VkSpecializationInfo specInfo = {
            1,                 // mapEntryCount
            &entries,          // pMapEntries
            sizeof(localSize), // dataSize
            &localSize         // pData
        };

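        // ALLOW_VARYING_SUBGROUP_SIZE lets the implementation pick any supported subgroup size
        // for this stage; the size must still be uniform within a single dispatch command.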
        const VkPipelineShaderStageCreateInfo shaderStageCreateInfo = {
            VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,                 // sType
            DE_NULL,                                                             // pNext
            VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT, // flags
            VK_SHADER_STAGE_COMPUTE_BIT,                                         // stage
            *computeShader,                                                      // module
            "main",                                                              // pName
            &specInfo,                                                           // pSpecializationInfo
        };

        const VkComputePipelineCreateInfo pipelineCreateInfo = {
            VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // sType
            DE_NULL,                                        // pNext
            0u,                                             // flags
            shaderStageCreateInfo,                          // stage
            *computePipelineLayout,                         // layout
            (VkPipeline)0,                                  // basePipelineHandle
            0u,                                             // basePipelineIndex
        };

        Move<VkPipeline> computePipeline = createComputePipeline(vk, device, (VkPipelineCache)0u, &pipelineCreateInfo);

        beginCommandBuffer(vk, *cmdBuffer);

        // Clears the values in the buffer.
        vk.cmdFillBuffer(*cmdBuffer, *resultBuffer, 0u, VK_WHOLE_SIZE, 0);

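        // Make the fill complete before the compute shader writes to the same buffer.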
        const auto fillBarrier = makeBufferMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_WRITE_BIT,
                                                         *resultBuffer, 0ull, bufferSize);
        cmdPipelineBufferMemoryBarrier(vk, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT,
                                       VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, &fillBarrier);

        // Bind the descriptor set and pipeline, then dispatch a single workgroup.
        vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipelineLayout, 0u, 1u,
                                 &descriptorSet.get(), 0u, nullptr);
        vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
        vk.cmdDispatch(*cmdBuffer, 1, 1, 1);

        const auto computeToHostBarrier = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
        cmdPipelineMemoryBarrier(vk, *cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
                                 &computeToHostBarrier);

        endCommandBuffer(vk, *cmdBuffer);
        submitCommandsAndWait(vk, device, queue, *cmdBuffer);

        invalidateAlloc(vk, device, resultBufferAlloc);

        // Validate results: all non-zero subgroup sizes must be the same.
        const uint32_t *res     = static_cast<const uint32_t *>(resultBufferAlloc.getHostPtr());
        const uint32_t maxIters = static_cast<uint32_t>(bufferSize / sizeof(uint32_t));
        uint32_t size           = 0u;
        uint32_t subgroupCount  = 0u;
        auto &log               = m_context.getTestContext().getLog();

        for (uint32_t sizeIdx = 0u; sizeIdx < maxIters; ++sizeIdx)
        {
            if (res[sizeIdx] != 0u)
            {
                if (size == 0u)
                {
                    size = res[sizeIdx];
                }
                else if (res[sizeIdx] != size)
                {
                    std::ostringstream msg;
                    msg << "Subgroup size not uniform in command scope: " << res[sizeIdx] << " != " << size
                        << " at position " << sizeIdx;
                    TCU_FAIL(msg.str());
                }
                ++subgroupCount;
            }
        }

        // Subgroup size is guaranteed to be at least 1.
        if (size == 0u)
            TCU_FAIL("Subgroup size must be at least 1");

        // The number of subgroups that reported a size must match the expected count.
        const auto expectedSubgroupCount = (localSize / size + ((localSize % size != 0u) ? 1u : 0u));
        if (subgroupCount != expectedSubgroupCount)
        {
            std::ostringstream msg;
            msg << "Local size " << localSize << " with subgroup size " << size << " resulted in subgroup count "
                << subgroupCount << " != " << expectedSubgroupCount;
            TCU_FAIL(msg.str());
        }

        {
            std::ostringstream msg;
            msg << "Subgroup size " << size << " with local size " << localSize;
            log << tcu::TestLog::Message << msg.str() << tcu::TestLog::EndMessage;
        }
    }

    return tcu::TestStatus::pass("Pass");
}

class MultipleDispatchesUniformSubgroupSize : public TestCase
{
public:
    MultipleDispatchesUniformSubgroupSize(tcu::TestContext &testCtx, const std::string &name);

    void initPrograms(SourceCollections &programCollection) const;
    TestInstance *createInstance(Context &context) const;
    virtual void checkSupport(Context &context) const;

};

MultipleDispatchesUniformSubgroupSize::MultipleDispatchesUniformSubgroupSize(tcu::TestContext &testCtx,
                                                                             const std::string &name)
    : TestCase(testCtx, name)
{
}

void MultipleDispatchesUniformSubgroupSize::checkSupport(Context &context) const
{
#ifndef CTS_USES_VULKANSC
    const VkPhysicalDeviceSubgroupSizeControlFeatures &subgroupSizeControlFeatures =
        context.getSubgroupSizeControlFeatures();
#else
    const VkPhysicalDeviceSubgroupSizeControlFeaturesEXT &subgroupSizeControlFeatures =
        context.getSubgroupSizeControlFeaturesEXT();
#endif // CTS_USES_VULKANSC

    if (subgroupSizeControlFeatures.subgroupSizeControl == false)
        TCU_THROW(NotSupportedError, "Device does not support varying subgroup sizes");
}

void MultipleDispatchesUniformSubgroupSize::initPrograms(SourceCollections &programCollection) const
{
    std::ostringstream computeSrc;
    computeSrc << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
               << "#extension GL_KHR_shader_subgroup_basic : enable\n"
               << "#extension GL_KHR_shader_subgroup_vote : enable\n"
               << "#extension GL_KHR_shader_subgroup_ballot : enable\n"
               << "layout(std430, binding = 0) buffer Outputs { uint sizes[]; };\n"

               << "layout(local_size_x_id = 0) in;\n"

               << "void main()\n"
               << "{\n"
281                << "    if (subgroupElect())\n"
282                << "    {\n"
283                << "        sizes[gl_WorkGroupID.x * gl_NumSubgroups + gl_SubgroupID] = gl_SubgroupSize;\n"
284                << "    }\n"
285                << "}\n";
286 
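    // Subgroup operations require SPIR-V 1.3.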
    programCollection.glslSources.add("comp")
        << glu::ComputeSource(computeSrc.str())
        << ShaderBuildOptions(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3, 0u);
}

TestInstance *MultipleDispatchesUniformSubgroupSize::createInstance(Context &context) const
{
    return new MultipleDispatchesUniformSubgroupSizeInstance(context);
}

} // namespace

tcu::TestCaseGroup *createMultipleDispatchesUniformSubgroupSizeTests(tcu::TestContext &testCtx)
{
    // Multiple dispatches uniform subgroup size tests
    de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "multiple_dispatches"));

    testGroup->addChild(new MultipleDispatchesUniformSubgroupSize(testCtx, "uniform_subgroup_size"));
    return testGroup.release();
}

} // namespace subgroups
} // namespace vkt