1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2021 Google LLC.
6 *
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 *
20 *//*!
21 * \file
22 * \brief Tests that compute shaders have a subgroup size that is uniform in
23 * command scope.
24 *//*--------------------------------------------------------------------*/
25
26 #include "deUniquePtr.hpp"
27
28 #include "vkRef.hpp"
29 #include "vkRefUtil.hpp"
30 #include "vkPrograms.hpp"
31 #include "vkMemUtil.hpp"
32 #include "vkBuilderUtil.hpp"
33 #include "vkCmdUtil.hpp"
34 #include "vkObjUtil.hpp"
35 #include "vkTypeUtil.hpp"
36 #include "vkBufferWithMemory.hpp"
37 #include "vkBarrierUtil.hpp"
38 #include "vktTestCaseUtil.hpp"
39 #include "tcuTestLog.hpp"
40 #include <sstream>
41
42 using namespace vk;
43
44 namespace vkt
45 {
46 namespace subgroups
47 {
48 namespace
49 {
50
// Test instance verifying that the subgroup size reported by a compute
// pipeline created with ALLOW_VARYING_SUBGROUP_SIZE is uniform across all
// subgroups of a dispatch, for a range of workgroup sizes.
class MultipleDispatchesUniformSubgroupSizeInstance : public TestInstance
{
public:
    MultipleDispatchesUniformSubgroupSizeInstance(Context &context);
    // Records, submits and validates one dispatch per power-of-two local size.
    tcu::TestStatus iterate(void);
};
57
// Trivial constructor; all work happens in iterate().
MultipleDispatchesUniformSubgroupSizeInstance::MultipleDispatchesUniformSubgroupSizeInstance(Context &context)
    : TestInstance(context)
{
}
62
// For each power-of-two local size up to the device limit, builds a compute
// pipeline whose workgroup size comes from specialization constant 0, runs a
// single-workgroup dispatch in which each subgroup's elected invocation
// records gl_SubgroupSize into a storage buffer, then validates on the host
// that (a) every recorded non-zero size is identical and (b) the number of
// recorded subgroups equals ceil(localSize / subgroupSize).
tcu::TestStatus MultipleDispatchesUniformSubgroupSizeInstance::iterate(void)
{
    const DeviceInterface &vk = m_context.getDeviceInterface();
    const VkDevice device = m_context.getDevice();
    Allocator &allocator = m_context.getDefaultAllocator();
    const VkQueue queue = m_context.getUniversalQueue();
    const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();

    // One command buffer is reused across iterations; the pool allows
    // implicit reset via vkBeginCommandBuffer.
    const Move<VkCommandPool> cmdPool =
        createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex);
    const Move<VkCommandBuffer> cmdBuffer =
        allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);

    Move<VkShaderModule> computeShader =
        createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u);

    // The maximum number of invocations in a workgroup.
    const uint32_t maxLocalSize = m_context.getDeviceProperties().limits.maxComputeWorkGroupSize[0];
#ifndef CTS_USES_VULKANSC
    const uint32_t minSubgroupSize = m_context.getSubgroupSizeControlProperties().minSubgroupSize;
#else
    const uint32_t minSubgroupSize = m_context.getSubgroupSizeControlPropertiesEXT().minSubgroupSize;
#endif // CTS_USES_VULKANSC

    // Create a storage buffer to hold the sizes of subgroups.
    // Worst case: maxLocalSize invocations split into subgroups of
    // minSubgroupSize, plus one extra slot for a trailing partial subgroup.
    const VkDeviceSize bufferSize = (maxLocalSize / minSubgroupSize + 1u) * sizeof(uint32_t);

    const VkBufferCreateInfo resultBufferCreateInfo =
        makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
    BufferWithMemory resultBuffer(vk, device, allocator, resultBufferCreateInfo, MemoryRequirement::HostVisible);
    auto &resultBufferAlloc = resultBuffer.getAllocation();

    // Build descriptors for the storage buffer
    const Unique<VkDescriptorPool> descriptorPool(
        DescriptorPoolBuilder()
            .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
            .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
    const auto descriptorSetLayout1(
        DescriptorSetLayoutBuilder()
            .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
            .build(vk, device));
    const VkDescriptorBufferInfo resultInfo = makeDescriptorBufferInfo(*resultBuffer, 0u, bufferSize);

    const VkDescriptorSetAllocateInfo allocInfo = {
        VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, // sType
        DE_NULL,                                        // pNext
        *descriptorPool,                                // descriptorPool
        1u,                                             // descriptorSetCount
        &(*descriptorSetLayout1)                        // pSetLayouts
    };

    Move<VkDescriptorSet> descriptorSet = allocateDescriptorSet(vk, device, &allocInfo);
    DescriptorSetUpdateBuilder builder;

    builder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
                        VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultInfo);
    builder.update(vk, device);

    // Compute pipeline layout, shared by the per-local-size pipelines below.
    const Move<VkPipelineLayout> computePipelineLayout = makePipelineLayout(vk, device, *descriptorSetLayout1);

    for (uint32_t localSize = 1u; localSize <= maxLocalSize; localSize *= 2u)
    {
        // On each iteration, change the number of invocations which might affect
        // the subgroup size. The value is fed through specialization constant 0
        // (local_size_x_id = 0 in the shader).
        const VkSpecializationMapEntry entries = {
            0u,               // uint32_t constantID;
            0u,               // uint32_t offset;
            sizeof(localSize) // size_t size;
        };

        const VkSpecializationInfo specInfo = {
            1,                 // mapEntryCount
            &entries,          // pMapEntries
            sizeof(localSize), // dataSize
            &localSize         // pData
        };

        // ALLOW_VARYING_SUBGROUP_SIZE lets the implementation pick any valid
        // subgroup size for this pipeline; the test then checks that whatever
        // was picked is uniform across the whole dispatch.
        const VkPipelineShaderStageCreateInfo shaderStageCreateInfo = {
            VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // sType
            DE_NULL,                                             // pNext
            VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT, // flags
            VK_SHADER_STAGE_COMPUTE_BIT,                         // stage
            *computeShader,                                      // module
            "main",                                              // pName
            &specInfo,                                           // pSpecializationInfo
        };

        const VkComputePipelineCreateInfo pipelineCreateInfo = {
            VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // sType
            DE_NULL,                                        // pNext
            0u,                                             // flags
            shaderStageCreateInfo,                          // stage
            *computePipelineLayout,                         // layout
            (VkPipeline)0,                                  // basePipelineHandle
            0u,                                             // basePipelineIndex
        };

        Move<VkPipeline> computePipeline = createComputePipeline(vk, device, (VkPipelineCache)0u, &pipelineCreateInfo);

        beginCommandBuffer(vk, *cmdBuffer);

        // Clears the values in the buffer. Zero marks "no subgroup wrote
        // here" since gl_SubgroupSize is always >= 1.
        vk.cmdFillBuffer(*cmdBuffer, *resultBuffer, 0u, VK_WHOLE_SIZE, 0);

        // Make the fill visible to the compute shader's writes.
        const auto fillBarrier = makeBufferMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_WRITE_BIT,
                                                         *resultBuffer, 0ull, bufferSize);
        cmdPipelineBufferMemoryBarrier(vk, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT,
                                       VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, &fillBarrier);

        // Runs pipeline.
        vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipelineLayout, 0u, 1u,
                                 &descriptorSet.get(), 0u, nullptr);
        vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
        vk.cmdDispatch(*cmdBuffer, 1, 1, 1);

        // Make shader writes visible to host reads after the wait below.
        const auto computeToHostBarrier = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
        cmdPipelineMemoryBarrier(vk, *cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
                                 &computeToHostBarrier);

        endCommandBuffer(vk, *cmdBuffer);
        submitCommandsAndWait(vk, device, queue, *cmdBuffer);

        // Invalidate the host mapping in case the memory is non-coherent.
        invalidateAlloc(vk, device, resultBufferAlloc);

        // Validate results: all non-zero subgroup sizes must be the same.
        const uint32_t *res = static_cast<const uint32_t *>(resultBufferAlloc.getHostPtr());
        const uint32_t maxIters = static_cast<uint32_t>(bufferSize / sizeof(uint32_t));
        uint32_t size = 0u;          // First (and expected only) subgroup size seen.
        uint32_t subgroupCount = 0u; // Number of slots a subgroup wrote to.
        auto &log = m_context.getTestContext().getLog();

        for (uint32_t sizeIdx = 0u; sizeIdx < maxIters; ++sizeIdx)
        {
            if (res[sizeIdx] != 0u)
            {
                if (size == 0u)
                {
                    size = res[sizeIdx];
                }
                else if (res[sizeIdx] != size)
                {
                    std::ostringstream msg;
                    msg << "Subgroup size not uniform in command scope: " << res[sizeIdx] << " != " << size
                        << " at position " << sizeIdx;
                    TCU_FAIL(msg.str());
                }
                ++subgroupCount;
            }
        }

        // Subgroup size is guaranteed to be at least 1.
        if (size == 0u)
            TCU_FAIL("Subgroup size must be at least 1");

        // The number of reported sizes must match ceil(localSize / size).
        const auto expectedSubgroupCount = (localSize / size + ((localSize % size != 0u) ? 1u : 0u));
        if (subgroupCount != expectedSubgroupCount)
        {
            std::ostringstream msg;
            msg << "Local size " << localSize << " with subgroup size " << size << " resulted in subgroup count "
                << subgroupCount << " != " << expectedSubgroupCount;
            TCU_FAIL(msg.str());
        }

        {
            std::ostringstream msg;
            msg << "Subgroup size " << size << " with local size " << localSize;
            log << tcu::TestLog::Message << msg.str() << tcu::TestLog::EndMessage;
        }
    }

    return tcu::TestStatus::pass("Pass");
}
237
// Test case wrapper: declares support requirements, builds the compute
// shader, and creates the instance that performs the actual checks.
class MultipleDispatchesUniformSubgroupSize : public TestCase
{
public:
    MultipleDispatchesUniformSubgroupSize(tcu::TestContext &testCtx, const std::string &name);

    // Adds the GLSL compute shader to the program collection.
    void initPrograms(SourceCollections &programCollection) const;
    // Creates the runtime instance executing the test.
    TestInstance *createInstance(Context &context) const;
    // Throws NotSupportedError when subgroup size control is unavailable.
    virtual void checkSupport(Context &context) const;
};
247
// Trivial constructor forwarding to TestCase.
MultipleDispatchesUniformSubgroupSize::MultipleDispatchesUniformSubgroupSize(tcu::TestContext &testCtx,
                                                                             const std::string &name)
    : TestCase(testCtx, name)
{
}
253
checkSupport(Context & context) const254 void MultipleDispatchesUniformSubgroupSize::checkSupport(Context &context) const
255 {
256 #ifndef CTS_USES_VULKANSC
257 const VkPhysicalDeviceSubgroupSizeControlFeatures &subgroupSizeControlFeatures =
258 context.getSubgroupSizeControlFeatures();
259 #else
260 const VkPhysicalDeviceSubgroupSizeControlFeaturesEXT &subgroupSizeControlFeatures =
261 context.getSubgroupSizeControlFeaturesEXT();
262 #endif // CTS_USES_VULKANSC
263
264 if (subgroupSizeControlFeatures.subgroupSizeControl == false)
265 TCU_THROW(NotSupportedError, "Device does not support varying subgroup sizes");
266 }
267
// Builds the GLSL compute shader: the elected invocation of each subgroup
// stores gl_SubgroupSize at a slot unique to that subgroup. The X workgroup
// size is provided by specialization constant 0 at pipeline-creation time.
void MultipleDispatchesUniformSubgroupSize::initPrograms(SourceCollections &programCollection) const
{
    std::ostringstream computeSrc;
    computeSrc << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
               << "#extension GL_KHR_shader_subgroup_basic : enable\n"
               << "#extension GL_KHR_shader_subgroup_vote : enable\n"
               << "#extension GL_KHR_shader_subgroup_ballot : enable\n"
               << "layout(std430, binding = 0) buffer Outputs { uint sizes[]; };\n"

               // Workgroup size along X is specialization constant 0.
               << "layout(local_size_x_id = 0) in;\n"

               << "void main()\n"
               << "{\n"
               << " if (subgroupElect())\n"
               << " {\n"
               << " sizes[gl_WorkGroupID.x * gl_NumSubgroups + gl_SubgroupID] = gl_SubgroupSize;\n"
               << " }\n"
               << "}\n";

    // Subgroup operations require SPIR-V 1.3 or later.
    programCollection.glslSources.add("comp")
        << glu::ComputeSource(computeSrc.str())
        << ShaderBuildOptions(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3, 0u);
}
291
// Factory method creating the runtime instance for this test case.
TestInstance *MultipleDispatchesUniformSubgroupSize::createInstance(Context &context) const
{
    return new MultipleDispatchesUniformSubgroupSizeInstance(context);
}
296
297 } // namespace
298
createMultipleDispatchesUniformSubgroupSizeTests(tcu::TestContext & testCtx)299 tcu::TestCaseGroup *createMultipleDispatchesUniformSubgroupSizeTests(tcu::TestContext &testCtx)
300 {
301 // Multiple dispatches uniform subgroup size tests
302 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "multiple_dispatches"));
303
304 testGroup->addChild(new MultipleDispatchesUniformSubgroupSize(testCtx, "uniform_subgroup_size"));
305 return testGroup.release();
306 }
307
308 } // namespace subgroups
309 } // namespace vkt
310