/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2019 The Khronos Group Inc.
 * Copyright (c) 2019 Valve Corporation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */ /*!
 * \file
 * \brief VK_EXT_subgroup_size_control Tests
 */ /*--------------------------------------------------------------------*/

#include "vktSubgroupsSizeControlTests.hpp"
#include "vktSubgroupsTestsUtils.hpp"
#include "vktTestCaseUtil.hpp"
#include "tcuTestLog.hpp"

#include <string>
#include <vector>
#include <algorithm>

using namespace tcu;
using namespace std;
using namespace vk;
using namespace vkt;

namespace
{

enum RequiredSubgroupSizeMode
{
    REQUIRED_SUBGROUP_SIZE_NONE = 0,
    REQUIRED_SUBGROUP_SIZE_MIN  = 1,
    REQUIRED_SUBGROUP_SIZE_MAX  = 2,
};

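// Parameters of a single test case: the pipeline shader stage create flags and shader stages under test,
// the required-subgroup-size mode, and the SPIR-V version used to build the shaders.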
struct CaseDefinition
{
    uint32_t pipelineShaderStageCreateFlags;
    VkShaderStageFlags shaderStage;
    bool requiresBallot;
    uint32_t requiredSubgroupSizeMode;
    de::SharedPtr<bool> geometryPointSizeSupported;
    SpirvVersion spirvVersion;

    bool hasFullSubgroupsFlag(void) const
    {
        return ((pipelineShaderStageCreateFlags & VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT) !=
                0u);
    }

    bool shaderUsesFullSubgroups(void) const
    {
        return (hasFullSubgroupsFlag() || (spirvVersion >= SPIRV_VERSION_1_6));
    }
};

struct TestParams
{
    bool useSpirv16;
    bool flagsEnabled;
    string postfix;
};

struct internalDataStruct
{
    const Context *context;
    struct CaseDefinition caseDef;
    const uint32_t requiredSubgroupSize;
    const bool isRequiredSubgroupSize; // Indicates if the test uses VkPipelineShaderStageRequiredSubgroupSizeCreateInfo.
};

inline bool makeDeBool(bool value)
{
    return (value ? true : false);
}

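// Splits numWorkGroupInvocations into a 3D local workgroup size, keeping the X and Y dimensions
// within the per-dimension limits given by maxWorkGroupSize.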
UVec3 getLocalSizes(const uint32_t maxWorkGroupSize[3], uint32_t maxWorkGroupInvocations,
                    uint32_t numWorkGroupInvocations)
{
    DE_ASSERT(numWorkGroupInvocations <= maxWorkGroupInvocations);
    DE_UNREF(maxWorkGroupInvocations); // For release builds.

    const uint32_t localSizeX = de::gcd(numWorkGroupInvocations, maxWorkGroupSize[0]);
    const uint32_t localSizeY = de::gcd(std::max(numWorkGroupInvocations / localSizeX, 1u), maxWorkGroupSize[1]);
    const uint32_t localSizeZ = std::max(numWorkGroupInvocations / (localSizeX * localSizeY), 1u);

    return UVec3(localSizeX, localSizeY, localSizeZ);
}

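// Translates the required-subgroup-size mode of a test case into an actual subgroup size,
// using the device's subgroup size control properties.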
uint32_t getRequiredSubgroupSizeFromMode(
    Context &context, const CaseDefinition &caseDef,
#ifndef CTS_USES_VULKANSC
    const VkPhysicalDeviceSubgroupSizeControlProperties &subgroupSizeControlProperties)
#else
    const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT &subgroupSizeControlProperties)
#endif // CTS_USES_VULKANSC
{
    switch (caseDef.requiredSubgroupSizeMode)
    {
    case REQUIRED_SUBGROUP_SIZE_MAX:
        return subgroupSizeControlProperties.maxSubgroupSize;
    case REQUIRED_SUBGROUP_SIZE_MIN:
        return subgroupSizeControlProperties.minSubgroupSize;
    case REQUIRED_SUBGROUP_SIZE_NONE:
        return subgroups::getSubgroupSize(context);
    default:
        TCU_THROW(NotSupportedError, "Unsupported Subgroup size");
    }
}

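// The verification callbacks below inspect every gl_SubgroupSize value written by the shaders:
// it must lie within [minSubgroupSize, maxSubgroupSize] and, when a required subgroup size was
// set for the pipeline stage, it must match that size exactly.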
static bool checkVertexPipelineStages(const void *internalData, vector<const void *> datas, uint32_t width, uint32_t)
{
    const struct internalDataStruct *checkInternalData =
        reinterpret_cast<const struct internalDataStruct *>(internalData);
    const Context *context = checkInternalData->context;
#ifndef CTS_USES_VULKANSC
    const VkPhysicalDeviceSubgroupSizeControlProperties &subgroupSizeControlProperties =
        context->getSubgroupSizeControlProperties();
#else
    const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT &subgroupSizeControlProperties =
        context->getSubgroupSizeControlPropertiesEXT();
#endif // CTS_USES_VULKANSC

    TestLog &log = context->getTestContext().getLog();
    const uint32_t *data = reinterpret_cast<const uint32_t *>(datas[0]);

    for (uint32_t i = 0; i < width; i++)
    {
        if (data[i] > subgroupSizeControlProperties.maxSubgroupSize ||
            data[i] < subgroupSizeControlProperties.minSubgroupSize)
        {
            log << TestLog::Message << "gl_SubgroupSize (" << data[i] << ") value is outside limits ("
                << subgroupSizeControlProperties.minSubgroupSize << ", "
                << subgroupSizeControlProperties.maxSubgroupSize << ")" << TestLog::EndMessage;

            return false;
        }

        if (checkInternalData->isRequiredSubgroupSize && data[i] != checkInternalData->requiredSubgroupSize)
        {
            log << TestLog::Message << "gl_SubgroupSize (" << data[i]
                << ") is not equal to the required subgroup size value (" << checkInternalData->requiredSubgroupSize
                << ")" << TestLog::EndMessage;

            return false;
        }
    }

    return true;
}

static bool checkFragmentPipelineStages(const void *internalData, vector<const void *> datas, uint32_t width,
                                        uint32_t height, uint32_t)
{
    const struct internalDataStruct *checkInternalData =
        reinterpret_cast<const struct internalDataStruct *>(internalData);
    const Context *context = checkInternalData->context;
#ifndef CTS_USES_VULKANSC
    const VkPhysicalDeviceSubgroupSizeControlProperties &subgroupSizeControlProperties =
        context->getSubgroupSizeControlProperties();
#else
    const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT &subgroupSizeControlProperties =
        context->getSubgroupSizeControlPropertiesEXT();
#endif // CTS_USES_VULKANSC
    TestLog &log = context->getTestContext().getLog();
    const uint32_t *data = reinterpret_cast<const uint32_t *>(datas[0]);

    for (uint32_t x = 0u; x < width; ++x)
    {
        for (uint32_t y = 0u; y < height; ++y)
        {
            const uint32_t ndx = (x * height + y);

            if (data[ndx] > subgroupSizeControlProperties.maxSubgroupSize ||
                data[ndx] < subgroupSizeControlProperties.minSubgroupSize)
            {
                log << TestLog::Message << "gl_SubgroupSize (" << data[ndx] << ") value is outside limits ("
                    << subgroupSizeControlProperties.minSubgroupSize << ", "
                    << subgroupSizeControlProperties.maxSubgroupSize << ")" << TestLog::EndMessage;

                return false;
            }

            if (checkInternalData->isRequiredSubgroupSize && data[ndx] != checkInternalData->requiredSubgroupSize)
            {
                log << TestLog::Message << "gl_SubgroupSize (" << data[ndx]
                    << ") is not equal to the required subgroup size value (" << checkInternalData->requiredSubgroupSize
                    << ")" << TestLog::EndMessage;

                return false;
            }
        }
    }
    return true;
}

static bool checkCompute(const void *internalData, vector<const void *> datas, const uint32_t numWorkgroups[3],
                         const uint32_t localSize[3], uint32_t)
{
    const struct internalDataStruct *checkInternalData =
        reinterpret_cast<const struct internalDataStruct *>(internalData);
    const Context *context = checkInternalData->context;
#ifndef CTS_USES_VULKANSC
    const VkPhysicalDeviceSubgroupSizeControlProperties &subgroupSizeControlProperties =
        context->getSubgroupSizeControlProperties();
#else
    const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT &subgroupSizeControlProperties =
        context->getSubgroupSizeControlPropertiesEXT();
#endif // CTS_USES_VULKANSC
    TestLog &log = context->getTestContext().getLog();
    const uint32_t globalSizeX = numWorkgroups[0] * localSize[0];
    const uint32_t globalSizeY = numWorkgroups[1] * localSize[1];
    const uint32_t globalSizeZ = numWorkgroups[2] * localSize[2];
    const uint32_t width = globalSizeX * globalSizeY * globalSizeZ;
    const uint32_t *data = reinterpret_cast<const uint32_t *>(datas[0]);

    for (uint32_t i = 0; i < width; i++)
    {
        if (data[i] > subgroupSizeControlProperties.maxSubgroupSize ||
            data[i] < subgroupSizeControlProperties.minSubgroupSize)
        {
            log << TestLog::Message << "[" << localSize[0] << ", " << localSize[1] << ", " << localSize[2] << "] "
                << "gl_SubgroupSize (" << data[i] << ") value is outside limits ("
                << subgroupSizeControlProperties.minSubgroupSize << ", "
                << subgroupSizeControlProperties.maxSubgroupSize << ")" << TestLog::EndMessage;

            return false;
        }

        if (checkInternalData->isRequiredSubgroupSize && data[i] != checkInternalData->requiredSubgroupSize)
        {
            log << TestLog::Message << "[" << localSize[0] << ", " << localSize[1] << ", " << localSize[2] << "] "
                << "gl_SubgroupSize (" << data[i] << ") is not equal to the required subgroup size value ("
                << checkInternalData->requiredSubgroupSize << ")" << TestLog::EndMessage;

            return false;
        }
    }

    return true;
}

static bool checkComputeRequireFull(const void *internalData, vector<const void *> datas,
                                    const uint32_t numWorkgroups[3], const uint32_t localSize[3], uint32_t)
{
    const struct internalDataStruct *checkInternalData =
        reinterpret_cast<const struct internalDataStruct *>(internalData);
    const Context *context = checkInternalData->context;
#ifndef CTS_USES_VULKANSC
    const VkPhysicalDeviceSubgroupSizeControlProperties &subgroupSizeControlProperties =
        context->getSubgroupSizeControlProperties();
#else
    const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT &subgroupSizeControlProperties =
        context->getSubgroupSizeControlPropertiesEXT();
#endif // CTS_USES_VULKANSC
    TestLog &log = context->getTestContext().getLog();
    const uint32_t globalSizeX = numWorkgroups[0] * localSize[0];
    const uint32_t globalSizeY = numWorkgroups[1] * localSize[1];
    const uint32_t globalSizeZ = numWorkgroups[2] * localSize[2];
    const uint32_t width = globalSizeX * globalSizeY * globalSizeZ;
    const UVec4 *data = reinterpret_cast<const UVec4 *>(datas[0]);
    const uint32_t numSubgroups =
        (localSize[0] * localSize[1] * localSize[2]) / checkInternalData->requiredSubgroupSize;
    const bool exactSubgroupSize =
        (checkInternalData->caseDef.shaderUsesFullSubgroups() && checkInternalData->isRequiredSubgroupSize);

    for (uint32_t i = 0; i < width; i++)
    {
        if (data[i].x() > subgroupSizeControlProperties.maxSubgroupSize ||
            data[i].x() < subgroupSizeControlProperties.minSubgroupSize)
        {
            log << TestLog::Message << "[" << localSize[0] << ", " << localSize[1] << ", " << localSize[2] << "] "
                << "gl_SubgroupSize value ( " << data[i].x() << ") is outside limits ["
                << subgroupSizeControlProperties.minSubgroupSize << ", "
                << subgroupSizeControlProperties.maxSubgroupSize << "]" << TestLog::EndMessage;
            return false;
        }

        if (data[i].x() != data[i].y())
        {
            log << TestLog::Message << "[" << localSize[0] << ", " << localSize[1] << ", " << localSize[2] << "] "
                << "gl_SubgroupSize ( " << data[i].x() << ") does not match the active number of subgroup invocations ("
                << data[i].y() << ")" << TestLog::EndMessage;
            return false;
        }

        if (exactSubgroupSize && data[i].x() != checkInternalData->requiredSubgroupSize)
        {
            log << TestLog::Message << "[" << localSize[0] << ", " << localSize[1] << ", " << localSize[2] << "] "
                << "expected subgroupSize (" << checkInternalData->requiredSubgroupSize
                << ") doesn't match gl_SubgroupSize ( " << data[i].x() << ")" << TestLog::EndMessage;
            return false;
        }

        if (exactSubgroupSize && data[i].z() != numSubgroups)
        {
            log << TestLog::Message << "[" << localSize[0] << ", " << localSize[1] << ", " << localSize[2] << "] "
                << "expected number of subgroups dispatched (" << numSubgroups << ") doesn't match gl_NumSubgroups ("
                << data[i].z() << ")" << TestLog::EndMessage;
            return false;
        }
    }

    return true;
}

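// Builds the shader sources for the framebuffer-based (no SSBO) test variants; each tested stage
// writes gl_SubgroupSize to a color output for later verification on the host.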
void initFrameBufferPrograms(SourceCollections &programCollection, CaseDefinition caseDef)
{
    const ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, caseDef.spirvVersion, 0u);

    if (VK_SHADER_STAGE_FRAGMENT_BIT != caseDef.shaderStage)
        subgroups::setFragmentShaderFrameBuffer(programCollection);

    if (VK_SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage && VK_SHADER_STAGE_FRAGMENT_BIT != caseDef.shaderStage)
        subgroups::setVertexShaderFrameBuffer(programCollection);

    string bdyStr = "uint tempResult = gl_SubgroupSize;\n";

    if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
    {
        ostringstream vertex;

        vertex << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
               << "#extension GL_KHR_shader_subgroup_basic: enable\n"
               << "layout(location = 0) in highp vec4 in_position;\n"
               << "layout(location = 0) out float out_color;\n"
               << "\n"
               << "void main (void)\n"
               << "{\n"
               << bdyStr << " out_color = float(tempResult);\n"
               << " gl_Position = in_position;\n"
               << " gl_PointSize = 1.0f;\n"
               << "}\n";

        programCollection.glslSources.add("vert") << glu::VertexSource(vertex.str()) << buildOptions;
    }
    else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
    {
        ostringstream geometry;

        geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
                 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
                 << "layout(points) in;\n"
                 << "layout(points, max_vertices = 1) out;\n"
                 << "layout(location = 0) out float out_color;\n"
                 << "void main (void)\n"
                 << "{\n"
                 << bdyStr << " out_color = float(tempResult);\n"
                 << " gl_Position = gl_in[0].gl_Position;\n"
                 << " gl_PointSize = 1.0f;"
                 << " EmitVertex();\n"
                 << " EndPrimitive();\n"
                 << "}\n";

        programCollection.glslSources.add("geometry") << glu::GeometrySource(geometry.str()) << buildOptions;
    }
    else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
    {
        ostringstream controlSource;

        controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
                      << "#extension GL_KHR_shader_subgroup_basic: enable\n"
                      << "layout(vertices = 2) out;\n"
                      << "layout(location = 0) out float out_color[];\n"
                      << "\n"
                      << "void main (void)\n"
                      << "{\n"
                      << " if (gl_InvocationID == 0)\n"
                      << " {\n"
                      << " gl_TessLevelOuter[0] = 1.0f;\n"
                      << " gl_TessLevelOuter[1] = 1.0f;\n"
                      << " }\n"
                      << bdyStr << " out_color[gl_InvocationID ] = float(tempResult);\n"
                      << " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
                      << "}\n";

        programCollection.glslSources.add("tesc")
            << glu::TessellationControlSource(controlSource.str()) << buildOptions;
        subgroups::setTesEvalShaderFrameBuffer(programCollection);
    }
    else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
    {
        ostringstream evaluationSource;
        evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
                         << "#extension GL_KHR_shader_subgroup_basic: enable\n"
                         << "layout(isolines, equal_spacing, ccw ) in;\n"
                         << "layout(location = 0) out float out_color;\n"
                         << "void main (void)\n"
                         << "{\n"
                         << bdyStr << " out_color = float(tempResult);\n"
                         << " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
                         << "}\n";

        subgroups::setTesCtrlShaderFrameBuffer(programCollection);
        programCollection.glslSources.add("tese")
            << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
    }
    else if (VK_SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage)
    {
        const string vertex = string(glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)) +
                              "\n"
                              "void main (void)\n"
                              "{\n"
                              " vec2 uv = vec2(float(gl_VertexIndex & 1), float((gl_VertexIndex >> 1) & 1));\n"
                              " gl_Position = vec4(uv * 4.0f -2.0f, 0.0f, 1.0f);\n"
                              " gl_PointSize = 1.0f;\n"
                              "}\n";
        programCollection.glslSources.add("vert") << glu::VertexSource(vertex) << buildOptions;

        ostringstream fragmentSource;

        fragmentSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
                       << "precision highp int;\n"
                       << "#extension GL_KHR_shader_subgroup_basic: enable\n"
                       << "layout(location = 0) out uint out_color;\n"
                       << "void main()\n"
                       << "{\n"
                       << bdyStr << " out_color = tempResult;\n"
                       << "}\n";

        programCollection.glslSources.add("fragment") << glu::FragmentSource(fragmentSource.str()) << buildOptions;
    }
    else
    {
        DE_FATAL("Unsupported shader stage");
    }
}

string getExtHeader(const CaseDefinition &)
{
    return "#extension GL_KHR_shader_subgroup_basic: enable\n";
}

vector<string> getPerStageHeadDeclarations(const CaseDefinition &caseDef)
{
    const uint32_t stageCount = subgroups::getStagesCount(caseDef.shaderStage);
    const bool fragment = (caseDef.shaderStage & VK_SHADER_STAGE_FRAGMENT_BIT) != 0;
    vector<string> result(stageCount, string());

    if (fragment)
        result.reserve(result.size() + 1);

    for (size_t i = 0; i < result.size(); ++i)
    {
        result[i] = "layout(set = 0, binding = " + de::toString(i) +
                    ", std430) buffer Buffer1\n"
                    "{\n"
                    " uint result[];\n"
                    "};\n";
    }

    if (fragment)
    {
        const string fragPart = "layout(location = 0) out uint result;\n";

        result.push_back(fragPart);
    }

    return result;
}

string getTestSource(const CaseDefinition &)
{
    return " uint tempResult = gl_SubgroupSize;\n"
           " tempRes = tempResult;\n";
}

void initPrograms(SourceCollections &programCollection, CaseDefinition caseDef)
{
    ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, caseDef.spirvVersion, 0u,
                                    (caseDef.spirvVersion == vk::SPIRV_VERSION_1_4));
    const string extHeader = getExtHeader(caseDef);
    const string testSrc = getTestSource(caseDef);
    const vector<string> headDeclarations = getPerStageHeadDeclarations(caseDef);

    subgroups::initStdPrograms(programCollection, buildOptions, caseDef.shaderStage, VK_FORMAT_R32_UINT,
                               *caseDef.geometryPointSizeSupported, extHeader, testSrc, "", headDeclarations);
}

void initProgramsRequireFull(SourceCollections &programCollection, CaseDefinition caseDef)
{
    if (VK_SHADER_STAGE_COMPUTE_BIT != caseDef.shaderStage)
        DE_FATAL("Unsupported shader stage");

    ostringstream src;

    src << "#version 450\n"
        << "#extension GL_KHR_shader_subgroup_basic: enable\n"
        << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
        << "layout (local_size_x_id = 0, local_size_y_id = 1, "
           "local_size_z_id = 2) in;\n"
        << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
        << "{\n"
        << " uvec4 result[];\n"
        << "};\n"
        << "\n"
        << "void main (void)\n"
        << "{\n"
        << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
        << " highp uint offset = globalSize.x * ((globalSize.y * "
           "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
           "gl_GlobalInvocationID.x;\n"
        << " result[offset].x = gl_SubgroupSize;\n" // save the subgroup size value
        << " uint numActive = subgroupBallotBitCount(subgroupBallot(true));\n"
        << " result[offset].y = numActive;\n" // save the number of active subgroup invocations
        << " result[offset].z = gl_NumSubgroups;\n" // save the number of subgroups dispatched.
        << "}\n";

    programCollection.glslSources.add("comp")
        << glu::ComputeSource(src.str())
        << ShaderBuildOptions(programCollection.usedVulkanVersion, caseDef.spirvVersion, 0u);
}

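// Basic support check: subgroup operations plus the VK_EXT_subgroup_size_control extension.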
void supportedCheck(Context &context)
{
    if (!subgroups::isSubgroupSupported(context))
        TCU_THROW(NotSupportedError, "Subgroup operations are not supported");

    context.requireDeviceFunctionality("VK_EXT_subgroup_size_control");
}

void supportedCheckFeatures(Context &context, CaseDefinition caseDef)
{
    supportedCheck(context);

    if (!subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
    {
        TCU_THROW(NotSupportedError, "Shader stage is required to support subgroup operations!");
    }

    if (caseDef.shaderStage == VK_SHADER_STAGE_ALL_GRAPHICS)
    {
        const VkPhysicalDeviceFeatures &features = context.getDeviceFeatures();

        if (!features.tessellationShader || !features.geometryShader)
            TCU_THROW(NotSupportedError, "Device does not support tessellation or geometry shaders");
    }

    if (caseDef.requiresBallot &&
        !subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_BALLOT_BIT))
    {
        TCU_THROW(NotSupportedError, "Device does not support subgroup ballot operations");
    }

    if (caseDef.requiredSubgroupSizeMode != REQUIRED_SUBGROUP_SIZE_NONE ||
        caseDef.pipelineShaderStageCreateFlags == VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT)
    {
#ifndef CTS_USES_VULKANSC
        const VkPhysicalDeviceSubgroupSizeControlFeatures &subgroupSizeControlFeatures =
            context.getSubgroupSizeControlFeatures();
#else
        const VkPhysicalDeviceSubgroupSizeControlFeaturesEXT &subgroupSizeControlFeatures =
            context.getSubgroupSizeControlFeaturesEXT();
#endif // CTS_USES_VULKANSC

        if (subgroupSizeControlFeatures.subgroupSizeControl == false)
            TCU_THROW(NotSupportedError, "Device does not support varying subgroup sizes nor required subgroup size");

        if (caseDef.requiredSubgroupSizeMode != REQUIRED_SUBGROUP_SIZE_NONE)
        {
#ifndef CTS_USES_VULKANSC
            const VkPhysicalDeviceSubgroupSizeControlProperties &subgroupSizeControlProperties =
                context.getSubgroupSizeControlProperties();
#else
            const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT &subgroupSizeControlProperties =
                context.getSubgroupSizeControlPropertiesEXT();
#endif // CTS_USES_VULKANSC

            if ((subgroupSizeControlProperties.requiredSubgroupSizeStages & caseDef.shaderStage) != caseDef.shaderStage)
                TCU_THROW(NotSupportedError,
                          "Device does not support setting required subgroup size for the stages selected");
        }
    }

    if (caseDef.hasFullSubgroupsFlag())
    {
#ifndef CTS_USES_VULKANSC
        const VkPhysicalDeviceSubgroupSizeControlFeatures &subgroupSizeControlFeatures =
            context.getSubgroupSizeControlFeatures();
#else
        const VkPhysicalDeviceSubgroupSizeControlFeaturesEXT &subgroupSizeControlFeatures =
            context.getSubgroupSizeControlFeaturesEXT();
#endif // CTS_USES_VULKANSC

        if (subgroupSizeControlFeatures.computeFullSubgroups == false)
            TCU_THROW(NotSupportedError, "Device does not support full subgroups in compute shaders");
    }

    *caseDef.geometryPointSizeSupported = subgroups::isTessellationAndGeometryPointSizeSupported(context);

#ifndef CTS_USES_VULKANSC
    if (isAllRayTracingStages(caseDef.shaderStage))
    {
        context.requireDeviceFunctionality("VK_KHR_ray_tracing_pipeline");
    }
    else if (isAllMeshShadingStages(caseDef.shaderStage))
    {
        context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_VERTEX_PIPELINE_STORES_AND_ATOMICS);
        context.requireDeviceFunctionality("VK_EXT_mesh_shader");

        if ((caseDef.shaderStage & VK_SHADER_STAGE_TASK_BIT_EXT) != 0u)
        {
            const auto &features = context.getMeshShaderFeaturesEXT();
            if (!features.taskShader)
                TCU_THROW(NotSupportedError, "Task shaders not supported");
        }
    }
#endif // CTS_USES_VULKANSC

    if (caseDef.spirvVersion > vk::getMaxSpirvVersionForVulkan(context.getUsedApiVersion()))
        TCU_THROW(NotSupportedError, "Shader requires SPIR-V version higher than available");
}

void supportedCheckFeaturesShader(Context &context, CaseDefinition caseDef)
{
    supportedCheckFeatures(context, caseDef);

    subgroups::supportedCheckShader(context, caseDef.shaderStage);
}

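// Framebuffer variant without SSBOs: gl_SubgroupSize is written to a color attachment and verified on the host.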
TestStatus noSSBOtest(Context &context, const CaseDefinition caseDef)
{
    const VkFormat format = VK_FORMAT_R32_UINT;
    const uint32_t &flags = caseDef.pipelineShaderStageCreateFlags;
    const struct internalDataStruct internalData = {
        &context,
        caseDef,
        0u,
        false,
    };

    switch (caseDef.shaderStage)
    {
    case VK_SHADER_STAGE_VERTEX_BIT:
        return subgroups::makeVertexFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData,
                                                                        checkVertexPipelineStages, flags, 0u);
    case VK_SHADER_STAGE_GEOMETRY_BIT:
        return subgroups::makeGeometryFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData,
                                                                          checkVertexPipelineStages, flags, 0u);
    case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
        return subgroups::makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(
            context, format, DE_NULL, 0, &internalData, checkVertexPipelineStages, caseDef.shaderStage, flags, 0u);
    case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
        return subgroups::makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(
            context, format, DE_NULL, 0, &internalData, checkVertexPipelineStages, caseDef.shaderStage, flags, 0u);
    case VK_SHADER_STAGE_FRAGMENT_BIT:
        return subgroups::makeFragmentFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData,
                                                                          checkFragmentPipelineStages, flags, 0u);
    default:
        TCU_THROW(InternalError, "Unhandled shader stage");
    }
}

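// Default test entry point: runs the stage-appropriate helper with the case's pipeline shader stage
// create flags and no explicit required subgroup size.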
TestStatus test(Context &context, const CaseDefinition caseDef)
{
    if (isAllComputeStages(caseDef.shaderStage))
    {
        const uint32_t numWorkgroups[3] = {1, 1, 1};
        const uint32_t subgroupSize = subgroups::getSubgroupSize(context);
        const auto &physicalDeviceProperties = context.getDeviceProperties();
        const auto &maxWorkGroupSize = physicalDeviceProperties.limits.maxComputeWorkGroupSize;
        const auto &maxInvocations = physicalDeviceProperties.limits.maxComputeWorkGroupInvocations;
        // Calculate the local workgroup sizes to exercise the maximum supported by the driver
        const UVec3 localSize = getLocalSizes(maxWorkGroupSize, maxInvocations, maxInvocations);
        const uint32_t localSizesToTestCount = 16;
        const uint32_t localSizesToTest[localSizesToTestCount][3] = {
            {1, 1, 1},
            {32, 4, 1},
            {32, 1, 4},
            {1, 32, 4},
            {1, 4, 32},
            {4, 1, 32},
            {4, 32, 1},
            {subgroupSize, 1, 1},
            {1, subgroupSize, 1},
            {1, 1, subgroupSize},
            {3, 5, 7},
            {128, 1, 1},
            {1, 128, 1},
            {1, 1, 64},
            {localSize.x(), localSize.y(), localSize.z()},
            {1, 1, 1} // Isn't used, just here to make double buffering checks easier
        };
        const struct internalDataStruct internalData = {
            &context,
            caseDef,
            subgroupSize,
            false,
        };

        return subgroups::makeComputeTestRequiredSubgroupSize(
            context, VK_FORMAT_R32_UINT, DE_NULL, 0, &internalData, checkCompute,
            caseDef.pipelineShaderStageCreateFlags, numWorkgroups, makeDeBool(internalData.isRequiredSubgroupSize),
            subgroupSize, localSizesToTest, localSizesToTestCount);
    }
#ifndef CTS_USES_VULKANSC
    else if (isAllMeshShadingStages(caseDef.shaderStage))
    {
        const bool isMesh = ((caseDef.shaderStage & VK_SHADER_STAGE_MESH_BIT_EXT) != 0u);
        const bool isTask = ((caseDef.shaderStage & VK_SHADER_STAGE_TASK_BIT_EXT) != 0u);

        DE_ASSERT(isMesh != isTask);
        DE_UNREF(isTask); // For release builds.

        const uint32_t numWorkgroups[3] = {1, 1, 1};
        const uint32_t subgroupSize = subgroups::getSubgroupSize(context);
        const auto &meshProperties = context.getMeshShaderPropertiesEXT();
        const auto &maxWorkGroupSize =
            (isMesh ? meshProperties.maxMeshWorkGroupSize : meshProperties.maxTaskWorkGroupSize);
        const auto &maxInvocations =
            (isMesh ? meshProperties.maxMeshWorkGroupInvocations : meshProperties.maxTaskWorkGroupInvocations);
        // Calculate the local workgroup sizes to exercise the maximum supported by the driver
        const UVec3 localSize = getLocalSizes(maxWorkGroupSize, maxInvocations, maxInvocations);
        const uint32_t localSizesToTestCount = 16;
        const uint32_t localSizesToTest[localSizesToTestCount][3] = {
            {1, 1, 1},
            {32, 4, 1},
            {32, 1, 4},
            {1, 32, 4},
            {1, 4, 32},
            {4, 1, 32},
            {4, 32, 1},
            {subgroupSize, 1, 1},
            {1, subgroupSize, 1},
            {1, 1, subgroupSize},
            {3, 5, 7},
            {128, 1, 1},
            {1, 128, 1},
            {1, 1, 64},
            {localSize.x(), localSize.y(), localSize.z()},
            {1, 1, 1} // Isn't used, just here to make double buffering checks easier
        };
        const struct internalDataStruct internalData = {
            &context,
            caseDef,
            subgroupSize,
            false,
        };

        return subgroups::makeMeshTestRequiredSubgroupSize(
            context, VK_FORMAT_R32_UINT, nullptr, 0, &internalData, checkCompute,
            caseDef.pipelineShaderStageCreateFlags, numWorkgroups, makeDeBool(internalData.isRequiredSubgroupSize),
            subgroupSize, localSizesToTest, localSizesToTestCount);
    }
#endif // CTS_USES_VULKANSC
    else if (isAllGraphicsStages(caseDef.shaderStage))
    {
        const VkShaderStageFlags stages = subgroups::getPossibleGraphicsSubgroupStages(context, caseDef.shaderStage);
        struct internalDataStruct internalData = {
            &context,
            caseDef,
            0u,
            false,
        };

        return subgroups::allStagesRequiredSubgroupSize(
            context, VK_FORMAT_R32_UINT, nullptr, 0, &internalData, checkVertexPipelineStages, stages,
            caseDef.pipelineShaderStageCreateFlags, caseDef.pipelineShaderStageCreateFlags,
            caseDef.pipelineShaderStageCreateFlags, caseDef.pipelineShaderStageCreateFlags,
            caseDef.pipelineShaderStageCreateFlags, nullptr);
    }
#ifndef CTS_USES_VULKANSC
    else if (isAllRayTracingStages(caseDef.shaderStage))
    {
        const VkShaderStageFlags stages = subgroups::getPossibleRayTracingSubgroupStages(context, caseDef.shaderStage);
        const vector<uint32_t> flags(6, caseDef.pipelineShaderStageCreateFlags);
        const struct internalDataStruct internalData = {
            &context,
            caseDef,
            0u,
            false,
        };

        return subgroups::allRayTracingStagesRequiredSubgroupSize(context, VK_FORMAT_R32_UINT, nullptr, 0,
                                                                  &internalData, checkVertexPipelineStages, stages,
                                                                  flags.data(), nullptr);
    }
#endif // CTS_USES_VULKANSC
    else
        TCU_THROW(InternalError, "Unknown stage or invalid stage set");
}

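// Compute-only test for VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT (optionally
// combined with the allow-varying flag), using workgroup sizes built around maxSubgroupSize.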
TestStatus testRequireFullSubgroups(Context &context, const CaseDefinition caseDef)
{
    DE_ASSERT(VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage);
    DE_ASSERT(caseDef.requiredSubgroupSizeMode == REQUIRED_SUBGROUP_SIZE_NONE);

    const uint32_t numWorkgroups[3] = {1, 1, 1};
#ifndef CTS_USES_VULKANSC
    const VkPhysicalDeviceSubgroupSizeControlProperties &subgroupSizeControlProperties =
        context.getSubgroupSizeControlProperties();
#else
    const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT &subgroupSizeControlProperties =
        context.getSubgroupSizeControlPropertiesEXT();
#endif // CTS_USES_VULKANSC
    const VkPhysicalDeviceProperties &physicalDeviceProperties = context.getDeviceProperties();
    // Calculate the local workgroup sizes to exercise the maximum supported by the driver
    const auto &maxWorkGroupSize = physicalDeviceProperties.limits.maxComputeWorkGroupSize;
    const auto &maxInvocations = physicalDeviceProperties.limits.maxComputeWorkGroupInvocations;
    const UVec3 localSize = getLocalSizes(maxWorkGroupSize, maxInvocations, maxInvocations);
    const uint32_t subgroupSize = subgroups::getSubgroupSize(context);
    // For full subgroups and allow varying subgroup size, localsize X must be a multiple of maxSubgroupSize.
    // We set local size X for this test to the maximum, regardless if allow varying subgroup size is enabled or not.
    const uint32_t localSizesToTestCount = 7;
    const uint32_t localSizesToTest[localSizesToTestCount][3] = {
        {subgroupSizeControlProperties.maxSubgroupSize, 1, 1},
        {subgroupSizeControlProperties.maxSubgroupSize, 4, 1},
        {subgroupSizeControlProperties.maxSubgroupSize, 1, 4},
        {subgroupSizeControlProperties.maxSubgroupSize * 2, 1, 2},
        {subgroupSizeControlProperties.maxSubgroupSize * 4, 1, 1},
        {localSize.x(), localSize.y(), localSize.z()},
        {1, 1, 1} // Isn't used, just here to make double buffering checks easier
    };
    const struct internalDataStruct internalData = {
        &context,
        caseDef,
        subgroupSize,
        false,
    };

    DE_ASSERT(caseDef.requiredSubgroupSizeMode == REQUIRED_SUBGROUP_SIZE_NONE);

    return subgroups::makeComputeTestRequiredSubgroupSize(
        context, VK_FORMAT_R32G32B32A32_UINT, nullptr, 0, &internalData, checkComputeRequireFull,
        caseDef.pipelineShaderStageCreateFlags, numWorkgroups, makeDeBool(internalData.isRequiredSubgroupSize),
        subgroupSize, localSizesToTest, localSizesToTestCount);
}

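// Tests that set an explicit required subgroup size (min or max) through
// VkPipelineShaderStageRequiredSubgroupSizeCreateInfo for the stages of the case.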
TestStatus testRequireSubgroupSize(Context &context, const CaseDefinition caseDef)
{
    if (isAllComputeStages(caseDef.shaderStage))
    {
        const uint32_t numWorkgroups[3] = {1, 1, 1};
#ifndef CTS_USES_VULKANSC
        const VkPhysicalDeviceSubgroupSizeControlProperties &subgroupSizeControlProperties =
            context.getSubgroupSizeControlProperties();
#else
        const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT &subgroupSizeControlProperties =
            context.getSubgroupSizeControlPropertiesEXT();
#endif // CTS_USES_VULKANSC
        const VkPhysicalDeviceProperties &physicalDeviceProperties = context.getDeviceProperties();
        const uint32_t requiredSubgroupSize =
            getRequiredSubgroupSizeFromMode(context, caseDef, subgroupSizeControlProperties);
        const uint64_t maxSubgroupLimitSize =
            (uint64_t)requiredSubgroupSize * subgroupSizeControlProperties.maxComputeWorkgroupSubgroups;
        const uint32_t maxTotalLocalSize = (uint32_t)min<uint64_t>(
            maxSubgroupLimitSize, physicalDeviceProperties.limits.maxComputeWorkGroupInvocations);
        const auto &maxWorkGroupSize = physicalDeviceProperties.limits.maxComputeWorkGroupSize;
        const auto &maxInvocations = physicalDeviceProperties.limits.maxComputeWorkGroupInvocations;
        const UVec3 localSize = getLocalSizes(maxWorkGroupSize, maxInvocations, maxTotalLocalSize);
        const bool shaderUsesFullSubgroups = caseDef.shaderUsesFullSubgroups();
        const uint32_t localSizesToTest[5][3] = {
            {localSize.x(), localSize.y(), localSize.z()},
            {requiredSubgroupSize, 1, 1},
            {1, requiredSubgroupSize, 1},
            {1, 1, requiredSubgroupSize},
            {1, 1, 1} // Isn't used, just here to make double buffering checks easier
        };

        // If the shader uses full subgroups, use only the first two entries so the local size in X is a multiple of the requested
        // subgroup size, as required by the spec.
        uint32_t localSizesToTestCount = 5;
        if (shaderUsesFullSubgroups)
            localSizesToTestCount = 3;

        const internalDataStruct internalData = {
            &context,             // const Context* context;
            caseDef,              // struct CaseDefinition caseDef;
            requiredSubgroupSize, // uint32_t requiredSubgroupSize;
            true,                 // bool isRequiredSubgroupSize;
        };

        // Depending on the flag and SPIR-V version we need to run one verification function or another.
        const auto checkFunction = (shaderUsesFullSubgroups ? checkComputeRequireFull : checkCompute);

        return subgroups::makeComputeTestRequiredSubgroupSize(
            context, VK_FORMAT_R32G32B32A32_UINT, nullptr, 0, &internalData, checkFunction,
            caseDef.pipelineShaderStageCreateFlags, numWorkgroups, makeDeBool(internalData.isRequiredSubgroupSize),
            requiredSubgroupSize, localSizesToTest, localSizesToTestCount);
    }
#ifndef CTS_USES_VULKANSC
    else if (isAllMeshShadingStages(caseDef.shaderStage))
    {
        const auto isMesh = ((caseDef.shaderStage & VK_SHADER_STAGE_MESH_BIT_EXT) != 0u);
        const auto isTask = ((caseDef.shaderStage & VK_SHADER_STAGE_TASK_BIT_EXT) != 0u);

        DE_ASSERT(isMesh != isTask);
        DE_UNREF(isTask); // For release builds.

        const uint32_t numWorkgroups[3] = {1, 1, 1};
        const auto &subgroupSizeControlProperties = context.getSubgroupSizeControlProperties();
        const auto &meshProperties = context.getMeshShaderPropertiesEXT();
        const uint32_t requiredSubgroupSize =
            getRequiredSubgroupSizeFromMode(context, caseDef, subgroupSizeControlProperties);
        const auto &maxWorkGroupSize =
            (isMesh ? meshProperties.maxMeshWorkGroupSize : meshProperties.maxTaskWorkGroupSize);
        const auto &maxInvocations =
            (isMesh ? meshProperties.maxMeshWorkGroupInvocations : meshProperties.maxTaskWorkGroupInvocations);
        const UVec3 localSize = getLocalSizes(maxWorkGroupSize, maxInvocations, maxInvocations);
        const bool shaderUsesFullSubgroups = caseDef.shaderUsesFullSubgroups();
        const uint32_t localSizesToTest[5][3] = {
            {requiredSubgroupSize, 1, 1},
            {1, requiredSubgroupSize, 1},
            {1, 1, requiredSubgroupSize},
            {localSize.x(), localSize.y(), localSize.z()},
            {1, 1, 1} // Isn't used, just here to make double buffering checks easier
        };

        // If the shader uses full subgroups, use only the first two entries so the local size in X is a multiple of the requested
        // subgroup size, as required by the spec.
        uint32_t localSizesToTestCount = 5;
        if (shaderUsesFullSubgroups)
            localSizesToTestCount = 3;

        const internalDataStruct internalData = {
            &context,             // const Context* context;
            caseDef,              // struct CaseDefinition caseDef;
            requiredSubgroupSize, // uint32_t requiredSubgroupSize;
            true,                 // bool isRequiredSubgroupSize;
        };

        // Depending on the flag and SPIR-V version we need to run one verification function or another.
        const auto checkFunction = (shaderUsesFullSubgroups ? checkComputeRequireFull : checkCompute);

        return subgroups::makeMeshTestRequiredSubgroupSize(
            context, VK_FORMAT_R32G32B32A32_UINT, nullptr, 0, &internalData, checkFunction,
            caseDef.pipelineShaderStageCreateFlags, numWorkgroups, makeDeBool(internalData.isRequiredSubgroupSize),
            requiredSubgroupSize, localSizesToTest, localSizesToTestCount);
    }
#endif // CTS_USES_VULKANSC
    else if (isAllGraphicsStages(caseDef.shaderStage))
    {
        const VkShaderStageFlags stages = subgroups::getPossibleGraphicsSubgroupStages(context, caseDef.shaderStage);
#ifndef CTS_USES_VULKANSC
        const VkPhysicalDeviceSubgroupSizeControlProperties &subgroupSizeControlProperties =
            context.getSubgroupSizeControlProperties();
#else
        const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT &subgroupSizeControlProperties =
            context.getSubgroupSizeControlPropertiesEXT();
#endif // CTS_USES_VULKANSC
        const uint32_t requiredSubgroupSize =
            getRequiredSubgroupSizeFromMode(context, caseDef, subgroupSizeControlProperties);
        const uint32_t requiredSubgroupSizes[5] = {requiredSubgroupSize, requiredSubgroupSize, requiredSubgroupSize,
                                                   requiredSubgroupSize, requiredSubgroupSize};
        const internalDataStruct internalData = {
            &context,             // const Context* context;
            caseDef,              // struct CaseDefinition caseDef;
            requiredSubgroupSize, // uint32_t requiredSubgroupSize;
            true,                 // bool isRequiredSubgroupSize;
        };

        return subgroups::allStagesRequiredSubgroupSize(
            context, VK_FORMAT_R32_UINT, DE_NULL, 0, &internalData, checkVertexPipelineStages, stages,
            caseDef.pipelineShaderStageCreateFlags, caseDef.pipelineShaderStageCreateFlags,
            caseDef.pipelineShaderStageCreateFlags, caseDef.pipelineShaderStageCreateFlags,
            caseDef.pipelineShaderStageCreateFlags, requiredSubgroupSizes);
    }
#ifndef CTS_USES_VULKANSC
    else if (isAllRayTracingStages(caseDef.shaderStage))
    {
        const VkShaderStageFlags stages = subgroups::getPossibleRayTracingSubgroupStages(context, caseDef.shaderStage);
        const VkPhysicalDeviceSubgroupSizeControlProperties &subgroupSizeControlProperties =
            context.getSubgroupSizeControlProperties();
        const uint32_t requiredSubgroupSize =
            getRequiredSubgroupSizeFromMode(context, caseDef, subgroupSizeControlProperties);
        const vector<uint32_t> flags(6, caseDef.pipelineShaderStageCreateFlags);
        const vector<uint32_t> requiredSubgroupSizes(6, requiredSubgroupSize);
        const struct internalDataStruct internalData = {
            &context,             // const Context* context;
            caseDef,              // struct CaseDefinition caseDef;
            requiredSubgroupSize, // uint32_t requiredSubgroupSize;
            true,                 // bool isRequiredSubgroupSize;
        };

        return subgroups::allRayTracingStagesRequiredSubgroupSize(context, VK_FORMAT_R32_UINT, DE_NULL, 0,
                                                                  &internalData, checkVertexPipelineStages, stages,
                                                                  flags.data(), requiredSubgroupSizes.data());
    }
#endif // CTS_USES_VULKANSC
    else
        TCU_THROW(InternalError, "Unknown stage or invalid stage set");
}

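// Framebuffer variant of the required-subgroup-size tests for single graphics stages.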
TestStatus noSSBOtestRequireSubgroupSize(Context &context, const CaseDefinition caseDef)
{
#ifndef CTS_USES_VULKANSC
    const VkPhysicalDeviceSubgroupSizeControlProperties &subgroupSizeControlProperties =
        context.getSubgroupSizeControlProperties();
#else
    const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT &subgroupSizeControlProperties =
        context.getSubgroupSizeControlPropertiesEXT();
#endif // CTS_USES_VULKANSC
    const uint32_t requiredSubgroupSize =
        getRequiredSubgroupSizeFromMode(context, caseDef, subgroupSizeControlProperties);
    const VkFormat format = VK_FORMAT_R32_UINT;
    const uint32_t &flags = caseDef.pipelineShaderStageCreateFlags;
    const uint32_t &size = requiredSubgroupSize;
    struct internalDataStruct internalData = {
        &context,
        caseDef,
        requiredSubgroupSize,
        true,
    };

    switch (caseDef.shaderStage)
    {
    case VK_SHADER_STAGE_VERTEX_BIT:
        return subgroups::makeVertexFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData,
                                                                        checkVertexPipelineStages, flags, size);
    case VK_SHADER_STAGE_GEOMETRY_BIT:
        return subgroups::makeGeometryFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData,
                                                                          checkVertexPipelineStages, flags, size);
    case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
        return subgroups::makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(
            context, format, DE_NULL, 0, &internalData, checkVertexPipelineStages, caseDef.shaderStage, flags, size);
    case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
        return subgroups::makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(
            context, format, DE_NULL, 0, &internalData, checkVertexPipelineStages, caseDef.shaderStage, flags, size);
    case VK_SHADER_STAGE_FRAGMENT_BIT:
        return subgroups::makeFragmentFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData,
                                                                          checkFragmentPipelineStages, flags, size);
    default:
        TCU_THROW(InternalError, "Unhandled shader stage");
    }
}

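// Sanity check that the reported default subgroupSize lies within [minSubgroupSize, maxSubgroupSize].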
TestStatus testSanitySubgroupSizeProperties(Context &context)
{
#ifndef CTS_USES_VULKANSC
    VkPhysicalDeviceSubgroupSizeControlProperties subgroupSizeControlProperties;
    subgroupSizeControlProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES;
#else
    VkPhysicalDeviceSubgroupSizeControlPropertiesEXT subgroupSizeControlProperties;
    subgroupSizeControlProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT;
#endif // CTS_USES_VULKANSC

    subgroupSizeControlProperties.pNext = DE_NULL;

    VkPhysicalDeviceSubgroupProperties subgroupProperties;
    subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
    subgroupProperties.pNext = &subgroupSizeControlProperties;

    VkPhysicalDeviceProperties2 properties;
    properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
    properties.pNext = &subgroupProperties;

    context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);

    if (subgroupProperties.subgroupSize > subgroupSizeControlProperties.maxSubgroupSize ||
        subgroupProperties.subgroupSize < subgroupSizeControlProperties.minSubgroupSize)
    {
        ostringstream error;
        error << "subgroupSize (" << subgroupProperties.subgroupSize << ") is not between minSubgroupSize (";
        error << subgroupSizeControlProperties.minSubgroupSize << ") and maxSubgroupSize (";
        error << subgroupSizeControlProperties.maxSubgroupSize << ")";

        return TestStatus::fail(error.str().c_str());
    }

    return TestStatus::pass("OK");
}
} // namespace

namespace vkt
{
namespace subgroups
{
TestCaseGroup *createSubgroupsSizeControlTests(TestContext &testCtx)
{
    de::MovePtr<TestCaseGroup> group(new TestCaseGroup(testCtx, "size_control"));
    de::MovePtr<TestCaseGroup> framebufferGroup(new TestCaseGroup(testCtx, "framebuffer"));
    de::MovePtr<TestCaseGroup> computeGroup(new TestCaseGroup(testCtx, "compute"));
    de::MovePtr<TestCaseGroup> graphicsGroup(new TestCaseGroup(testCtx, "graphics"));
#ifndef CTS_USES_VULKANSC
    de::MovePtr<TestCaseGroup> raytracingGroup(new TestCaseGroup(testCtx, "ray_tracing"));
    de::MovePtr<TestCaseGroup> meshGroup(new TestCaseGroup(testCtx, "mesh"));
#endif // CTS_USES_VULKANSC
    de::MovePtr<TestCaseGroup> genericGroup(new TestCaseGroup(testCtx, "generic"));
    const VkShaderStageFlags fbStages[] = {
        VK_SHADER_STAGE_VERTEX_BIT,
        VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
        VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
        VK_SHADER_STAGE_GEOMETRY_BIT,
        VK_SHADER_STAGE_FRAGMENT_BIT,
    };
#ifndef CTS_USES_VULKANSC
    const VkShaderStageFlags meshStages[] = {
        VK_SHADER_STAGE_MESH_BIT_EXT,
        VK_SHADER_STAGE_TASK_BIT_EXT,
    };
#endif // CTS_USES_VULKANSC

    // Test sanity of the subgroup size properties.
    {
        addFunctionCase(genericGroup.get(), "subgroup_size_properties", supportedCheck,
                        testSanitySubgroupSizeProperties);
    }

    const TestParams testParams[] = {{false, true, ""}, {true, false, "_spirv16"}, {true, true, "_flags_spirv16"}};

    for (const auto &params : testParams)
    {
        // Allow varying subgroup cases.
        const uint32_t flagsVary = VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT;
        const CaseDefinition caseDefVary = {params.flagsEnabled ? flagsVary : 0u,
                                            VK_SHADER_STAGE_COMPUTE_BIT,
                                            false,
                                            REQUIRED_SUBGROUP_SIZE_NONE,
                                            de::SharedPtr<bool>(new bool),
                                            params.useSpirv16 ? SPIRV_VERSION_1_6 : SPIRV_VERSION_1_3};

        addFunctionCaseWithPrograms(computeGroup.get(), "allow_varying_subgroup_size" + params.postfix,
                                    supportedCheckFeatures, initPrograms, test, caseDefVary);
        addFunctionCaseWithPrograms(graphicsGroup.get(), "allow_varying_subgroup_size" + params.postfix,
                                    supportedCheckFeaturesShader, initPrograms, test, caseDefVary);

        for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(fbStages); ++stageIndex)
        {
            const CaseDefinition caseDefStage = {params.flagsEnabled ? flagsVary : 0u,
                                                 fbStages[stageIndex],
                                                 false,
                                                 REQUIRED_SUBGROUP_SIZE_NONE,
                                                 de::SharedPtr<bool>(new bool),
                                                 params.useSpirv16 ? SPIRV_VERSION_1_6 : SPIRV_VERSION_1_3};

            string name =
                getShaderStageName(caseDefStage.shaderStage) + "_allow_varying_subgroup_size" + params.postfix;
            addFunctionCaseWithPrograms(framebufferGroup.get(), name, supportedCheckFeaturesShader,
                                        initFrameBufferPrograms, noSSBOtest, caseDefStage);
        }

#ifndef CTS_USES_VULKANSC
        for (const auto &stage : meshStages)
        {
            const CaseDefinition caseDefMesh = {(params.flagsEnabled ? flagsVary : 0u),
                                                stage,
                                                false,
                                                REQUIRED_SUBGROUP_SIZE_NONE,
                                                de::SharedPtr<bool>(new bool),
                                                (params.useSpirv16 ? SPIRV_VERSION_1_6 : SPIRV_VERSION_1_4)};
            const std::string name = getShaderStageName(stage) + "_allow_varying_subgroup_size" + params.postfix;
            addFunctionCaseWithPrograms(meshGroup.get(), name, supportedCheckFeatures, initPrograms, test, caseDefMesh);
        }
#endif // CTS_USES_VULKANSC

        // Require full subgroups together with allow varying subgroup (only compute shaders).
        const uint32_t flagsFullVary = VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT |
                                       VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT;
        const CaseDefinition caseDefFullVary = {params.flagsEnabled ? flagsFullVary : 0u,
                                                VK_SHADER_STAGE_COMPUTE_BIT,
                                                true,
                                                REQUIRED_SUBGROUP_SIZE_NONE,
                                                de::SharedPtr<bool>(new bool),
                                                params.useSpirv16 ? SPIRV_VERSION_1_6 : SPIRV_VERSION_1_3};
        addFunctionCaseWithPrograms(
            computeGroup.get(), "require_full_subgroups_allow_varying_subgroup_size" + params.postfix,
            supportedCheckFeatures, initProgramsRequireFull, testRequireFullSubgroups, caseDefFullVary);

        // Require full subgroups cases (only compute shaders).
        const uint32_t flagsFull = VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT;
        const CaseDefinition caseDefFull = {params.flagsEnabled ? flagsFull : 0u,
                                            VK_SHADER_STAGE_COMPUTE_BIT,
                                            true,
                                            REQUIRED_SUBGROUP_SIZE_NONE,
                                            de::SharedPtr<bool>(new bool),
                                            params.useSpirv16 ? SPIRV_VERSION_1_6 : SPIRV_VERSION_1_3};
        addFunctionCaseWithPrograms(computeGroup.get(), "require_full_subgroups" + params.postfix,
                                    supportedCheckFeatures, initProgramsRequireFull, testRequireFullSubgroups,
                                    caseDefFull);

        // Tests to check setting a required subgroup size value, together with require full subgroups (only compute shaders).
        const CaseDefinition caseDefMaxFull = {params.flagsEnabled ? flagsFull : 0u,
                                               VK_SHADER_STAGE_COMPUTE_BIT,
                                               true,
                                               REQUIRED_SUBGROUP_SIZE_MAX,
                                               de::SharedPtr<bool>(new bool),
                                               params.useSpirv16 ? SPIRV_VERSION_1_6 : SPIRV_VERSION_1_3};
        addFunctionCaseWithPrograms(
            computeGroup.get(), "required_subgroup_size_max_require_full_subgroups" + params.postfix,
            supportedCheckFeatures, initProgramsRequireFull, testRequireSubgroupSize, caseDefMaxFull);

        const CaseDefinition caseDefMinFull = {params.flagsEnabled ? flagsFull : 0u,
                                               VK_SHADER_STAGE_COMPUTE_BIT,
                                               true,
                                               REQUIRED_SUBGROUP_SIZE_MIN,
                                               de::SharedPtr<bool>(new bool),
                                               params.useSpirv16 ? SPIRV_VERSION_1_6 : SPIRV_VERSION_1_3};
        addFunctionCaseWithPrograms(
            computeGroup.get(), "required_subgroup_size_min_require_full_subgroups" + params.postfix,
            supportedCheckFeatures, initProgramsRequireFull, testRequireSubgroupSize, caseDefMinFull);

        // Ray tracing cases with allow varying subgroup.
#ifndef CTS_USES_VULKANSC
        const uint32_t flagsRayTracing = VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT;
        const CaseDefinition caseDefAllRaytracing = {params.flagsEnabled ? flagsRayTracing : 0u,
                                                     SHADER_STAGE_ALL_RAY_TRACING,
                                                     false,
                                                     REQUIRED_SUBGROUP_SIZE_NONE,
                                                     de::SharedPtr<bool>(new bool),
                                                     params.useSpirv16 ? SPIRV_VERSION_1_6 : SPIRV_VERSION_1_4};
        addFunctionCaseWithPrograms(raytracingGroup.get(), "allow_varying_subgroup_size" + params.postfix,
                                    supportedCheckFeaturesShader, initPrograms, test, caseDefAllRaytracing);
#endif // CTS_USES_VULKANSC
    }

    // Tests to check setting a required subgroup size value.
    {
        const CaseDefinition caseDefAllGraphicsMax = {0u,
                                                      VK_SHADER_STAGE_ALL_GRAPHICS,
                                                      false,
                                                      REQUIRED_SUBGROUP_SIZE_MAX,
                                                      de::SharedPtr<bool>(new bool),
                                                      SPIRV_VERSION_1_3};
        addFunctionCaseWithPrograms(graphicsGroup.get(), "required_subgroup_size_max", supportedCheckFeaturesShader,
                                    initPrograms, testRequireSubgroupSize, caseDefAllGraphicsMax);
        const CaseDefinition caseDefComputeMax = {0u,
                                                  VK_SHADER_STAGE_COMPUTE_BIT,
                                                  false,
                                                  REQUIRED_SUBGROUP_SIZE_MAX,
                                                  de::SharedPtr<bool>(new bool),
                                                  SPIRV_VERSION_1_3};
        addFunctionCaseWithPrograms(computeGroup.get(), "required_subgroup_size_max", supportedCheckFeatures,
                                    initPrograms, testRequireSubgroupSize, caseDefComputeMax);
#ifndef CTS_USES_VULKANSC
        const CaseDefinition caseDefAllRaytracingMax = {0u,
                                                        SHADER_STAGE_ALL_RAY_TRACING,
                                                        false,
                                                        REQUIRED_SUBGROUP_SIZE_MAX,
                                                        de::SharedPtr<bool>(new bool),
                                                        SPIRV_VERSION_1_4};
        addFunctionCaseWithPrograms(raytracingGroup.get(), "required_subgroup_size_max", supportedCheckFeaturesShader,
                                    initPrograms, testRequireSubgroupSize, caseDefAllRaytracingMax);
#endif // CTS_USES_VULKANSC

        const CaseDefinition caseDefAllGraphicsMin = {0u,
                                                      VK_SHADER_STAGE_ALL_GRAPHICS,
                                                      false,
                                                      REQUIRED_SUBGROUP_SIZE_MIN,
                                                      de::SharedPtr<bool>(new bool),
                                                      SPIRV_VERSION_1_3};
        addFunctionCaseWithPrograms(graphicsGroup.get(), "required_subgroup_size_min", supportedCheckFeaturesShader,
                                    initPrograms, testRequireSubgroupSize, caseDefAllGraphicsMin);
        const CaseDefinition caseDefComputeMin = {0u,
                                                  VK_SHADER_STAGE_COMPUTE_BIT,
                                                  false,
                                                  REQUIRED_SUBGROUP_SIZE_MIN,
                                                  de::SharedPtr<bool>(new bool),
                                                  SPIRV_VERSION_1_3};
        addFunctionCaseWithPrograms(computeGroup.get(), "required_subgroup_size_min", supportedCheckFeatures,
                                    initPrograms, testRequireSubgroupSize, caseDefComputeMin);
#ifndef CTS_USES_VULKANSC
        const CaseDefinition caseDefAllRaytracingMin = {0u,
                                                        SHADER_STAGE_ALL_RAY_TRACING,
                                                        false,
                                                        REQUIRED_SUBGROUP_SIZE_MIN,
                                                        de::SharedPtr<bool>(new bool),
                                                        SPIRV_VERSION_1_4};
        addFunctionCaseWithPrograms(raytracingGroup.get(), "required_subgroup_size_min", supportedCheckFeaturesShader,
                                    initPrograms, testRequireSubgroupSize, caseDefAllRaytracingMin);
#endif // CTS_USES_VULKANSC
        for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(fbStages); ++stageIndex)
        {
            const CaseDefinition caseDefStageMax = {0u,
                                                    fbStages[stageIndex],
                                                    false,
                                                    REQUIRED_SUBGROUP_SIZE_MAX,
                                                    de::SharedPtr<bool>(new bool),
                                                    SPIRV_VERSION_1_3};
            addFunctionCaseWithPrograms(
                framebufferGroup.get(), getShaderStageName(caseDefStageMax.shaderStage) + "_required_subgroup_size_max",
                supportedCheckFeaturesShader, initFrameBufferPrograms, noSSBOtestRequireSubgroupSize, caseDefStageMax);
            const CaseDefinition caseDefStageMin = {0u,
                                                    fbStages[stageIndex],
                                                    false,
                                                    REQUIRED_SUBGROUP_SIZE_MIN,
                                                    de::SharedPtr<bool>(new bool),
                                                    SPIRV_VERSION_1_3};
            addFunctionCaseWithPrograms(
                framebufferGroup.get(), getShaderStageName(caseDefStageMin.shaderStage) + "_required_subgroup_size_min",
                supportedCheckFeaturesShader, initFrameBufferPrograms, noSSBOtestRequireSubgroupSize, caseDefStageMin);
        }

#ifndef CTS_USES_VULKANSC
        for (const auto &stage : meshStages)
        {
            const auto stageName = getShaderStageName(stage);

            const CaseDefinition caseDefMeshMax = {
                0u, stage, false, REQUIRED_SUBGROUP_SIZE_MAX, de::SharedPtr<bool>(new bool), SPIRV_VERSION_1_4};
            addFunctionCaseWithPrograms(meshGroup.get(), "required_subgroup_size_max_" + stageName,
                                        supportedCheckFeatures, initPrograms, testRequireSubgroupSize, caseDefMeshMax);
            const CaseDefinition caseDefMeshMin = {
                0u, stage, false, REQUIRED_SUBGROUP_SIZE_MIN, de::SharedPtr<bool>(new bool), SPIRV_VERSION_1_4};
            addFunctionCaseWithPrograms(meshGroup.get(), "required_subgroup_size_min_" + stageName,
                                        supportedCheckFeatures, initPrograms, testRequireSubgroupSize, caseDefMeshMin);
        }
#endif // CTS_USES_VULKANSC
    }

    group->addChild(genericGroup.release());
    group->addChild(graphicsGroup.release());
    group->addChild(computeGroup.release());
    group->addChild(framebufferGroup.release());
#ifndef CTS_USES_VULKANSC
    group->addChild(raytracingGroup.release());
    group->addChild(meshGroup.release());
#endif // CTS_USES_VULKANSC

    return group.release();
}

} // namespace subgroups
} // namespace vkt