1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2019 The Khronos Group Inc.
6  * Copyright (c) 2019 Valve Corporation.
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  */ /*!
21  * \file
22  * \brief VK_EXT_subgroup_size_control Tests
23  */ /*--------------------------------------------------------------------*/
24 
25 #include "vktSubgroupsSizeControlTests.hpp"
26 #include "vktSubgroupsTestsUtils.hpp"
27 #include "vktTestCaseUtil.hpp"
28 #include "tcuTestLog.hpp"
29 
30 #include <string>
31 #include <vector>
32 #include <algorithm>
33 
34 using namespace tcu;
35 using namespace std;
36 using namespace vk;
37 using namespace vkt;
38 
39 namespace
40 {
41 
// Selects how the subgroup size used by a test case is chosen
// (resolved to a concrete value by getRequiredSubgroupSizeFromMode()).
enum RequiredSubgroupSizeMode
{
    REQUIRED_SUBGROUP_SIZE_NONE = 0, // Resolved through subgroups::getSubgroupSize().
    REQUIRED_SUBGROUP_SIZE_MIN,      // Resolved to the reported minSubgroupSize.
    REQUIRED_SUBGROUP_SIZE_MAX,      // Resolved to the reported maxSubgroupSize.
};
48 
49 struct CaseDefinition
50 {
51     uint32_t pipelineShaderStageCreateFlags;
52     VkShaderStageFlags shaderStage;
53     bool requiresBallot;
54     uint32_t requiredSubgroupSizeMode;
55     de::SharedPtr<bool> geometryPointSizeSupported;
56     SpirvVersion spirvVersion;
57 
hasFullSubgroupsFlag__anon0760b57c0111::CaseDefinition58     bool hasFullSubgroupsFlag(void) const
59     {
60         return ((pipelineShaderStageCreateFlags & VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT) !=
61                 0u);
62     }
63 
shaderUsesFullSubgroups__anon0760b57c0111::CaseDefinition64     bool shaderUsesFullSubgroups(void) const
65     {
66         return (hasFullSubgroupsFlag() || (spirvVersion >= SPIRV_VERSION_1_6));
67     }
68 };
69 
// Parameter set for one variant of the test hierarchy.
// NOTE(review): the consumers of these fields are outside the visible part of the file;
// the per-field notes below are inferred from the names and should be confirmed there.
struct TestParams
{
    bool useSpirv16;     // presumably selects SPIR-V 1.6 shader builds
    bool flagsEnabled;   // presumably toggles use of pipeline shader stage create flags
    std::string postfix; // presumably appended to generated test names
};
76 
77 struct internalDataStruct
78 {
79     const Context *context;
80     struct CaseDefinition caseDef;
81     const uint32_t requiredSubgroupSize;
82     const bool
83         isRequiredSubgroupSize; // Indicates if the test uses VkPipelineShaderStageRequiredSubgroupSizeCreateInfo.
84 };
85 
// Returns the given boolean unchanged.
inline bool makeDeBool(bool value)
{
    return value;
}
90 
getLocalSizes(const uint32_t maxWorkGroupSize[3],uint32_t maxWorkGroupInvocations,uint32_t numWorkGroupInvocations)91 UVec3 getLocalSizes(const uint32_t maxWorkGroupSize[3], uint32_t maxWorkGroupInvocations,
92                     uint32_t numWorkGroupInvocations)
93 {
94     DE_ASSERT(numWorkGroupInvocations <= maxWorkGroupInvocations);
95     DE_UNREF(maxWorkGroupInvocations); // For release builds.
96 
97     const uint32_t localSizeX = de::gcd(numWorkGroupInvocations, maxWorkGroupSize[0]);
98     const uint32_t localSizeY = de::gcd(std::max(numWorkGroupInvocations / localSizeX, 1u), maxWorkGroupSize[1]);
99     const uint32_t localSizeZ = std::max(numWorkGroupInvocations / (localSizeX * localSizeY), 1u);
100 
101     return UVec3(localSizeX, localSizeY, localSizeZ);
102 }
103 
getRequiredSubgroupSizeFromMode(Context & context,const CaseDefinition & caseDef,const VkPhysicalDeviceSubgroupSizeControlProperties & subgroupSizeControlProperties)104 uint32_t getRequiredSubgroupSizeFromMode(
105     Context &context, const CaseDefinition &caseDef,
106 #ifndef CTS_USES_VULKANSC
107     const VkPhysicalDeviceSubgroupSizeControlProperties &subgroupSizeControlProperties)
108 #else
109     const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT &subgroupSizeControlProperties)
110 #endif // CTS_USES_VULKANSC
111 {
112     switch (caseDef.requiredSubgroupSizeMode)
113     {
114     case REQUIRED_SUBGROUP_SIZE_MAX:
115         return subgroupSizeControlProperties.maxSubgroupSize;
116     case REQUIRED_SUBGROUP_SIZE_MIN:
117         return subgroupSizeControlProperties.minSubgroupSize;
118     case REQUIRED_SUBGROUP_SIZE_NONE:
119         return subgroups::getSubgroupSize(context);
120     default:
121         TCU_THROW(NotSupportedError, "Unsupported Subgroup size");
122     }
123 }
124 
checkVertexPipelineStages(const void * internalData,vector<const void * > datas,uint32_t width,uint32_t)125 static bool checkVertexPipelineStages(const void *internalData, vector<const void *> datas, uint32_t width, uint32_t)
126 {
127     const struct internalDataStruct *checkInternalData =
128         reinterpret_cast<const struct internalDataStruct *>(internalData);
129     const Context *context = checkInternalData->context;
130 #ifndef CTS_USES_VULKANSC
131     const VkPhysicalDeviceSubgroupSizeControlProperties &subgroupSizeControlProperties =
132         context->getSubgroupSizeControlProperties();
133 #else
134     const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT &subgroupSizeControlProperties =
135         context->getSubgroupSizeControlPropertiesEXT();
136 #endif // CTS_USES_VULKANSC
137 
138     TestLog &log         = context->getTestContext().getLog();
139     const uint32_t *data = reinterpret_cast<const uint32_t *>(datas[0]);
140 
141     for (uint32_t i = 0; i < width; i++)
142     {
143         if (data[i] > subgroupSizeControlProperties.maxSubgroupSize ||
144             data[i] < subgroupSizeControlProperties.minSubgroupSize)
145         {
146             log << TestLog::Message << "gl_SubgroupSize (" << data[i] << ") value is outside limits ("
147                 << subgroupSizeControlProperties.minSubgroupSize << ", "
148                 << subgroupSizeControlProperties.maxSubgroupSize << ")" << TestLog::EndMessage;
149 
150             return false;
151         }
152 
153         if (checkInternalData->isRequiredSubgroupSize && data[i] != checkInternalData->requiredSubgroupSize)
154         {
155             log << TestLog::Message << "gl_SubgroupSize (" << data[i]
156                 << ") is not equal to the required subgroup size value (" << checkInternalData->requiredSubgroupSize
157                 << ")" << TestLog::EndMessage;
158 
159             return false;
160         }
161     }
162 
163     return true;
164 }
165 
checkFragmentPipelineStages(const void * internalData,vector<const void * > datas,uint32_t width,uint32_t height,uint32_t)166 static bool checkFragmentPipelineStages(const void *internalData, vector<const void *> datas, uint32_t width,
167                                         uint32_t height, uint32_t)
168 {
169     const struct internalDataStruct *checkInternalData =
170         reinterpret_cast<const struct internalDataStruct *>(internalData);
171     const Context *context = checkInternalData->context;
172 #ifndef CTS_USES_VULKANSC
173     const VkPhysicalDeviceSubgroupSizeControlProperties &subgroupSizeControlProperties =
174         context->getSubgroupSizeControlProperties();
175 #else
176     const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT &subgroupSizeControlProperties =
177         context->getSubgroupSizeControlPropertiesEXT();
178 #endif // CTS_USES_VULKANSC
179     TestLog &log         = context->getTestContext().getLog();
180     const uint32_t *data = reinterpret_cast<const uint32_t *>(datas[0]);
181 
182     for (uint32_t x = 0u; x < width; ++x)
183     {
184         for (uint32_t y = 0u; y < height; ++y)
185         {
186             const uint32_t ndx = (x * height + y);
187 
188             if (data[ndx] > subgroupSizeControlProperties.maxSubgroupSize ||
189                 data[ndx] < subgroupSizeControlProperties.minSubgroupSize)
190             {
191                 log << TestLog::Message << "gl_SubgroupSize (" << data[ndx] << ") value is outside limits ("
192                     << subgroupSizeControlProperties.minSubgroupSize << ", "
193                     << subgroupSizeControlProperties.maxSubgroupSize << ")" << TestLog::EndMessage;
194 
195                 return false;
196             }
197 
198             if (checkInternalData->isRequiredSubgroupSize && data[ndx] != checkInternalData->requiredSubgroupSize)
199             {
200                 log << TestLog::Message << "gl_SubgroupSize (" << data[ndx]
201                     << ") is not equal to the required subgroup size value (" << checkInternalData->requiredSubgroupSize
202                     << ")" << TestLog::EndMessage;
203 
204                 return false;
205             }
206         }
207     }
208     return true;
209 }
210 
checkCompute(const void * internalData,vector<const void * > datas,const uint32_t numWorkgroups[3],const uint32_t localSize[3],uint32_t)211 static bool checkCompute(const void *internalData, vector<const void *> datas, const uint32_t numWorkgroups[3],
212                          const uint32_t localSize[3], uint32_t)
213 {
214     const struct internalDataStruct *checkInternalData =
215         reinterpret_cast<const struct internalDataStruct *>(internalData);
216     const Context *context = checkInternalData->context;
217 #ifndef CTS_USES_VULKANSC
218     const VkPhysicalDeviceSubgroupSizeControlProperties &subgroupSizeControlProperties =
219         context->getSubgroupSizeControlProperties();
220 #else
221     const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT &subgroupSizeControlProperties =
222         context->getSubgroupSizeControlPropertiesEXT();
223 #endif // CTS_USES_VULKANSC
224     TestLog &log               = context->getTestContext().getLog();
225     const uint32_t globalSizeX = numWorkgroups[0] * localSize[0];
226     const uint32_t globalSizeY = numWorkgroups[1] * localSize[1];
227     const uint32_t globalSizeZ = numWorkgroups[2] * localSize[2];
228     const uint32_t width       = globalSizeX * globalSizeY * globalSizeZ;
229     const uint32_t *data       = reinterpret_cast<const uint32_t *>(datas[0]);
230 
231     for (uint32_t i = 0; i < width; i++)
232     {
233         if (data[i] > subgroupSizeControlProperties.maxSubgroupSize ||
234             data[i] < subgroupSizeControlProperties.minSubgroupSize)
235         {
236             log << TestLog::Message << "[" << localSize[0] << ", " << localSize[1] << ", " << localSize[2] << "] "
237                 << "gl_SubgroupSize (" << data[i] << ") value is outside limits ("
238                 << subgroupSizeControlProperties.minSubgroupSize << ", "
239                 << subgroupSizeControlProperties.maxSubgroupSize << ")" << TestLog::EndMessage;
240 
241             return false;
242         }
243 
244         if (checkInternalData->isRequiredSubgroupSize && data[i] != checkInternalData->requiredSubgroupSize)
245         {
246             log << TestLog::Message << "[" << localSize[0] << ", " << localSize[1] << ", " << localSize[2] << "] "
247                 << "gl_SubgroupSize (" << data[i] << ") is not equal to the required subgroup size value ("
248                 << checkInternalData->requiredSubgroupSize << ")" << TestLog::EndMessage;
249 
250             return false;
251         }
252     }
253 
254     return true;
255 }
256 
checkComputeRequireFull(const void * internalData,vector<const void * > datas,const uint32_t numWorkgroups[3],const uint32_t localSize[3],uint32_t)257 static bool checkComputeRequireFull(const void *internalData, vector<const void *> datas,
258                                     const uint32_t numWorkgroups[3], const uint32_t localSize[3], uint32_t)
259 {
260     const struct internalDataStruct *checkInternalData =
261         reinterpret_cast<const struct internalDataStruct *>(internalData);
262     const Context *context = checkInternalData->context;
263 #ifndef CTS_USES_VULKANSC
264     const VkPhysicalDeviceSubgroupSizeControlProperties &subgroupSizeControlProperties =
265         context->getSubgroupSizeControlProperties();
266 #else
267     const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT &subgroupSizeControlProperties =
268         context->getSubgroupSizeControlPropertiesEXT();
269 #endif // CTS_USES_VULKANSC
270     TestLog &log               = context->getTestContext().getLog();
271     const uint32_t globalSizeX = numWorkgroups[0] * localSize[0];
272     const uint32_t globalSizeY = numWorkgroups[1] * localSize[1];
273     const uint32_t globalSizeZ = numWorkgroups[2] * localSize[2];
274     const uint32_t width       = globalSizeX * globalSizeY * globalSizeZ;
275     const UVec4 *data          = reinterpret_cast<const UVec4 *>(datas[0]);
276     const uint32_t numSubgroups =
277         (localSize[0] * localSize[1] * localSize[2]) / checkInternalData->requiredSubgroupSize;
278     const bool exactSubgroupSize =
279         (checkInternalData->caseDef.shaderUsesFullSubgroups() && checkInternalData->isRequiredSubgroupSize);
280 
281     for (uint32_t i = 0; i < width; i++)
282     {
283         if (data[i].x() > subgroupSizeControlProperties.maxSubgroupSize ||
284             data[i].x() < subgroupSizeControlProperties.minSubgroupSize)
285         {
286             log << TestLog::Message << "[" << localSize[0] << ", " << localSize[1] << ", " << localSize[2] << "] "
287                 << "gl_SubgroupSize value ( " << data[i].x() << ") is outside limits ["
288                 << subgroupSizeControlProperties.minSubgroupSize << ", "
289                 << subgroupSizeControlProperties.maxSubgroupSize << "]" << TestLog::EndMessage;
290             return false;
291         }
292 
293         if (data[i].x() != data[i].y())
294         {
295             log << TestLog::Message << "[" << localSize[0] << ", " << localSize[1] << ", " << localSize[2] << "] "
296                 << "gl_SubgroupSize ( " << data[i].x() << ") does not match the active number of subgroup invocations ("
297                 << data[i].y() << ")" << TestLog::EndMessage;
298             return false;
299         }
300 
301         if (exactSubgroupSize && data[i].x() != checkInternalData->requiredSubgroupSize)
302         {
303             log << TestLog::Message << "[" << localSize[0] << ", " << localSize[1] << ", " << localSize[2] << "] "
304                 << "expected subgroupSize (" << checkInternalData->requiredSubgroupSize
305                 << ") doesn't match gl_SubgroupSize ( " << data[i].x() << ")" << TestLog::EndMessage;
306             return false;
307         }
308 
309         if (exactSubgroupSize && data[i].z() != numSubgroups)
310         {
311             log << TestLog::Message << "[" << localSize[0] << ", " << localSize[1] << ", " << localSize[2] << "] "
312                 << "expected number of subgroups dispatched (" << numSubgroups << ") doesn't match gl_NumSubgroups ("
313                 << data[i].z() << ")" << TestLog::EndMessage;
314             return false;
315         }
316     }
317 
318     return true;
319 }
320 
initFrameBufferPrograms(SourceCollections & programCollection,CaseDefinition caseDef)321 void initFrameBufferPrograms(SourceCollections &programCollection, CaseDefinition caseDef)
322 {
323     const ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, caseDef.spirvVersion, 0u);
324 
325     if (VK_SHADER_STAGE_FRAGMENT_BIT != caseDef.shaderStage)
326         subgroups::setFragmentShaderFrameBuffer(programCollection);
327 
328     if (VK_SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage && VK_SHADER_STAGE_FRAGMENT_BIT != caseDef.shaderStage)
329         subgroups::setVertexShaderFrameBuffer(programCollection);
330 
331     string bdyStr = "uint tempResult = gl_SubgroupSize;\n";
332 
333     if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
334     {
335         ostringstream vertex;
336 
337         vertex << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
338                << "#extension GL_KHR_shader_subgroup_basic: enable\n"
339                << "layout(location = 0) in highp vec4 in_position;\n"
340                << "layout(location = 0) out float out_color;\n"
341                << "\n"
342                << "void main (void)\n"
343                << "{\n"
344                << bdyStr << "  out_color = float(tempResult);\n"
345                << "  gl_Position = in_position;\n"
346                << "  gl_PointSize = 1.0f;\n"
347                << "}\n";
348 
349         programCollection.glslSources.add("vert") << glu::VertexSource(vertex.str()) << buildOptions;
350     }
351     else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
352     {
353         ostringstream geometry;
354 
355         geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
356                  << "#extension GL_KHR_shader_subgroup_basic: enable\n"
357                  << "layout(points) in;\n"
358                  << "layout(points, max_vertices = 1) out;\n"
359                  << "layout(location = 0) out float out_color;\n"
360                  << "void main (void)\n"
361                  << "{\n"
362                  << bdyStr << "  out_color = float(tempResult);\n"
363                  << "  gl_Position = gl_in[0].gl_Position;\n"
364                  << "  gl_PointSize = 1.0f;"
365                  << "  EmitVertex();\n"
366                  << "  EndPrimitive();\n"
367                  << "}\n";
368 
369         programCollection.glslSources.add("geometry") << glu::GeometrySource(geometry.str()) << buildOptions;
370     }
371     else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
372     {
373         ostringstream controlSource;
374 
375         controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
376                       << "#extension GL_KHR_shader_subgroup_basic: enable\n"
377                       << "layout(vertices = 2) out;\n"
378                       << "layout(location = 0) out float out_color[];\n"
379                       << "\n"
380                       << "void main (void)\n"
381                       << "{\n"
382                       << "  if (gl_InvocationID == 0)\n"
383                       << "  {\n"
384                       << "    gl_TessLevelOuter[0] = 1.0f;\n"
385                       << "    gl_TessLevelOuter[1] = 1.0f;\n"
386                       << "  }\n"
387                       << bdyStr << "  out_color[gl_InvocationID ] = float(tempResult);\n"
388                       << "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
389                       << "}\n";
390 
391         programCollection.glslSources.add("tesc")
392             << glu::TessellationControlSource(controlSource.str()) << buildOptions;
393         subgroups::setTesEvalShaderFrameBuffer(programCollection);
394     }
395     else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
396     {
397         ostringstream evaluationSource;
398         evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
399                          << "#extension GL_KHR_shader_subgroup_basic: enable\n"
400                          << "layout(isolines, equal_spacing, ccw ) in;\n"
401                          << "layout(location = 0) out float out_color;\n"
402                          << "void main (void)\n"
403                          << "{\n"
404                          << bdyStr << "  out_color  = float(tempResult);\n"
405                          << "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
406                          << "}\n";
407 
408         subgroups::setTesCtrlShaderFrameBuffer(programCollection);
409         programCollection.glslSources.add("tese")
410             << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
411     }
412     else if (VK_SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage)
413     {
414         const string vertex = string(glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)) +
415                               "\n"
416                               "void main (void)\n"
417                               "{\n"
418                               "  vec2 uv = vec2(float(gl_VertexIndex & 1), float((gl_VertexIndex >> 1) & 1));\n"
419                               "  gl_Position = vec4(uv * 4.0f -2.0f, 0.0f, 1.0f);\n"
420                               "  gl_PointSize = 1.0f;\n"
421                               "}\n";
422         programCollection.glslSources.add("vert") << glu::VertexSource(vertex) << buildOptions;
423 
424         ostringstream fragmentSource;
425 
426         fragmentSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
427                        << "precision highp int;\n"
428                        << "#extension GL_KHR_shader_subgroup_basic: enable\n"
429                        << "layout(location = 0) out uint out_color;\n"
430                        << "void main()\n"
431                        << "{\n"
432                        << bdyStr << "     out_color = tempResult;\n"
433                        << "}\n";
434 
435         programCollection.glslSources.add("fragment") << glu::FragmentSource(fragmentSource.str()) << buildOptions;
436     }
437     else
438     {
439         DE_FATAL("Unsupported shader stage");
440     }
441 }
442 
getExtHeader(const CaseDefinition &)443 string getExtHeader(const CaseDefinition &)
444 {
445     return "#extension GL_KHR_shader_subgroup_basic: enable\n";
446 }
447 
getPerStageHeadDeclarations(const CaseDefinition & caseDef)448 vector<string> getPerStageHeadDeclarations(const CaseDefinition &caseDef)
449 {
450     const uint32_t stageCount = subgroups::getStagesCount(caseDef.shaderStage);
451     const bool fragment       = (caseDef.shaderStage & VK_SHADER_STAGE_FRAGMENT_BIT) != 0;
452     vector<string> result(stageCount, string());
453 
454     if (fragment)
455         result.reserve(result.size() + 1);
456 
457     for (size_t i = 0; i < result.size(); ++i)
458     {
459         result[i] = "layout(set = 0, binding = " + de::toString(i) +
460                     ", std430) buffer Buffer1\n"
461                     "{\n"
462                     "  uint result[];\n"
463                     "};\n";
464     }
465 
466     if (fragment)
467     {
468         const string fragPart = "layout(location = 0) out uint result;\n";
469 
470         result.push_back(fragPart);
471     }
472 
473     return result;
474 }
475 
getTestSource(const CaseDefinition &)476 string getTestSource(const CaseDefinition &)
477 {
478     return "  uint tempResult = gl_SubgroupSize;\n"
479            "  tempRes = tempResult;\n";
480 }
481 
initPrograms(SourceCollections & programCollection,CaseDefinition caseDef)482 void initPrograms(SourceCollections &programCollection, CaseDefinition caseDef)
483 {
484     ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, caseDef.spirvVersion, 0u,
485                                     (caseDef.spirvVersion == vk::SPIRV_VERSION_1_4));
486     const string extHeader                = getExtHeader(caseDef);
487     const string testSrc                  = getTestSource(caseDef);
488     const vector<string> headDeclarations = getPerStageHeadDeclarations(caseDef);
489 
490     subgroups::initStdPrograms(programCollection, buildOptions, caseDef.shaderStage, VK_FORMAT_R32_UINT,
491                                *caseDef.geometryPointSizeSupported, extHeader, testSrc, "", headDeclarations);
492 }
493 
initProgramsRequireFull(SourceCollections & programCollection,CaseDefinition caseDef)494 void initProgramsRequireFull(SourceCollections &programCollection, CaseDefinition caseDef)
495 {
496     if (VK_SHADER_STAGE_COMPUTE_BIT != caseDef.shaderStage)
497         DE_FATAL("Unsupported shader stage");
498 
499     ostringstream src;
500 
501     src << "#version 450\n"
502         << "#extension GL_KHR_shader_subgroup_basic: enable\n"
503         << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
504         << "layout (local_size_x_id = 0, local_size_y_id = 1, "
505            "local_size_z_id = 2) in;\n"
506         << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
507         << "{\n"
508         << "  uvec4 result[];\n"
509         << "};\n"
510         << "\n"
511         << "void main (void)\n"
512         << "{\n"
513         << "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
514         << "  highp uint offset = globalSize.x * ((globalSize.y * "
515            "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
516            "gl_GlobalInvocationID.x;\n"
517         << "   result[offset].x = gl_SubgroupSize;\n" // save the subgroup size value
518         << "   uint numActive = subgroupBallotBitCount(subgroupBallot(true));\n"
519         << "   result[offset].y = numActive;\n"       // save the number of active subgroup invocations
520         << "   result[offset].z = gl_NumSubgroups;\n" // save the number of subgroups dispatched.
521         << "}\n";
522 
523     programCollection.glslSources.add("comp")
524         << glu::ComputeSource(src.str())
525         << ShaderBuildOptions(programCollection.usedVulkanVersion, caseDef.spirvVersion, 0u);
526 }
527 
supportedCheck(Context & context)528 void supportedCheck(Context &context)
529 {
530     if (!subgroups::isSubgroupSupported(context))
531         TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
532 
533     context.requireDeviceFunctionality("VK_EXT_subgroup_size_control");
534 }
535 
supportedCheckFeatures(Context & context,CaseDefinition caseDef)536 void supportedCheckFeatures(Context &context, CaseDefinition caseDef)
537 {
538     supportedCheck(context);
539 
540     if (!subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
541     {
542         TCU_THROW(NotSupportedError, "Shader stage is required to support subgroup operations!");
543     }
544 
545     if (caseDef.shaderStage == VK_SHADER_STAGE_ALL_GRAPHICS)
546     {
547         const VkPhysicalDeviceFeatures &features = context.getDeviceFeatures();
548 
549         if (!features.tessellationShader || !features.geometryShader)
550             TCU_THROW(NotSupportedError, "Device does not support tessellation or geometry shaders");
551     }
552 
553     if (caseDef.requiresBallot &&
554         !subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_BALLOT_BIT))
555     {
556         TCU_THROW(NotSupportedError, "Device does not support subgroup ballot operations");
557     }
558 
559     if (caseDef.requiredSubgroupSizeMode != REQUIRED_SUBGROUP_SIZE_NONE ||
560         caseDef.pipelineShaderStageCreateFlags == VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT)
561     {
562 #ifndef CTS_USES_VULKANSC
563         const VkPhysicalDeviceSubgroupSizeControlFeatures &subgroupSizeControlFeatures =
564             context.getSubgroupSizeControlFeatures();
565 #else
566         const VkPhysicalDeviceSubgroupSizeControlFeaturesEXT &subgroupSizeControlFeatures =
567             context.getSubgroupSizeControlFeaturesEXT();
568 #endif // CTS_USES_VULKANSC
569 
570         if (subgroupSizeControlFeatures.subgroupSizeControl == false)
571             TCU_THROW(NotSupportedError, "Device does not support varying subgroup sizes nor required subgroup size");
572 
573         if (caseDef.requiredSubgroupSizeMode != REQUIRED_SUBGROUP_SIZE_NONE)
574         {
575 #ifndef CTS_USES_VULKANSC
576             const VkPhysicalDeviceSubgroupSizeControlProperties &subgroupSizeControlProperties =
577                 context.getSubgroupSizeControlProperties();
578 #else
579             const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT &subgroupSizeControlProperties =
580                 context.getSubgroupSizeControlPropertiesEXT();
581 #endif // CTS_USES_VULKANSC
582 
583             if ((subgroupSizeControlProperties.requiredSubgroupSizeStages & caseDef.shaderStage) != caseDef.shaderStage)
584                 TCU_THROW(NotSupportedError,
585                           "Device does not support setting required subgroup size for the stages selected");
586         }
587     }
588 
589     if (caseDef.hasFullSubgroupsFlag())
590     {
591 #ifndef CTS_USES_VULKANSC
592         const VkPhysicalDeviceSubgroupSizeControlFeatures &subgroupSizeControlFeatures =
593             context.getSubgroupSizeControlFeatures();
594 #else
595         const VkPhysicalDeviceSubgroupSizeControlFeaturesEXT &subgroupSizeControlFeatures =
596             context.getSubgroupSizeControlFeaturesEXT();
597 #endif // CTS_USES_VULKANSC
598 
599         if (subgroupSizeControlFeatures.computeFullSubgroups == false)
600             TCU_THROW(NotSupportedError, "Device does not support full subgroups in compute shaders");
601     }
602 
603     *caseDef.geometryPointSizeSupported = subgroups::isTessellationAndGeometryPointSizeSupported(context);
604 
605 #ifndef CTS_USES_VULKANSC
606     if (isAllRayTracingStages(caseDef.shaderStage))
607     {
608         context.requireDeviceFunctionality("VK_KHR_ray_tracing_pipeline");
609     }
610     else if (isAllMeshShadingStages(caseDef.shaderStage))
611     {
612         context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_VERTEX_PIPELINE_STORES_AND_ATOMICS);
613         context.requireDeviceFunctionality("VK_EXT_mesh_shader");
614 
615         if ((caseDef.shaderStage & VK_SHADER_STAGE_TASK_BIT_EXT) != 0u)
616         {
617             const auto &features = context.getMeshShaderFeaturesEXT();
618             if (!features.taskShader)
619                 TCU_THROW(NotSupportedError, "Task shaders not supported");
620         }
621     }
622 #endif // CTS_USES_VULKANSC
623 
624     if (caseDef.spirvVersion > vk::getMaxSpirvVersionForVulkan(context.getUsedApiVersion()))
625         TCU_THROW(NotSupportedError, "Shader requires SPIR-V version higher than available");
626 }
627 
supportedCheckFeaturesShader(Context & context,CaseDefinition caseDef)628 void supportedCheckFeaturesShader(Context &context, CaseDefinition caseDef)
629 {
630     supportedCheckFeatures(context, caseDef);
631 
632     subgroups::supportedCheckShader(context, caseDef.shaderStage);
633 }
634 
noSSBOtest(Context & context,const CaseDefinition caseDef)635 TestStatus noSSBOtest(Context &context, const CaseDefinition caseDef)
636 {
637     const VkFormat format                        = VK_FORMAT_R32_UINT;
638     const uint32_t &flags                        = caseDef.pipelineShaderStageCreateFlags;
639     const struct internalDataStruct internalData = {
640         &context,
641         caseDef,
642         0u,
643         false,
644     };
645 
646     switch (caseDef.shaderStage)
647     {
648     case VK_SHADER_STAGE_VERTEX_BIT:
649         return subgroups::makeVertexFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData,
650                                                                         checkVertexPipelineStages, flags, 0u);
651     case VK_SHADER_STAGE_GEOMETRY_BIT:
652         return subgroups::makeGeometryFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData,
653                                                                           checkVertexPipelineStages, flags, 0u);
654     case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
655         return subgroups::makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(
656             context, format, DE_NULL, 0, &internalData, checkVertexPipelineStages, caseDef.shaderStage, flags, 0u);
657     case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
658         return subgroups::makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(
659             context, format, DE_NULL, 0, &internalData, checkVertexPipelineStages, caseDef.shaderStage, flags, 0u);
660     case VK_SHADER_STAGE_FRAGMENT_BIT:
661         return subgroups::makeFragmentFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData,
662                                                                           checkFragmentPipelineStages, flags, 0u);
663     default:
664         TCU_THROW(InternalError, "Unhandled shader stage");
665     }
666 }
667 
test(Context & context,const CaseDefinition caseDef)668 TestStatus test(Context &context, const CaseDefinition caseDef)
669 {
670     if (isAllComputeStages(caseDef.shaderStage))
671     {
672         const uint32_t numWorkgroups[3]      = {1, 1, 1};
673         const uint32_t subgroupSize          = subgroups::getSubgroupSize(context);
674         const auto &physicalDeviceProperties = context.getDeviceProperties();
675         const auto &maxWorkGroupSize         = physicalDeviceProperties.limits.maxComputeWorkGroupSize;
676         const auto &maxInvocations           = physicalDeviceProperties.limits.maxComputeWorkGroupInvocations;
677         // Calculate the local workgroup sizes to exercise the maximum supported by the driver
678         const UVec3 localSize                = getLocalSizes(maxWorkGroupSize, maxInvocations, maxInvocations);
679         const uint32_t localSizesToTestCount = 16;
680         const uint32_t localSizesToTest[localSizesToTestCount][3] = {
681             {1, 1, 1},
682             {32, 4, 1},
683             {32, 1, 4},
684             {1, 32, 4},
685             {1, 4, 32},
686             {4, 1, 32},
687             {4, 32, 1},
688             {subgroupSize, 1, 1},
689             {1, subgroupSize, 1},
690             {1, 1, subgroupSize},
691             {3, 5, 7},
692             {128, 1, 1},
693             {1, 128, 1},
694             {1, 1, 64},
695             {localSize.x(), localSize.y(), localSize.z()},
696             {1, 1, 1} // Isn't used, just here to make double buffering checks easier
697         };
698         const struct internalDataStruct internalData = {
699             &context,
700             caseDef,
701             subgroupSize,
702             false,
703         };
704 
705         return subgroups::makeComputeTestRequiredSubgroupSize(
706             context, VK_FORMAT_R32_UINT, DE_NULL, 0, &internalData, checkCompute,
707             caseDef.pipelineShaderStageCreateFlags, numWorkgroups, makeDeBool(internalData.isRequiredSubgroupSize),
708             subgroupSize, localSizesToTest, localSizesToTestCount);
709     }
710 #ifndef CTS_USES_VULKANSC
711     else if (isAllMeshShadingStages(caseDef.shaderStage))
712     {
713         const bool isMesh = ((caseDef.shaderStage & VK_SHADER_STAGE_MESH_BIT_EXT) != 0u);
714         const bool isTask = ((caseDef.shaderStage & VK_SHADER_STAGE_TASK_BIT_EXT) != 0u);
715 
716         DE_ASSERT(isMesh != isTask);
717         DE_UNREF(isTask); // For release builds.
718 
719         const uint32_t numWorkgroups[3] = {1, 1, 1};
720         const uint32_t subgroupSize     = subgroups::getSubgroupSize(context);
721         const auto &meshProperties      = context.getMeshShaderPropertiesEXT();
722         const auto &maxWorkGroupSize =
723             (isMesh ? meshProperties.maxMeshWorkGroupSize : meshProperties.maxTaskWorkGroupSize);
724         const auto &maxInvocations =
725             (isMesh ? meshProperties.maxMeshWorkGroupInvocations : meshProperties.maxTaskWorkGroupInvocations);
726         // Calculate the local workgroup sizes to exercise the maximum supported by the driver
727         const UVec3 localSize                = getLocalSizes(maxWorkGroupSize, maxInvocations, maxInvocations);
728         const uint32_t localSizesToTestCount = 16;
729         const uint32_t localSizesToTest[localSizesToTestCount][3] = {
730             {1, 1, 1},
731             {32, 4, 1},
732             {32, 1, 4},
733             {1, 32, 4},
734             {1, 4, 32},
735             {4, 1, 32},
736             {4, 32, 1},
737             {subgroupSize, 1, 1},
738             {1, subgroupSize, 1},
739             {1, 1, subgroupSize},
740             {3, 5, 7},
741             {128, 1, 1},
742             {1, 128, 1},
743             {1, 1, 64},
744             {localSize.x(), localSize.y(), localSize.z()},
745             {1, 1, 1} // Isn't used, just here to make double buffering checks easier
746         };
747         const struct internalDataStruct internalData = {
748             &context,
749             caseDef,
750             subgroupSize,
751             false,
752         };
753 
754         return subgroups::makeMeshTestRequiredSubgroupSize(
755             context, VK_FORMAT_R32_UINT, nullptr, 0, &internalData, checkCompute,
756             caseDef.pipelineShaderStageCreateFlags, numWorkgroups, makeDeBool(internalData.isRequiredSubgroupSize),
757             subgroupSize, localSizesToTest, localSizesToTestCount);
758     }
759 #endif // CTS_USES_VULKANSC
760     else if (isAllGraphicsStages(caseDef.shaderStage))
761     {
762         const VkShaderStageFlags stages = subgroups::getPossibleGraphicsSubgroupStages(context, caseDef.shaderStage);
763         struct internalDataStruct internalData = {
764             &context,
765             caseDef,
766             0u,
767             false,
768         };
769 
770         return subgroups::allStagesRequiredSubgroupSize(
771             context, VK_FORMAT_R32_UINT, nullptr, 0, &internalData, checkVertexPipelineStages, stages,
772             caseDef.pipelineShaderStageCreateFlags, caseDef.pipelineShaderStageCreateFlags,
773             caseDef.pipelineShaderStageCreateFlags, caseDef.pipelineShaderStageCreateFlags,
774             caseDef.pipelineShaderStageCreateFlags, nullptr);
775     }
776 #ifndef CTS_USES_VULKANSC
777     else if (isAllRayTracingStages(caseDef.shaderStage))
778     {
779         const VkShaderStageFlags stages = subgroups::getPossibleRayTracingSubgroupStages(context, caseDef.shaderStage);
780         const vector<uint32_t> flags(6, caseDef.pipelineShaderStageCreateFlags);
781         const struct internalDataStruct internalData = {
782             &context,
783             caseDef,
784             0u,
785             false,
786         };
787 
788         return subgroups::allRayTracingStagesRequiredSubgroupSize(context, VK_FORMAT_R32_UINT, nullptr, 0,
789                                                                   &internalData, checkVertexPipelineStages, stages,
790                                                                   flags.data(), nullptr);
791     }
792 #endif // CTS_USES_VULKANSC
793     else
794         TCU_THROW(InternalError, "Unknown stage or invalid stage set");
795 }
796 
testRequireFullSubgroups(Context & context,const CaseDefinition caseDef)797 TestStatus testRequireFullSubgroups(Context &context, const CaseDefinition caseDef)
798 {
799     DE_ASSERT(VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage);
800     DE_ASSERT(caseDef.requiredSubgroupSizeMode == REQUIRED_SUBGROUP_SIZE_NONE);
801 
802     const uint32_t numWorkgroups[3] = {1, 1, 1};
803 #ifndef CTS_USES_VULKANSC
804     const VkPhysicalDeviceSubgroupSizeControlProperties &subgroupSizeControlProperties =
805         context.getSubgroupSizeControlProperties();
806 #else
807     const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT &subgroupSizeControlProperties =
808         context.getSubgroupSizeControlPropertiesEXT();
809 #endif // CTS_USES_VULKANSC
810     const VkPhysicalDeviceProperties &physicalDeviceProperties = context.getDeviceProperties();
811     // Calculate the local workgroup sizes to exercise the maximum supported by the driver
812     const auto &maxWorkGroupSize = physicalDeviceProperties.limits.maxComputeWorkGroupSize;
813     const auto &maxInvocations   = physicalDeviceProperties.limits.maxComputeWorkGroupInvocations;
814     const UVec3 localSize        = getLocalSizes(maxWorkGroupSize, maxInvocations, maxInvocations);
815     const uint32_t subgroupSize  = subgroups::getSubgroupSize(context);
816     // For full subgroups and allow varying subgroup size, localsize X must be a multiple of maxSubgroupSize.
817     // We set local size X for this test to the maximum, regardless if allow varying subgroup size is enabled or not.
818     const uint32_t localSizesToTestCount                      = 7;
819     const uint32_t localSizesToTest[localSizesToTestCount][3] = {
820         {subgroupSizeControlProperties.maxSubgroupSize, 1, 1},
821         {subgroupSizeControlProperties.maxSubgroupSize, 4, 1},
822         {subgroupSizeControlProperties.maxSubgroupSize, 1, 4},
823         {subgroupSizeControlProperties.maxSubgroupSize * 2, 1, 2},
824         {subgroupSizeControlProperties.maxSubgroupSize * 4, 1, 1},
825         {localSize.x(), localSize.y(), localSize.z()},
826         {1, 1, 1} // Isn't used, just here to make double buffering checks easier
827     };
828     const struct internalDataStruct internalData = {
829         &context,
830         caseDef,
831         subgroupSize,
832         false,
833     };
834 
835     DE_ASSERT(caseDef.requiredSubgroupSizeMode == REQUIRED_SUBGROUP_SIZE_NONE);
836 
837     return subgroups::makeComputeTestRequiredSubgroupSize(
838         context, VK_FORMAT_R32G32B32A32_UINT, nullptr, 0, &internalData, checkComputeRequireFull,
839         caseDef.pipelineShaderStageCreateFlags, numWorkgroups, makeDeBool(internalData.isRequiredSubgroupSize),
840         subgroupSize, localSizesToTest, localSizesToTestCount);
841 }
842 
testRequireSubgroupSize(Context & context,const CaseDefinition caseDef)843 TestStatus testRequireSubgroupSize(Context &context, const CaseDefinition caseDef)
844 {
845     if (isAllComputeStages(caseDef.shaderStage))
846     {
847         const uint32_t numWorkgroups[3] = {1, 1, 1};
848 #ifndef CTS_USES_VULKANSC
849         const VkPhysicalDeviceSubgroupSizeControlProperties &subgroupSizeControlProperties =
850             context.getSubgroupSizeControlProperties();
851 #else
852         const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT &subgroupSizeControlProperties =
853             context.getSubgroupSizeControlPropertiesEXT();
854 #endif // CTS_USES_VULKANSC
855         const VkPhysicalDeviceProperties &physicalDeviceProperties = context.getDeviceProperties();
856         const uint32_t requiredSubgroupSize =
857             getRequiredSubgroupSizeFromMode(context, caseDef, subgroupSizeControlProperties);
858         const uint64_t maxSubgroupLimitSize =
859             (uint64_t)requiredSubgroupSize * subgroupSizeControlProperties.maxComputeWorkgroupSubgroups;
860         const uint32_t maxTotalLocalSize = (uint32_t)min<uint64_t>(
861             maxSubgroupLimitSize, physicalDeviceProperties.limits.maxComputeWorkGroupInvocations);
862         const auto &maxWorkGroupSize          = physicalDeviceProperties.limits.maxComputeWorkGroupSize;
863         const auto &maxInvocations            = physicalDeviceProperties.limits.maxComputeWorkGroupInvocations;
864         const UVec3 localSize                 = getLocalSizes(maxWorkGroupSize, maxInvocations, maxTotalLocalSize);
865         const bool shaderUsesFullSubgroups    = caseDef.shaderUsesFullSubgroups();
866         const uint32_t localSizesToTest[5][3] = {
867             {localSize.x(), localSize.y(), localSize.z()},
868             {requiredSubgroupSize, 1, 1},
869             {1, requiredSubgroupSize, 1},
870             {1, 1, requiredSubgroupSize},
871             {1, 1, 1} // Isn't used, just here to make double buffering checks easier
872         };
873 
874         // If the shader uses full subgroups, use only the first two entries so the local size in X is a multiple of the requested
875         // subgroup size, as required by the spec.
876         uint32_t localSizesToTestCount = 5;
877         if (shaderUsesFullSubgroups)
878             localSizesToTestCount = 3;
879 
880         const internalDataStruct internalData = {
881             &context,             //  const Context* context;
882             caseDef,              //  struct CaseDefinition caseDef;
883             requiredSubgroupSize, //  uint32_t requiredSubgroupSize;
884             true,                 // bool isRequiredSubgroupSize;
885         };
886 
887         // Depending on the flag and SPIR-V version we need to run one verification function or another.
888         const auto checkFunction = (shaderUsesFullSubgroups ? checkComputeRequireFull : checkCompute);
889 
890         return subgroups::makeComputeTestRequiredSubgroupSize(
891             context, VK_FORMAT_R32G32B32A32_UINT, nullptr, 0, &internalData, checkFunction,
892             caseDef.pipelineShaderStageCreateFlags, numWorkgroups, makeDeBool(internalData.isRequiredSubgroupSize),
893             requiredSubgroupSize, localSizesToTest, localSizesToTestCount);
894     }
895 #ifndef CTS_USES_VULKANSC
896     else if (isAllMeshShadingStages(caseDef.shaderStage))
897     {
898         const auto isMesh = ((caseDef.shaderStage & VK_SHADER_STAGE_MESH_BIT_EXT) != 0u);
899         const auto isTask = ((caseDef.shaderStage & VK_SHADER_STAGE_TASK_BIT_EXT) != 0u);
900 
901         DE_ASSERT(isMesh != isTask);
902         DE_UNREF(isTask); // For release builds.
903 
904         const uint32_t numWorkgroups[3]           = {1, 1, 1};
905         const auto &subgroupSizeControlProperties = context.getSubgroupSizeControlProperties();
906         const auto &meshProperties                = context.getMeshShaderPropertiesEXT();
907         const uint32_t requiredSubgroupSize =
908             getRequiredSubgroupSizeFromMode(context, caseDef, subgroupSizeControlProperties);
909         const auto &maxWorkGroupSize =
910             (isMesh ? meshProperties.maxMeshWorkGroupSize : meshProperties.maxTaskWorkGroupSize);
911         const auto &maxInvocations =
912             (isMesh ? meshProperties.maxMeshWorkGroupInvocations : meshProperties.maxTaskWorkGroupInvocations);
913         const UVec3 localSize                 = getLocalSizes(maxWorkGroupSize, maxInvocations, maxInvocations);
914         const bool shaderUsesFullSubgroups    = caseDef.shaderUsesFullSubgroups();
915         const uint32_t localSizesToTest[5][3] = {
916             {requiredSubgroupSize, 1, 1},
917             {1, requiredSubgroupSize, 1},
918             {1, 1, requiredSubgroupSize},
919             {localSize.x(), localSize.y(), localSize.z()},
920             {1, 1, 1} // Isn't used, just here to make double buffering checks easier
921         };
922 
923         // If the shader uses full subgroups, use only the first two entries so the local size in X is a multiple of the requested
924         // subgroup size, as required by the spec.
925         uint32_t localSizesToTestCount = 5;
926         if (shaderUsesFullSubgroups)
927             localSizesToTestCount = 3;
928 
929         const internalDataStruct internalData = {
930             &context,             //  const Context* context;
931             caseDef,              //  struct CaseDefinition caseDef;
932             requiredSubgroupSize, //  uint32_t requiredSubgroupSize;
933             true,                 //  bool isRequiredSubgroupSize;
934         };
935 
936         // Depending on the flag and SPIR-V version we need to run one verification function or another.
937         const auto checkFunction = (shaderUsesFullSubgroups ? checkComputeRequireFull : checkCompute);
938 
939         return subgroups::makeMeshTestRequiredSubgroupSize(
940             context, VK_FORMAT_R32G32B32A32_UINT, nullptr, 0, &internalData, checkFunction,
941             caseDef.pipelineShaderStageCreateFlags, numWorkgroups, makeDeBool(internalData.isRequiredSubgroupSize),
942             requiredSubgroupSize, localSizesToTest, localSizesToTestCount);
943     }
944 #endif // CTS_USES_VULKANSC
945     else if (isAllGraphicsStages(caseDef.shaderStage))
946     {
947         const VkShaderStageFlags stages = subgroups::getPossibleGraphicsSubgroupStages(context, caseDef.shaderStage);
948 #ifndef CTS_USES_VULKANSC
949         const VkPhysicalDeviceSubgroupSizeControlProperties &subgroupSizeControlProperties =
950             context.getSubgroupSizeControlProperties();
951 #else
952         const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT &subgroupSizeControlProperties =
953             context.getSubgroupSizeControlPropertiesEXT();
954 #endif // CTS_USES_VULKANSC
955         const uint32_t requiredSubgroupSize =
956             getRequiredSubgroupSizeFromMode(context, caseDef, subgroupSizeControlProperties);
957         const uint32_t requiredSubgroupSizes[5] = {requiredSubgroupSize, requiredSubgroupSize, requiredSubgroupSize,
958                                                    requiredSubgroupSize, requiredSubgroupSize};
959         const internalDataStruct internalData   = {
960             &context,             //  const Context* context;
961             caseDef,              //  struct CaseDefinition caseDef;
962             requiredSubgroupSize, //  uint32_t requiredSubgroupSize;
963             true,                 //  bool isRequiredSubgroupSize;
964         };
965 
966         return subgroups::allStagesRequiredSubgroupSize(
967             context, VK_FORMAT_R32_UINT, DE_NULL, 0, &internalData, checkVertexPipelineStages, stages,
968             caseDef.pipelineShaderStageCreateFlags, caseDef.pipelineShaderStageCreateFlags,
969             caseDef.pipelineShaderStageCreateFlags, caseDef.pipelineShaderStageCreateFlags,
970             caseDef.pipelineShaderStageCreateFlags, requiredSubgroupSizes);
971     }
972 #ifndef CTS_USES_VULKANSC
973     else if (isAllRayTracingStages(caseDef.shaderStage))
974     {
975         const VkShaderStageFlags stages = subgroups::getPossibleRayTracingSubgroupStages(context, caseDef.shaderStage);
976         const VkPhysicalDeviceSubgroupSizeControlProperties &subgroupSizeControlProperties =
977             context.getSubgroupSizeControlProperties();
978         const uint32_t requiredSubgroupSize =
979             getRequiredSubgroupSizeFromMode(context, caseDef, subgroupSizeControlProperties);
980         const vector<uint32_t> flags(6, caseDef.pipelineShaderStageCreateFlags);
981         const vector<uint32_t> requiredSubgroupSizes(6, requiredSubgroupSize);
982         const struct internalDataStruct internalData = {
983             &context,             //  const Context* context;
984             caseDef,              //  struct CaseDefinition caseDef;
985             requiredSubgroupSize, //  uint32_t requiredSubgroupSize;
986             true,                 //  bool isRequiredSubgroupSize;
987         };
988 
989         return subgroups::allRayTracingStagesRequiredSubgroupSize(context, VK_FORMAT_R32_UINT, DE_NULL, 0,
990                                                                   &internalData, checkVertexPipelineStages, stages,
991                                                                   flags.data(), requiredSubgroupSizes.data());
992     }
993 #endif // CTS_USES_VULKANSC
994     else
995         TCU_THROW(InternalError, "Unknown stage or invalid stage set");
996 }
997 
noSSBOtestRequireSubgroupSize(Context & context,const CaseDefinition caseDef)998 TestStatus noSSBOtestRequireSubgroupSize(Context &context, const CaseDefinition caseDef)
999 {
1000 #ifndef CTS_USES_VULKANSC
1001     const VkPhysicalDeviceSubgroupSizeControlProperties &subgroupSizeControlProperties =
1002         context.getSubgroupSizeControlProperties();
1003 #else
1004     const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT &subgroupSizeControlProperties =
1005         context.getSubgroupSizeControlPropertiesEXT();
1006 #endif // CTS_USES_VULKANSC
1007     const uint32_t requiredSubgroupSize =
1008         getRequiredSubgroupSizeFromMode(context, caseDef, subgroupSizeControlProperties);
1009     const VkFormat format                  = VK_FORMAT_R32_UINT;
1010     const uint32_t &flags                  = caseDef.pipelineShaderStageCreateFlags;
1011     const uint32_t &size                   = requiredSubgroupSize;
1012     struct internalDataStruct internalData = {
1013         &context,
1014         caseDef,
1015         requiredSubgroupSize,
1016         true,
1017     };
1018 
1019     switch (caseDef.shaderStage)
1020     {
1021     case VK_SHADER_STAGE_VERTEX_BIT:
1022         return subgroups::makeVertexFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData,
1023                                                                         checkVertexPipelineStages, flags, size);
1024     case VK_SHADER_STAGE_GEOMETRY_BIT:
1025         return subgroups::makeGeometryFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData,
1026                                                                           checkVertexPipelineStages, flags, size);
1027     case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
1028         return subgroups::makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(
1029             context, format, DE_NULL, 0, &internalData, checkVertexPipelineStages, caseDef.shaderStage, flags, size);
1030     case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
1031         return subgroups::makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(
1032             context, format, DE_NULL, 0, &internalData, checkVertexPipelineStages, caseDef.shaderStage, flags, size);
1033     case VK_SHADER_STAGE_FRAGMENT_BIT:
1034         return subgroups::makeFragmentFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData,
1035                                                                           checkFragmentPipelineStages, flags, size);
1036     default:
1037         TCU_THROW(InternalError, "Unhandled shader stage");
1038     }
1039 }
1040 
testSanitySubgroupSizeProperties(Context & context)1041 TestStatus testSanitySubgroupSizeProperties(Context &context)
1042 {
1043 #ifndef CTS_USES_VULKANSC
1044     VkPhysicalDeviceSubgroupSizeControlProperties subgroupSizeControlProperties;
1045     subgroupSizeControlProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES;
1046 #else
1047     VkPhysicalDeviceSubgroupSizeControlPropertiesEXT subgroupSizeControlProperties;
1048     subgroupSizeControlProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT;
1049 #endif // CTS_USES_VULKANSC
1050 
1051     subgroupSizeControlProperties.pNext = DE_NULL;
1052 
1053     VkPhysicalDeviceSubgroupProperties subgroupProperties;
1054     subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
1055     subgroupProperties.pNext = &subgroupSizeControlProperties;
1056 
1057     VkPhysicalDeviceProperties2 properties;
1058     properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
1059     properties.pNext = &subgroupProperties;
1060 
1061     context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
1062 
1063     if (subgroupProperties.subgroupSize > subgroupSizeControlProperties.maxSubgroupSize ||
1064         subgroupProperties.subgroupSize < subgroupSizeControlProperties.minSubgroupSize)
1065     {
1066         ostringstream error;
1067         error << "subgroupSize (" << subgroupProperties.subgroupSize << ") is not between maxSubgroupSize (";
1068         error << subgroupSizeControlProperties.maxSubgroupSize << ") and minSubgroupSize (";
1069         error << subgroupSizeControlProperties.minSubgroupSize << ")";
1070 
1071         return TestStatus::fail(error.str().c_str());
1072     }
1073 
1074     return TestStatus::pass("OK");
1075 }
1076 } // namespace
1077 
1078 namespace vkt
1079 {
1080 namespace subgroups
1081 {
createSubgroupsSizeControlTests(TestContext & testCtx)1082 TestCaseGroup *createSubgroupsSizeControlTests(TestContext &testCtx)
1083 {
1084     de::MovePtr<TestCaseGroup> group(new TestCaseGroup(testCtx, "size_control"));
1085     de::MovePtr<TestCaseGroup> framebufferGroup(new TestCaseGroup(testCtx, "framebuffer"));
1086     de::MovePtr<TestCaseGroup> computeGroup(new TestCaseGroup(testCtx, "compute"));
1087     de::MovePtr<TestCaseGroup> graphicsGroup(new TestCaseGroup(testCtx, "graphics"));
1088 #ifndef CTS_USES_VULKANSC
1089     de::MovePtr<TestCaseGroup> raytracingGroup(new TestCaseGroup(testCtx, "ray_tracing"));
1090     de::MovePtr<TestCaseGroup> meshGroup(new TestCaseGroup(testCtx, "mesh"));
1091 #endif // CTS_USES_VULKANSC
1092     de::MovePtr<TestCaseGroup> genericGroup(new TestCaseGroup(testCtx, "generic"));
1093     const VkShaderStageFlags fbStages[] = {
1094         VK_SHADER_STAGE_VERTEX_BIT,
1095         VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
1096         VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
1097         VK_SHADER_STAGE_GEOMETRY_BIT,
1098         VK_SHADER_STAGE_FRAGMENT_BIT,
1099     };
1100 #ifndef CTS_USES_VULKANSC
1101     const VkShaderStageFlags meshStages[] = {
1102         VK_SHADER_STAGE_MESH_BIT_EXT,
1103         VK_SHADER_STAGE_TASK_BIT_EXT,
1104     };
1105 #endif // CTS_USES_VULKANSC
1106 
1107     // Test sanity of the subgroup size properties.
1108     {
1109         addFunctionCase(genericGroup.get(), "subgroup_size_properties", supportedCheck,
1110                         testSanitySubgroupSizeProperties);
1111     }
1112 
1113     const TestParams testParams[] = {{false, true, ""}, {true, false, "_spirv16"}, {true, true, "_flags_spirv16"}};
1114 
1115     for (const auto &params : testParams)
1116     {
1117         // Allow varying subgroup cases.
1118         const uint32_t flagsVary         = VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT;
1119         const CaseDefinition caseDefVary = {params.flagsEnabled ? flagsVary : 0u,
1120                                             VK_SHADER_STAGE_COMPUTE_BIT,
1121                                             false,
1122                                             REQUIRED_SUBGROUP_SIZE_NONE,
1123                                             de::SharedPtr<bool>(new bool),
1124                                             params.useSpirv16 ? SPIRV_VERSION_1_6 : SPIRV_VERSION_1_3};
1125 
1126         addFunctionCaseWithPrograms(computeGroup.get(), "allow_varying_subgroup_size" + params.postfix,
1127                                     supportedCheckFeatures, initPrograms, test, caseDefVary);
1128         addFunctionCaseWithPrograms(graphicsGroup.get(), "allow_varying_subgroup_size" + params.postfix,
1129                                     supportedCheckFeaturesShader, initPrograms, test, caseDefVary);
1130 
1131         for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(fbStages); ++stageIndex)
1132         {
1133             const CaseDefinition caseDefStage = {params.flagsEnabled ? flagsVary : 0u,
1134                                                  fbStages[stageIndex],
1135                                                  false,
1136                                                  REQUIRED_SUBGROUP_SIZE_NONE,
1137                                                  de::SharedPtr<bool>(new bool),
1138                                                  params.useSpirv16 ? SPIRV_VERSION_1_6 : SPIRV_VERSION_1_3};
1139 
1140             string name =
1141                 getShaderStageName(caseDefStage.shaderStage) + "_allow_varying_subgroup_size" + params.postfix;
1142             addFunctionCaseWithPrograms(framebufferGroup.get(), name, supportedCheckFeaturesShader,
1143                                         initFrameBufferPrograms, noSSBOtest, caseDefStage);
1144         }
1145 
1146 #ifndef CTS_USES_VULKANSC
1147         for (const auto &stage : meshStages)
1148         {
1149             const CaseDefinition caseDefMesh = {(params.flagsEnabled ? flagsVary : 0u),
1150                                                 stage,
1151                                                 false,
1152                                                 REQUIRED_SUBGROUP_SIZE_NONE,
1153                                                 de::SharedPtr<bool>(new bool),
1154                                                 (params.useSpirv16 ? SPIRV_VERSION_1_6 : SPIRV_VERSION_1_4)};
1155             const std::string name = getShaderStageName(stage) + "_allow_varying_subgroup_size" + params.postfix;
1156             addFunctionCaseWithPrograms(meshGroup.get(), name, supportedCheckFeatures, initPrograms, test, caseDefMesh);
1157         }
1158 #endif // CTS_USES_VULKANSC
1159 
1160         // Require full subgroups together with allow varying subgroup (only compute shaders).
1161         const uint32_t flagsFullVary = VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT |
1162                                        VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT;
1163         const CaseDefinition caseDefFullVary = {params.flagsEnabled ? flagsFullVary : 0u,
1164                                                 VK_SHADER_STAGE_COMPUTE_BIT,
1165                                                 true,
1166                                                 REQUIRED_SUBGROUP_SIZE_NONE,
1167                                                 de::SharedPtr<bool>(new bool),
1168                                                 params.useSpirv16 ? SPIRV_VERSION_1_6 : SPIRV_VERSION_1_3};
1169         addFunctionCaseWithPrograms(
1170             computeGroup.get(), "require_full_subgroups_allow_varying_subgroup_size" + params.postfix,
1171             supportedCheckFeatures, initProgramsRequireFull, testRequireFullSubgroups, caseDefFullVary);
1172 
1173         // Require full subgroups cases (only compute shaders).
1174         const uint32_t flagsFull         = VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT;
1175         const CaseDefinition caseDefFull = {params.flagsEnabled ? flagsFull : 0u,
1176                                             VK_SHADER_STAGE_COMPUTE_BIT,
1177                                             true,
1178                                             REQUIRED_SUBGROUP_SIZE_NONE,
1179                                             de::SharedPtr<bool>(new bool),
1180                                             params.useSpirv16 ? SPIRV_VERSION_1_6 : SPIRV_VERSION_1_3};
1181         addFunctionCaseWithPrograms(computeGroup.get(), "require_full_subgroups" + params.postfix,
1182                                     supportedCheckFeatures, initProgramsRequireFull, testRequireFullSubgroups,
1183                                     caseDefFull);
1184 
1185         // Tests to check setting a required subgroup size value, together with require full subgroups (only compute shaders).
1186         const CaseDefinition caseDefMaxFull = {params.flagsEnabled ? flagsFull : 0u,
1187                                                VK_SHADER_STAGE_COMPUTE_BIT,
1188                                                true,
1189                                                REQUIRED_SUBGROUP_SIZE_MAX,
1190                                                de::SharedPtr<bool>(new bool),
1191                                                params.useSpirv16 ? SPIRV_VERSION_1_6 : SPIRV_VERSION_1_3};
1192         addFunctionCaseWithPrograms(
1193             computeGroup.get(), "required_subgroup_size_max_require_full_subgroups" + params.postfix,
1194             supportedCheckFeatures, initProgramsRequireFull, testRequireSubgroupSize, caseDefMaxFull);
1195 
1196         const CaseDefinition caseDefMinFull = {params.flagsEnabled ? flagsFull : 0u,
1197                                                VK_SHADER_STAGE_COMPUTE_BIT,
1198                                                true,
1199                                                REQUIRED_SUBGROUP_SIZE_MIN,
1200                                                de::SharedPtr<bool>(new bool),
1201                                                params.useSpirv16 ? SPIRV_VERSION_1_6 : SPIRV_VERSION_1_3};
1202         addFunctionCaseWithPrograms(
1203             computeGroup.get(), "required_subgroup_size_min_require_full_subgroups" + params.postfix,
1204             supportedCheckFeatures, initProgramsRequireFull, testRequireSubgroupSize, caseDefMinFull);
1205 
1206         // Ray tracing cases with allow varying subgroup size.
1207 #ifndef CTS_USES_VULKANSC
1208         const uint32_t flagsRayTracing            = VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT;
1209         const CaseDefinition caseDefAllRaytracing = {params.flagsEnabled ? flagsRayTracing : 0u,
1210                                                      SHADER_STAGE_ALL_RAY_TRACING,
1211                                                      false,
1212                                                      REQUIRED_SUBGROUP_SIZE_NONE,
1213                                                      de::SharedPtr<bool>(new bool),
1214                                                      params.useSpirv16 ? SPIRV_VERSION_1_6 : SPIRV_VERSION_1_4};
1215         addFunctionCaseWithPrograms(raytracingGroup.get(), "allow_varying_subgroup_size" + params.postfix,
1216                                     supportedCheckFeaturesShader, initPrograms, test, caseDefAllRaytracing);
1217 #endif // CTS_USES_VULKANSC
1218     }
1219 
1220     // Tests to check setting a required subgroup size value.
1221     {
1222         const CaseDefinition caseDefAllGraphicsMax = {0u,
1223                                                       VK_SHADER_STAGE_ALL_GRAPHICS,
1224                                                       false,
1225                                                       REQUIRED_SUBGROUP_SIZE_MAX,
1226                                                       de::SharedPtr<bool>(new bool),
1227                                                       SPIRV_VERSION_1_3};
1228         addFunctionCaseWithPrograms(graphicsGroup.get(), "required_subgroup_size_max", supportedCheckFeaturesShader,
1229                                     initPrograms, testRequireSubgroupSize, caseDefAllGraphicsMax);
1230         const CaseDefinition caseDefComputeMax = {0u,
1231                                                   VK_SHADER_STAGE_COMPUTE_BIT,
1232                                                   false,
1233                                                   REQUIRED_SUBGROUP_SIZE_MAX,
1234                                                   de::SharedPtr<bool>(new bool),
1235                                                   SPIRV_VERSION_1_3};
1236         addFunctionCaseWithPrograms(computeGroup.get(), "required_subgroup_size_max", supportedCheckFeatures,
1237                                     initPrograms, testRequireSubgroupSize, caseDefComputeMax);
1238 #ifndef CTS_USES_VULKANSC
1239         const CaseDefinition caseDefAllRaytracingMax = {0u,
1240                                                         SHADER_STAGE_ALL_RAY_TRACING,
1241                                                         false,
1242                                                         REQUIRED_SUBGROUP_SIZE_MAX,
1243                                                         de::SharedPtr<bool>(new bool),
1244                                                         SPIRV_VERSION_1_4};
1245         addFunctionCaseWithPrograms(raytracingGroup.get(), "required_subgroup_size_max", supportedCheckFeaturesShader,
1246                                     initPrograms, testRequireSubgroupSize, caseDefAllRaytracingMax);
1247 #endif // CTS_USES_VULKANSC
1248 
1249         const CaseDefinition caseDefAllGraphicsMin = {0u,
1250                                                       VK_SHADER_STAGE_ALL_GRAPHICS,
1251                                                       false,
1252                                                       REQUIRED_SUBGROUP_SIZE_MIN,
1253                                                       de::SharedPtr<bool>(new bool),
1254                                                       SPIRV_VERSION_1_3};
1255         addFunctionCaseWithPrograms(graphicsGroup.get(), "required_subgroup_size_min", supportedCheckFeaturesShader,
1256                                     initPrograms, testRequireSubgroupSize, caseDefAllGraphicsMin);
1257         const CaseDefinition caseDefComputeMin = {0u,
1258                                                   VK_SHADER_STAGE_COMPUTE_BIT,
1259                                                   false,
1260                                                   REQUIRED_SUBGROUP_SIZE_MIN,
1261                                                   de::SharedPtr<bool>(new bool),
1262                                                   SPIRV_VERSION_1_3};
1263         addFunctionCaseWithPrograms(computeGroup.get(), "required_subgroup_size_min", supportedCheckFeatures,
1264                                     initPrograms, testRequireSubgroupSize, caseDefComputeMin);
1265 #ifndef CTS_USES_VULKANSC
1266         const CaseDefinition caseDefAllRaytracingMin = {0u,
1267                                                         SHADER_STAGE_ALL_RAY_TRACING,
1268                                                         false,
1269                                                         REQUIRED_SUBGROUP_SIZE_MIN,
1270                                                         de::SharedPtr<bool>(new bool),
1271                                                         SPIRV_VERSION_1_4};
1272         addFunctionCaseWithPrograms(raytracingGroup.get(), "required_subgroup_size_min", supportedCheckFeaturesShader,
1273                                     initPrograms, testRequireSubgroupSize, caseDefAllRaytracingMin);
1274 #endif // CTS_USES_VULKANSC
1275         for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(fbStages); ++stageIndex)
1276         {
1277             const CaseDefinition caseDefStageMax = {0u,
1278                                                     fbStages[stageIndex],
1279                                                     false,
1280                                                     REQUIRED_SUBGROUP_SIZE_MAX,
1281                                                     de::SharedPtr<bool>(new bool),
1282                                                     SPIRV_VERSION_1_3};
1283             addFunctionCaseWithPrograms(
1284                 framebufferGroup.get(), getShaderStageName(caseDefStageMax.shaderStage) + "_required_subgroup_size_max",
1285                 supportedCheckFeaturesShader, initFrameBufferPrograms, noSSBOtestRequireSubgroupSize, caseDefStageMax);
1286             const CaseDefinition caseDefStageMin = {0u,
1287                                                     fbStages[stageIndex],
1288                                                     false,
1289                                                     REQUIRED_SUBGROUP_SIZE_MIN,
1290                                                     de::SharedPtr<bool>(new bool),
1291                                                     SPIRV_VERSION_1_3};
1292             addFunctionCaseWithPrograms(
1293                 framebufferGroup.get(), getShaderStageName(caseDefStageMin.shaderStage) + "_required_subgroup_size_min",
1294                 supportedCheckFeaturesShader, initFrameBufferPrograms, noSSBOtestRequireSubgroupSize, caseDefStageMin);
1295         }
1296 
1297 #ifndef CTS_USES_VULKANSC
1298         for (const auto &stage : meshStages)
1299         {
1300             const auto stageName = getShaderStageName(stage);
1301 
1302             const CaseDefinition caseDefMeshMax = {
1303                 0u, stage, false, REQUIRED_SUBGROUP_SIZE_MAX, de::SharedPtr<bool>(new bool), SPIRV_VERSION_1_4};
1304             addFunctionCaseWithPrograms(meshGroup.get(), "required_subgroup_size_max_" + stageName,
1305                                         supportedCheckFeatures, initPrograms, testRequireSubgroupSize, caseDefMeshMax);
1306             const CaseDefinition caseDefMeshMin = {
1307                 0u, stage, false, REQUIRED_SUBGROUP_SIZE_MIN, de::SharedPtr<bool>(new bool), SPIRV_VERSION_1_4};
1308             addFunctionCaseWithPrograms(meshGroup.get(), "required_subgroup_size_min_" + stageName,
1309                                         supportedCheckFeatures, initPrograms, testRequireSubgroupSize, caseDefMeshMin);
1310         }
1311 #endif // CTS_USES_VULKANSC
1312     }
1313 
1314     group->addChild(genericGroup.release());
1315     group->addChild(graphicsGroup.release());
1316     group->addChild(computeGroup.release());
1317     group->addChild(framebufferGroup.release());
1318 #ifndef CTS_USES_VULKANSC
1319     group->addChild(raytracingGroup.release());
1320     group->addChild(meshGroup.release());
1321 #endif // CTS_USES_VULKANSC
1322 
1323     return group.release();
1324 }
1325 
1326 } // namespace subgroups
1327 } // namespace vkt
1328