xref: /aosp_15_r20/external/deqp/external/vulkancts/modules/vulkan/subgroups/vktSubgroupsBasicTests.cpp (revision 35238bce31c2a825756842865a792f8cf7f89930)
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2019 The Khronos Group Inc.
6  * Copyright (c) 2019 Google Inc.
7  * Copyright (c) 2017 Codeplay Software Ltd.
8  *
9  * Licensed under the Apache License, Version 2.0 (the "License");
10  * you may not use this file except in compliance with the License.
11  * You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  */ /*!
22  * \file
23  * \brief Subgroups Tests
24  */ /*--------------------------------------------------------------------*/
25 
26 #include "vktSubgroupsBasicTests.hpp"
27 #include "vktSubgroupsTestsUtils.hpp"
28 
29 #include "tcuStringTemplate.hpp"
30 
31 #include <string>
32 #include <vector>
33 
34 using namespace tcu;
35 using namespace std;
36 using namespace vk;
37 using namespace vkt;
38 
39 namespace
40 {
// Subgroup built-in exercised by a test case. getOpTypeName() maps every
// value except OPTYPE_LAST to the name of the corresponding GLSL function.
enum OpType
{
    OPTYPE_ELECT                          = 0,
    OPTYPE_SUBGROUP_BARRIER               = 1,
    OPTYPE_SUBGROUP_MEMORY_BARRIER        = 2,
    OPTYPE_SUBGROUP_MEMORY_BARRIER_BUFFER = 3,
    OPTYPE_SUBGROUP_MEMORY_BARRIER_SHARED = 4,
    OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE  = 5,
    OPTYPE_LAST                           = 6 // not an operation; equals the number of operations above
};
51 
52 struct CaseDefinition
53 {
54     OpType opType;
55     VkShaderStageFlags shaderStage;
56     de::SharedPtr<bool> geometryPointSizeSupported;
57     bool requiredSubgroupSize;
58 };
59 
60 static const uint32_t ELECTED_VALUE          = 42u;
61 static const uint32_t UNELECTED_VALUE        = 13u;
62 static const VkDeviceSize SHADER_BUFFER_SIZE = 4096ull; // min(maxUniformBufferRange, maxImageDimension1D)
63 
// Validates the framebuffer produced by the fragment-stage barrier tests.
//
// datas[0] is read as width * height entries of four floats each. For every entry:
//   - when withImage is false, component 0 must be non-zero;
//   - if component 2 equals 1.0, component 0 must equal component 1;
//   - otherwise component 0 must equal component 3.
// NOTE(review): the meaning of the individual components is defined by the shaders,
// which are not part of this function - confirm against the shader source.
//
// The vector is only read, so it is taken by const reference to avoid copying it on every call.
// Returns true when every entry satisfies the rules above (trivially true for an empty image).
static bool _checkFragmentSubgroupBarriersNoSSBO(const vector<const void *> &datas, uint32_t width, uint32_t height,
                                                 bool withImage)
{
    const float *const resultData = reinterpret_cast<const float *>(datas[0]);

    for (uint32_t x = 0u; x < width; ++x)
    {
        for (uint32_t y = 0u; y < height; ++y)
        {
            const uint32_t ndx = (x * height + y) * 4u;
            const float value  = resultData[ndx];

            if (!withImage && value == 0.0f)
                return false;

            // Component 2 selects which of the two reference components applies.
            const float expected = (resultData[ndx + 2] == 1.0f) ? resultData[ndx + 1] : resultData[ndx + 3];

            if (value != expected)
                return false;
        }
    }

    return true;
}
95 
checkFragmentSubgroupBarriersNoSSBO(const void * internalData,vector<const void * > datas,uint32_t width,uint32_t height,uint32_t)96 static bool checkFragmentSubgroupBarriersNoSSBO(const void *internalData, vector<const void *> datas, uint32_t width,
97                                                 uint32_t height, uint32_t)
98 {
99     DE_UNREF(internalData);
100 
101     return _checkFragmentSubgroupBarriersNoSSBO(datas, width, height, false);
102 }
103 
checkFragmentSubgroupBarriersWithImageNoSSBO(const void * internalData,vector<const void * > datas,uint32_t width,uint32_t height,uint32_t)104 static bool checkFragmentSubgroupBarriersWithImageNoSSBO(const void *internalData, vector<const void *> datas,
105                                                          uint32_t width, uint32_t height, uint32_t)
106 {
107     DE_UNREF(internalData);
108 
109     return _checkFragmentSubgroupBarriersNoSSBO(datas, width, height, true);
110 }
111 
checkVertexPipelineStagesSubgroupElectNoSSBO(const void * internalData,vector<const void * > datas,uint32_t width,uint32_t)112 static bool checkVertexPipelineStagesSubgroupElectNoSSBO(const void *internalData, vector<const void *> datas,
113                                                          uint32_t width, uint32_t)
114 {
115     DE_UNREF(internalData);
116 
117     const float *const resultData = reinterpret_cast<const float *>(datas[0]);
118     float poisonValuesFound       = 0.0f;
119     float numSubgroupsUsed        = 0.0f;
120 
121     for (uint32_t x = 0; x < width; ++x)
122     {
123         uint32_t val = static_cast<uint32_t>(resultData[x * 2]);
124         numSubgroupsUsed += resultData[x * 2 + 1];
125 
126         switch (val)
127         {
128         default:
129             // some garbage value was found!
130             return false;
131         case UNELECTED_VALUE:
132             break;
133         case ELECTED_VALUE:
134             poisonValuesFound += 1.0f;
135             break;
136         }
137     }
138 
139     return numSubgroupsUsed == poisonValuesFound;
140 }
141 
checkVertexPipelineStagesSubgroupElect(const void * internalData,vector<const void * > datas,uint32_t width,uint32_t,bool multipleCallsPossible)142 static bool checkVertexPipelineStagesSubgroupElect(const void *internalData, vector<const void *> datas, uint32_t width,
143                                                    uint32_t, bool multipleCallsPossible)
144 {
145     DE_UNREF(internalData);
146 
147     const uint32_t *const resultData = reinterpret_cast<const uint32_t *>(datas[0]);
148     uint32_t poisonValuesFound       = 0;
149 
150     for (uint32_t x = 0; x < width; ++x)
151     {
152         uint32_t val = resultData[x];
153 
154         switch (val)
155         {
156         default:
157             // some garbage value was found!
158             return false;
159         case UNELECTED_VALUE:
160             break;
161         case ELECTED_VALUE:
162             poisonValuesFound++;
163             break;
164         }
165     }
166 
167     // we used an atomicly incremented counter to note how many subgroups we used for the vertex shader
168     const uint32_t numSubgroupsUsed = *reinterpret_cast<const uint32_t *>(datas[1]);
169 
170     return (multipleCallsPossible ? (numSubgroupsUsed >= poisonValuesFound) : (numSubgroupsUsed == poisonValuesFound));
171 }
172 
checkVertexPipelineStagesSubgroupBarriers(const void * internalData,vector<const void * > datas,uint32_t width,uint32_t)173 static bool checkVertexPipelineStagesSubgroupBarriers(const void *internalData, vector<const void *> datas,
174                                                       uint32_t width, uint32_t)
175 {
176     DE_UNREF(internalData);
177 
178     const uint32_t *const resultData = reinterpret_cast<const uint32_t *>(datas[0]);
179 
180     // We used this SSBO to generate our unique value!
181     const uint32_t ref = *reinterpret_cast<const uint32_t *>(datas[3]);
182 
183     for (uint32_t x = 0; x < width; ++x)
184     {
185         uint32_t val = resultData[x];
186 
187         if (val != ref)
188             return false;
189     }
190 
191     return true;
192 }
193 
// Validates the framebuffer produced by the vertex-pipeline barrier tests without SSBOs.
//
// datas[0] is read as `width` entries of four floats each. For every entry:
//   - when withImage is false, component 0 must be non-zero;
//   - if component 2 equals 1.0, component 0 must equal component 1;
//   - otherwise component 0 must equal component 3.
// NOTE(review): the meaning of the individual components is defined by the shaders,
// which are not part of this function - confirm against the shader source.
//
// The vector is only read, so it is taken by const reference to avoid copying it on every call.
// Returns true when every entry satisfies the rules above (trivially true for width == 0).
static bool _checkVertexPipelineStagesSubgroupBarriersNoSSBO(const vector<const void *> &datas, uint32_t width,
                                                             bool withImage)
{
    const float *const resultData = reinterpret_cast<const float *>(datas[0]);

    for (uint32_t x = 0u; x < width; ++x)
    {
        const uint32_t ndx = x * 4u;
        const float value  = resultData[ndx];

        if (!withImage && value == 0.0f)
            return false;

        // Component 2 selects which of the two reference components applies.
        const float expected = (resultData[ndx + 2] == 1.0f) ? resultData[ndx + 1] : resultData[ndx + 3];

        if (value != expected)
            return false;
    }

    return true;
}
218 
checkVertexPipelineStagesSubgroupBarriersNoSSBO(const void * internalData,vector<const void * > datas,uint32_t width,uint32_t)219 static bool checkVertexPipelineStagesSubgroupBarriersNoSSBO(const void *internalData, vector<const void *> datas,
220                                                             uint32_t width, uint32_t)
221 {
222     DE_UNREF(internalData);
223 
224     return _checkVertexPipelineStagesSubgroupBarriersNoSSBO(datas, width, false);
225 }
226 
checkVertexPipelineStagesSubgroupBarriersWithImageNoSSBO(const void * internalData,vector<const void * > datas,uint32_t width,uint32_t)227 static bool checkVertexPipelineStagesSubgroupBarriersWithImageNoSSBO(const void *internalData,
228                                                                      vector<const void *> datas, uint32_t width,
229                                                                      uint32_t)
230 {
231     DE_UNREF(internalData);
232 
233     return _checkVertexPipelineStagesSubgroupBarriersNoSSBO(datas, width, true);
234 }
235 
// Validates the framebuffer produced by the tessellation-evaluation barrier tests without SSBOs.
//
// datas[0] is read as `width` entries of four floats each. For every entry:
//   - when withImage is false, component 0 must be non-zero;
//   - if component 2 equals 0.0, component 0 must equal component 3;
//   - entries whose component 2 is non-zero are not compared any further.
// NOTE(review): the meaning of the individual components is defined by the shaders,
// which are not part of this function - confirm against the shader source.
//
// The unnamed uint32_t parameter is ignored. The vector is only read, so it is taken by
// const reference to avoid copying it on every call.
// Returns true when every entry satisfies the rules above (trivially true for width == 0).
static bool _checkTessellationEvaluationSubgroupBarriersNoSSBO(const vector<const void *> &datas, uint32_t width,
                                                               uint32_t, bool withImage)
{
    const float *const resultData = reinterpret_cast<const float *>(datas[0]);

    for (uint32_t x = 0u; x < width; ++x)
    {
        const uint32_t ndx = x * 4u;
        const float value  = resultData[ndx];

        if (!withImage && value == 0.0f)
            return false;

        if (resultData[ndx + 2] == 0.0f && value != resultData[ndx + 3])
            return false;
    }

    return true;
}
257 
checkTessellationEvaluationSubgroupBarriersWithImageNoSSBO(const void * internalData,vector<const void * > datas,uint32_t width,uint32_t height)258 static bool checkTessellationEvaluationSubgroupBarriersWithImageNoSSBO(const void *internalData,
259                                                                        vector<const void *> datas, uint32_t width,
260                                                                        uint32_t height)
261 {
262     DE_UNREF(internalData);
263 
264     return _checkTessellationEvaluationSubgroupBarriersNoSSBO(datas, width, height, true);
265 }
266 
checkTessellationEvaluationSubgroupBarriersNoSSBO(const void * internalData,vector<const void * > datas,uint32_t width,uint32_t height)267 static bool checkTessellationEvaluationSubgroupBarriersNoSSBO(const void *internalData, vector<const void *> datas,
268                                                               uint32_t width, uint32_t height)
269 {
270     DE_UNREF(internalData);
271 
272     return _checkTessellationEvaluationSubgroupBarriersNoSSBO(datas, width, height, false);
273 }
274 
checkComputeOrMeshSubgroupElect(const void * internalData,vector<const void * > datas,const uint32_t numWorkgroups[3],const uint32_t localSize[3],uint32_t)275 static bool checkComputeOrMeshSubgroupElect(const void *internalData, vector<const void *> datas,
276                                             const uint32_t numWorkgroups[3], const uint32_t localSize[3], uint32_t)
277 {
278     DE_UNREF(internalData);
279 
280     return subgroups::checkComputeOrMesh(datas, numWorkgroups, localSize, 1);
281 }
282 
checkComputeOrMeshSubgroupBarriers(const void * internalData,vector<const void * > datas,const uint32_t numWorkgroups[3],const uint32_t localSize[3],uint32_t)283 static bool checkComputeOrMeshSubgroupBarriers(const void *internalData, vector<const void *> datas,
284                                                const uint32_t numWorkgroups[3], const uint32_t localSize[3], uint32_t)
285 {
286     DE_UNREF(internalData);
287 
288     // We used this SSBO to generate our unique value!
289     const uint32_t ref = *reinterpret_cast<const uint32_t *>(datas[2]);
290 
291     return subgroups::checkComputeOrMesh(datas, numWorkgroups, localSize, ref);
292 }
293 
getOpTypeName(OpType opType)294 string getOpTypeName(OpType opType)
295 {
296     switch (opType)
297     {
298     case OPTYPE_ELECT:
299         return "subgroupElect";
300     case OPTYPE_SUBGROUP_BARRIER:
301         return "subgroupBarrier";
302     case OPTYPE_SUBGROUP_MEMORY_BARRIER:
303         return "subgroupMemoryBarrier";
304     case OPTYPE_SUBGROUP_MEMORY_BARRIER_BUFFER:
305         return "subgroupMemoryBarrierBuffer";
306     case OPTYPE_SUBGROUP_MEMORY_BARRIER_SHARED:
307         return "subgroupMemoryBarrierShared";
308     case OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE:
309         return "subgroupMemoryBarrierImage";
310     default:
311         TCU_THROW(InternalError, "Unsupported op type");
312     }
313 }
314 
initFrameBufferPrograms(SourceCollections & programCollection,CaseDefinition caseDef)315 void initFrameBufferPrograms(SourceCollections &programCollection, CaseDefinition caseDef)
316 {
317     const ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3, 0u);
318     const SpirVAsmBuildOptions buildOptionsSpr(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3);
319 
320     if (VK_SHADER_STAGE_FRAGMENT_BIT != caseDef.shaderStage)
321     {
322         /*
323             "layout(location = 0) in vec4 in_color;\n"
324             "layout(location = 0) out vec4 out_color;\n"
325             "void main()\n"
326             {\n"
327             "    out_color = in_color;\n"
328             "}\n";
329         */
330         const string fragment = "; SPIR-V\n"
331                                 "; Version: 1.3\n"
332                                 "; Generator: Khronos Glslang Reference Front End; 2\n"
333                                 "; Bound: 13\n"
334                                 "; Schema: 0\n"
335                                 "OpCapability Shader\n"
336                                 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
337                                 "OpMemoryModel Logical GLSL450\n"
338                                 "OpEntryPoint Fragment %4 \"main\" %9 %11\n"
339                                 "OpExecutionMode %4 OriginUpperLeft\n"
340                                 "OpDecorate %9 Location 0\n"
341                                 "OpDecorate %11 Location 0\n"
342                                 "%2 = OpTypeVoid\n"
343                                 "%3 = OpTypeFunction %2\n"
344                                 "%6 = OpTypeFloat 32\n"
345                                 "%7 = OpTypeVector %6 4\n"
346                                 "%8 = OpTypePointer Output %7\n"
347                                 "%9 = OpVariable %8 Output\n"
348                                 "%10 = OpTypePointer Input %7\n"
349                                 "%11 = OpVariable %10 Input\n"
350                                 "%4 = OpFunction %2 None %3\n"
351                                 "%5 = OpLabel\n"
352                                 "%12 = OpLoad %7 %11\n"
353                                 "OpStore %9 %12\n"
354                                 "OpReturn\n"
355                                 "OpFunctionEnd\n";
356 
357         programCollection.spirvAsmSources.add("fragment") << fragment;
358     }
359     if (VK_SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage)
360     {
361         /*
362             "#version 450\n"
363             "void main (void)\n"
364             "{\n"
365             "  vec2 uv = vec2((gl_VertexIndex << 1) & 2, gl_VertexIndex & 2);\n"
366             "  gl_Position = vec4(uv * 2.0f + -1.0f, 0.0f, 1.0f);\n"
367             "  gl_PointSize = 1.0f;\n"
368             "}\n";
369         */
370         const string vertex = "; SPIR-V\n"
371                               "; Version: 1.3\n"
372                               "; Generator: Khronos Glslang Reference Front End; 2\n"
373                               "; Bound: 44\n"
374                               "; Schema: 0\n"
375                               "OpCapability Shader\n"
376                               "%1 = OpExtInstImport \"GLSL.std.450\"\n"
377                               "OpMemoryModel Logical GLSL450\n"
378                               "OpEntryPoint Vertex %4 \"main\" %12 %29\n"
379                               "OpDecorate %12 BuiltIn VertexIndex\n"
380                               "OpMemberDecorate %27 0 BuiltIn Position\n"
381                               "OpMemberDecorate %27 1 BuiltIn PointSize\n"
382                               "OpMemberDecorate %27 2 BuiltIn ClipDistance\n"
383                               "OpMemberDecorate %27 3 BuiltIn CullDistance\n"
384                               "OpDecorate %27 Block\n"
385                               "%2 = OpTypeVoid\n"
386                               "%3 = OpTypeFunction %2\n"
387                               "%6 = OpTypeFloat 32\n"
388                               "%7 = OpTypeVector %6 2\n"
389                               "%8 = OpTypePointer Function %7\n"
390                               "%10 = OpTypeInt 32 1\n"
391                               "%11 = OpTypePointer Input %10\n"
392                               "%12 = OpVariable %11 Input\n"
393                               "%14 = OpConstant %10 1\n"
394                               "%16 = OpConstant %10 2\n"
395                               "%23 = OpTypeVector %6 4\n"
396                               "%24 = OpTypeInt 32 0\n"
397                               "%25 = OpConstant %24 1\n"
398                               "%26 = OpTypeArray %6 %25\n"
399                               "%27 = OpTypeStruct %23 %6 %26 %26\n"
400                               "%28 = OpTypePointer Output %27\n"
401                               "%29 = OpVariable %28 Output\n"
402                               "%30 = OpConstant %10 0\n"
403                               "%32 = OpConstant %6 2\n"
404                               "%34 = OpConstant %6 -1\n"
405                               "%37 = OpConstant %6 0\n"
406                               "%38 = OpConstant %6 1\n"
407                               "%42 = OpTypePointer Output %23\n"
408                               "%44 = OpTypePointer Output %6\n"
409                               "%4 = OpFunction %2 None %3\n"
410                               "%5 = OpLabel\n"
411                               "%9 = OpVariable %8 Function\n"
412                               "%13 = OpLoad %10 %12\n"
413                               "%15 = OpShiftLeftLogical %10 %13 %14\n"
414                               "%17 = OpBitwiseAnd %10 %15 %16\n"
415                               "%18 = OpConvertSToF %6 %17\n"
416                               "%19 = OpLoad %10 %12\n"
417                               "%20 = OpBitwiseAnd %10 %19 %16\n"
418                               "%21 = OpConvertSToF %6 %20\n"
419                               "%22 = OpCompositeConstruct %7 %18 %21\n"
420                               "OpStore %9 %22\n"
421                               "%31 = OpLoad %7 %9\n"
422                               "%33 = OpVectorTimesScalar %7 %31 %32\n"
423                               "%35 = OpCompositeConstruct %7 %34 %34\n"
424                               "%36 = OpFAdd %7 %33 %35\n"
425                               "%39 = OpCompositeExtract %6 %36 0\n"
426                               "%40 = OpCompositeExtract %6 %36 1\n"
427                               "%41 = OpCompositeConstruct %23 %39 %40 %37 %38\n"
428                               "%43 = OpAccessChain %42 %29 %30\n"
429                               "OpStore %43 %41\n"
430                               "%45 = OpAccessChain %44 %29 %14\n"
431                               "OpStore %45 %38\n"
432                               "OpReturn\n"
433                               "OpFunctionEnd\n";
434 
435         programCollection.spirvAsmSources.add("vert") << vertex;
436     }
437     else if (VK_SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage)
438     {
439         subgroups::setVertexShaderFrameBuffer(programCollection);
440     }
441 
442     if (OPTYPE_ELECT == caseDef.opType)
443     {
444         ostringstream electedValue;
445         ostringstream unelectedValue;
446 
447         electedValue << ELECTED_VALUE;
448         unelectedValue << UNELECTED_VALUE;
449 
450         if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
451         {
452             /*
453                 "#extension GL_KHR_shader_subgroup_basic: enable\n"
454                 "layout(location = 0) out vec4 out_color;\n"
455                 "layout(location = 0) in highp vec4 in_position;\n"
456                 "\n"
457                 "void main (void)\n"
458                 "{\n"
459                 "  if (subgroupElect())\n"
460                 "  {\n"
461                 "    out_color.r = " << ELECTED_VALUE << ";\n"
462                 "    out_color.g = 1.0f;\n"
463                 "  }\n"
464                 "  else\n"
465                 "  {\n"
466                 "    out_color.r = " << UNELECTED_VALUE << ";\n"
467                 "    out_color.g = 0.0f;\n"
468                 "  }\n"
469                 "  gl_Position = in_position;\n"
470                 "  gl_PointSize = 1.0f;\n"
471                 "}\n";
472             */
473             const string vertex = "; SPIR-V\n"
474                                   "; Version: 1.3\n"
475                                   "; Generator: Khronos Glslang Reference Front End; 2\n"
476                                   "; Bound: 38\n"
477                                   "; Schema: 0\n"
478                                   "OpCapability Shader\n"
479                                   "OpCapability GroupNonUniform\n"
480                                   "%1 = OpExtInstImport \"GLSL.std.450\"\n"
481                                   "OpMemoryModel Logical GLSL450\n"
482                                   "OpEntryPoint Vertex %4 \"main\" %15 %31 %35\n"
483                                   "OpDecorate %15 Location 0\n"
484                                   "OpMemberDecorate %29 0 BuiltIn Position\n"
485                                   "OpMemberDecorate %29 1 BuiltIn PointSize\n"
486                                   "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
487                                   "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
488                                   "OpDecorate %29 Block\n"
489                                   "OpDecorate %35 Location 0\n"
490                                   "%2 = OpTypeVoid\n"
491                                   "%3 = OpTypeFunction %2\n"
492                                   "%6 = OpTypeBool\n"
493                                   "%7 = OpTypeInt 32 0\n"
494                                   "%8 = OpConstant %7 3\n"
495                                   "%12 = OpTypeFloat 32\n"
496                                   "%13 = OpTypeVector %12 4\n"
497                                   "%14 = OpTypePointer Output %13\n"
498                                   "%15 = OpVariable %14 Output\n"
499                                   "%16 = OpConstant %12 " +
500                                   electedValue.str() +
501                                   "\n"
502                                   "%17 = OpConstant %7 0\n"
503                                   "%18 = OpTypePointer Output %12\n"
504                                   "%20 = OpConstant %12 1\n"
505                                   "%21 = OpConstant %7 1\n"
506                                   "%24 = OpConstant %12 " +
507                                   unelectedValue.str() +
508                                   "\n"
509                                   "%26 = OpConstant %12 0\n"
510                                   "%28 = OpTypeArray %12 %21\n"
511                                   "%29 = OpTypeStruct %13 %12 %28 %28\n"
512                                   "%30 = OpTypePointer Output %29\n"
513                                   "%31 = OpVariable %30 Output\n"
514                                   "%32 = OpTypeInt 32 1\n"
515                                   "%33 = OpConstant %32 0\n"
516                                   "%34 = OpTypePointer Input %13\n"
517                                   "%35 = OpVariable %34 Input\n"
518                                   "%38 = OpConstant %32 1\n"
519                                   "%4 = OpFunction %2 None %3\n"
520                                   "%5 = OpLabel\n"
521                                   "%9 = OpGroupNonUniformElect %6 %8\n"
522                                   "OpSelectionMerge %11 None\n"
523                                   "OpBranchConditional %9 %10 %23\n"
524                                   "%10 = OpLabel\n"
525                                   "%19 = OpAccessChain %18 %15 %17\n"
526                                   "OpStore %19 %16\n"
527                                   "%22 = OpAccessChain %18 %15 %21\n"
528                                   "OpStore %22 %20\n"
529                                   "OpBranch %11\n"
530                                   "%23 = OpLabel\n"
531                                   "%25 = OpAccessChain %18 %15 %17\n"
532                                   "OpStore %25 %24\n"
533                                   "%27 = OpAccessChain %18 %15 %21\n"
534                                   "OpStore %27 %26\n"
535                                   "OpBranch %11\n"
536                                   "%11 = OpLabel\n"
537                                   "%36 = OpLoad %13 %35\n"
538                                   "%37 = OpAccessChain %14 %31 %33\n"
539                                   "OpStore %37 %36\n"
540                                   "%39 = OpAccessChain %18 %31 %38\n"
541                                   "OpStore %39 %20\n"
542                                   "OpReturn\n"
543                                   "OpFunctionEnd\n";
544 
545             programCollection.spirvAsmSources.add("vert") << vertex << buildOptionsSpr;
546         }
547         else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
548         {
549             /*
550                 "#version 450\n"
551                 "#extension GL_KHR_shader_subgroup_basic: enable\n"
552                 "layout(points) in;\n"
553                 "layout(points, max_vertices = 1) out;\n"
554                 "layout(location = 0) out vec4 out_color;\n"
555                 "void main (void)\n"
556                 "{\n"
557                 "  if (subgroupElect())\n"
558                 "  {\n"
559                 "    out_color.r = " << ELECTED_VALUE << ";\n"
560                 "    out_color.g = 1.0f;\n"
561                 "  }\n"
562                 "  else\n"
563                 "  {\n"
564                 "    out_color.r = " << UNELECTED_VALUE << ";\n"
565                 "    out_color.g = 0.0f;\n"
566                 "  }\n"
567                 "  gl_Position = gl_in[0].gl_Position;\n"
568                 "  gl_PointSize = gl_in[0].gl_PointSize;\n"
569                 "  EmitVertex();\n"
570                 "  EndPrimitive();\n"
571                 "}\n";
572             */
573             ostringstream geometry;
574 
575             geometry << "; SPIR-V\n"
576                      << "; Version: 1.3\n"
577                      << "; Generator: Khronos Glslang Reference Front End; 2\n"
578                      << "; Bound: 42\n"
579                      << "; Schema: 0\n"
580                      << "OpCapability Geometry\n"
581                      << (*caseDef.geometryPointSizeSupported ? "OpCapability GeometryPointSize\n" : "")
582                      << "OpCapability GroupNonUniform\n"
583                      << "%1 = OpExtInstImport \"GLSL.std.450\"\n"
584                      << "OpMemoryModel Logical GLSL450\n"
585                      << "OpEntryPoint Geometry %4 \"main\" %15 %31 %37\n"
586                      << "OpExecutionMode %4 InputPoints\n"
587                      << "OpExecutionMode %4 Invocations 1\n"
588                      << "OpExecutionMode %4 OutputPoints\n"
589                      << "OpExecutionMode %4 OutputVertices 1\n"
590                      << "OpDecorate %15 Location 0\n"
591                      << "OpMemberDecorate %29 0 BuiltIn Position\n"
592                      << "OpMemberDecorate %29 1 BuiltIn PointSize\n"
593                      << "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
594                      << "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
595                      << "OpDecorate %29 Block\n"
596                      << "OpMemberDecorate %34 0 BuiltIn Position\n"
597                      << "OpMemberDecorate %34 1 BuiltIn PointSize\n"
598                      << "OpMemberDecorate %34 2 BuiltIn ClipDistance\n"
599                      << "OpMemberDecorate %34 3 BuiltIn CullDistance\n"
600                      << "OpDecorate %34 Block\n"
601                      << "%2 = OpTypeVoid\n"
602                      << "%3 = OpTypeFunction %2\n"
603                      << "%6 = OpTypeBool\n"
604                      << "%7 = OpTypeInt 32 0\n"
605                      << "%8 = OpConstant %7 3\n"
606                      << "%12 = OpTypeFloat 32\n"
607                      << "%13 = OpTypeVector %12 4\n"
608                      << "%14 = OpTypePointer Output %13\n"
609                      << "%15 = OpVariable %14 Output\n"
610                      << "%16 = OpConstant %12 " << electedValue.str() << "\n"
611                      << "%17 = OpConstant %7 0\n"
612                      << "%18 = OpTypePointer Output %12\n"
613                      << "%20 = OpConstant %12 1\n"
614                      << "%21 = OpConstant %7 1\n"
615                      << "%24 = OpConstant %12 " << unelectedValue.str() << "\n"
616                      << "%26 = OpConstant %12 0\n"
617                      << "%28 = OpTypeArray %12 %21\n"
618                      << "%29 = OpTypeStruct %13 %12 %28 %28\n"
619                      << "%30 = OpTypePointer Output %29\n"
620                      << "%31 = OpVariable %30 Output\n"
621                      << "%32 = OpTypeInt 32 1\n"
622                      << "%33 = OpConstant %32 0\n"
623                      << "%34 = OpTypeStruct %13 %12 %28 %28\n"
624                      << "%35 = OpTypeArray %34 %21\n"
625                      << "%36 = OpTypePointer Input %35\n"
626                      << "%37 = OpVariable %36 Input\n"
627                      << "%38 = OpTypePointer Input %13\n"
628                      << (*caseDef.geometryPointSizeSupported ? "%42 = OpConstant %32 1\n"
629                                                                "%43 = OpTypePointer Input %12\n"
630                                                                "%44 = OpTypePointer Output %12\n" :
631                                                                "")
632                      << "%4 = OpFunction %2 None %3\n"
633                      << "%5 = OpLabel\n"
634                      << "%9 = OpGroupNonUniformElect %6 %8\n"
635                      << "OpSelectionMerge %11 None\n"
636                      << "OpBranchConditional %9 %10 %23\n"
637                      << "%10 = OpLabel\n"
638                      << "%19 = OpAccessChain %18 %15 %17\n"
639                      << "OpStore %19 %16\n"
640                      << "%22 = OpAccessChain %18 %15 %21\n"
641                      << "OpStore %22 %20\n"
642                      << "OpBranch %11\n"
643                      << "%23 = OpLabel\n"
644                      << "%25 = OpAccessChain %18 %15 %17\n"
645                      << "OpStore %25 %24\n"
646                      << "%27 = OpAccessChain %18 %15 %21\n"
647                      << "OpStore %27 %26\n"
648                      << "OpBranch %11\n"
649                      << "%11 = OpLabel\n"
650                      << "%39 = OpAccessChain %38 %37 %33 %33\n"
651                      << "%40 = OpLoad %13 %39\n"
652                      << "%41 = OpAccessChain %14 %31 %33\n"
653                      << "OpStore %41 %40\n"
654                      << (*caseDef.geometryPointSizeSupported ? "%45 = OpAccessChain %43 %37 %33 %42\n"
655                                                                "%46 = OpLoad %12 %45\n"
656                                                                "%47 = OpAccessChain %44 %31 %42\n"
657                                                                "OpStore %47 %46\n" :
658                                                                "")
659                      << "OpEmitVertex\n"
660                      << "OpEndPrimitive\n"
661                      << "OpReturn\n"
662                      << "OpFunctionEnd\n";
663 
664             programCollection.spirvAsmSources.add("geometry") << geometry.str() << buildOptionsSpr;
665         }
666         else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
667         {
668             /*
669                 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
670                 << "#extension GL_EXT_tessellation_shader : require\n"
671                 << "layout(vertices = 2) out;\n"
672                 << "void main (void)\n"
673                 << "{\n"
674                 << "  if (gl_InvocationID == 0)\n"
675                 << "  {\n"
676                 << "    gl_TessLevelOuter[0] = 1.0f;\n"
677                 << "    gl_TessLevelOuter[1] = 1.0f;\n"
678                 << "  }\n"
679                 << "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
680                 << "}\n";
681             */
682             const string controlSource = "; SPIR-V\n"
683                                          "; Version: 1.3\n"
684                                          "; Generator: Khronos Glslang Reference Front End; 2\n"
685                                          "; Bound: 46\n"
686                                          "; Schema: 0\n"
687                                          "OpCapability Tessellation\n"
688                                          "%1 = OpExtInstImport \"GLSL.std.450\"\n"
689                                          "OpMemoryModel Logical GLSL450\n"
690                                          "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %33 %39\n"
691                                          "OpExecutionMode %4 OutputVertices 2\n"
692                                          "OpDecorate %8 BuiltIn InvocationId\n"
693                                          "OpDecorate %20 Patch\n"
694                                          "OpDecorate %20 BuiltIn TessLevelOuter\n"
695                                          "OpMemberDecorate %29 0 BuiltIn Position\n"
696                                          "OpMemberDecorate %29 1 BuiltIn PointSize\n"
697                                          "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
698                                          "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
699                                          "OpDecorate %29 Block\n"
700                                          "OpMemberDecorate %35 0 BuiltIn Position\n"
701                                          "OpMemberDecorate %35 1 BuiltIn PointSize\n"
702                                          "OpMemberDecorate %35 2 BuiltIn ClipDistance\n"
703                                          "OpMemberDecorate %35 3 BuiltIn CullDistance\n"
704                                          "OpDecorate %35 Block\n"
705                                          "%2 = OpTypeVoid\n"
706                                          "%3 = OpTypeFunction %2\n"
707                                          "%6 = OpTypeInt 32 1\n"
708                                          "%7 = OpTypePointer Input %6\n"
709                                          "%8 = OpVariable %7 Input\n"
710                                          "%10 = OpConstant %6 0\n"
711                                          "%11 = OpTypeBool\n"
712                                          "%15 = OpTypeFloat 32\n"
713                                          "%16 = OpTypeInt 32 0\n"
714                                          "%17 = OpConstant %16 4\n"
715                                          "%18 = OpTypeArray %15 %17\n"
716                                          "%19 = OpTypePointer Output %18\n"
717                                          "%20 = OpVariable %19 Output\n"
718                                          "%21 = OpConstant %15 1\n"
719                                          "%22 = OpTypePointer Output %15\n"
720                                          "%24 = OpConstant %6 1\n"
721                                          "%26 = OpTypeVector %15 4\n"
722                                          "%27 = OpConstant %16 1\n"
723                                          "%28 = OpTypeArray %15 %27\n"
724                                          "%29 = OpTypeStruct %26 %15 %28 %28\n"
725                                          "%30 = OpConstant %16 2\n"
726                                          "%31 = OpTypeArray %29 %30\n"
727                                          "%32 = OpTypePointer Output %31\n"
728                                          "%33 = OpVariable %32 Output\n"
729                                          "%35 = OpTypeStruct %26 %15 %28 %28\n"
730                                          "%36 = OpConstant %16 32\n"
731                                          "%37 = OpTypeArray %35 %36\n"
732                                          "%38 = OpTypePointer Input %37\n"
733                                          "%39 = OpVariable %38 Input\n"
734                                          "%41 = OpTypePointer Input %26\n"
735                                          "%44 = OpTypePointer Output %26\n"
736                                          "%4 = OpFunction %2 None %3\n"
737                                          "%5 = OpLabel\n"
738                                          "%9 = OpLoad %6 %8\n"
739                                          "%12 = OpIEqual %11 %9 %10\n"
740                                          "OpSelectionMerge %14 None\n"
741                                          "OpBranchConditional %12 %13 %14\n"
742                                          "%13 = OpLabel\n"
743                                          "%23 = OpAccessChain %22 %20 %10\n"
744                                          "OpStore %23 %21\n"
745                                          "%25 = OpAccessChain %22 %20 %24\n"
746                                          "OpStore %25 %21\n"
747                                          "OpBranch %14\n"
748                                          "%14 = OpLabel\n"
749                                          "%34 = OpLoad %6 %8\n"
750                                          "%40 = OpLoad %6 %8\n"
751                                          "%42 = OpAccessChain %41 %39 %40 %10\n"
752                                          "%43 = OpLoad %26 %42\n"
753                                          "%45 = OpAccessChain %44 %33 %34 %10\n"
754                                          "OpStore %45 %43\n"
755                                          "OpReturn\n"
756                                          "OpFunctionEnd\n";
757 
758             programCollection.spirvAsmSources.add("tesc") << controlSource << buildOptionsSpr;
759 
760             /*
761                 "#extension GL_KHR_shader_subgroup_basic: enable\n"
762                 "#extension GL_EXT_tessellation_shader : require\n"
763                 "layout(isolines, equal_spacing, ccw ) in;\n"
764                 "layout(location = 0) out vec4 out_color;\n"
765                 "\n"
766                 "void main (void)\n"
767                 "{\n"
768                 "  if (subgroupElect())\n"
769                 "  {\n"
770                 "    out_color.r = " << 2 * ELECTED_VALUE - UNELECTED_VALUE << ";\n"
771                 "    out_color.g = 2.0f;\n"
772                 "  }\n"
773                 "  else\n"
774                 "  {\n"
775                 "    out_color.r = " << UNELECTED_VALUE << ";\n"
776                 "    out_color.g = 0.0f;\n"
777                 "  }\n"
778                 "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
779                 "}\n";
780             */
781 
782             const string evaluationSource = "; SPIR-V\n"
783                                             "; Version: 1.3\n"
784                                             "; Generator: Khronos Glslang Reference Front End; 2\n"
785                                             "; Bound: 54\n"
786                                             "; Schema: 0\n"
787                                             "OpCapability Tessellation\n"
788                                             "OpCapability GroupNonUniform\n"
789                                             "%1 = OpExtInstImport \"GLSL.std.450\"\n"
790                                             "OpMemoryModel Logical GLSL450\n"
791                                             "OpEntryPoint TessellationEvaluation %4 \"main\" %15 %31 %38 %47\n"
792                                             "OpExecutionMode %4 Isolines\n"
793                                             "OpExecutionMode %4 SpacingEqual\n"
794                                             "OpExecutionMode %4 VertexOrderCcw\n"
795                                             "OpDecorate %15 Location 0\n"
796                                             "OpMemberDecorate %29 0 BuiltIn Position\n"
797                                             "OpMemberDecorate %29 1 BuiltIn PointSize\n"
798                                             "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
799                                             "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
800                                             "OpDecorate %29 Block\n"
801                                             "OpMemberDecorate %34 0 BuiltIn Position\n"
802                                             "OpMemberDecorate %34 1 BuiltIn PointSize\n"
803                                             "OpMemberDecorate %34 2 BuiltIn ClipDistance\n"
804                                             "OpMemberDecorate %34 3 BuiltIn CullDistance\n"
805                                             "OpDecorate %34 Block\n"
806                                             "OpDecorate %47 BuiltIn TessCoord\n"
807                                             "%2 = OpTypeVoid\n"
808                                             "%3 = OpTypeFunction %2\n"
809                                             "%6 = OpTypeBool\n"
810                                             "%7 = OpTypeInt 32 0\n"
811                                             "%8 = OpConstant %7 3\n"
812                                             "%12 = OpTypeFloat 32\n"
813                                             "%13 = OpTypeVector %12 4\n"
814                                             "%14 = OpTypePointer Output %13\n"
815                                             "%15 = OpVariable %14 Output\n"
816                                             "%16 = OpConstant %12 71\n" // 2 * ELECTED_VALUE - UNELECTED_VALUE (hard-coded)
817                                             "%17 = OpConstant %7 0\n"
818                                             "%18 = OpTypePointer Output %12\n"
819                                             "%20 = OpConstant %12 2\n"
820                                             "%21 = OpConstant %7 1\n"
821                                             "%24 = OpConstant %12 " +
822                                             unelectedValue.str() +
823                                             "\n"
824                                             "%26 = OpConstant %12 0\n"
825                                             "%28 = OpTypeArray %12 %21\n"
826                                             "%29 = OpTypeStruct %13 %12 %28 %28\n"
827                                             "%30 = OpTypePointer Output %29\n"
828                                             "%31 = OpVariable %30 Output\n"
829                                             "%32 = OpTypeInt 32 1\n"
830                                             "%33 = OpConstant %32 0\n"
831                                             "%34 = OpTypeStruct %13 %12 %28 %28\n"
832                                             "%35 = OpConstant %7 32\n"
833                                             "%36 = OpTypeArray %34 %35\n"
834                                             "%37 = OpTypePointer Input %36\n"
835                                             "%38 = OpVariable %37 Input\n"
836                                             "%39 = OpTypePointer Input %13\n"
837                                             "%42 = OpConstant %32 1\n"
838                                             "%45 = OpTypeVector %12 3\n"
839                                             "%46 = OpTypePointer Input %45\n"
840                                             "%47 = OpVariable %46 Input\n"
841                                             "%48 = OpTypePointer Input %12\n"
842                                             "%4 = OpFunction %2 None %3\n"
843                                             "%5 = OpLabel\n"
844                                             "%9 = OpGroupNonUniformElect %6 %8\n"
845                                             "OpSelectionMerge %11 None\n"
846                                             "OpBranchConditional %9 %10 %23\n"
847                                             "%10 = OpLabel\n"
848                                             "%19 = OpAccessChain %18 %15 %17\n"
849                                             "OpStore %19 %16\n"
850                                             "%22 = OpAccessChain %18 %15 %21\n"
851                                             "OpStore %22 %20\n"
852                                             "OpBranch %11\n"
853                                             "%23 = OpLabel\n"
854                                             "%25 = OpAccessChain %18 %15 %17\n"
855                                             "OpStore %25 %24\n"
856                                             "%27 = OpAccessChain %18 %15 %21\n"
857                                             "OpStore %27 %26\n"
858                                             "OpBranch %11\n"
859                                             "%11 = OpLabel\n"
860                                             "%40 = OpAccessChain %39 %38 %33 %33\n"
861                                             "%41 = OpLoad %13 %40\n"
862                                             "%43 = OpAccessChain %39 %38 %42 %33\n"
863                                             "%44 = OpLoad %13 %43\n"
864                                             "%49 = OpAccessChain %48 %47 %17\n"
865                                             "%50 = OpLoad %12 %49\n"
866                                             "%51 = OpCompositeConstruct %13 %50 %50 %50 %50\n"
867                                             "%52 = OpExtInst %13 %1 FMix %41 %44 %51\n"
868                                             "%53 = OpAccessChain %14 %31 %33\n"
869                                             "OpStore %53 %52\n"
870                                             "OpReturn\n"
871                                             "OpFunctionEnd\n";
872 
873             programCollection.spirvAsmSources.add("tese") << evaluationSource << buildOptionsSpr;
874         }
875         else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
876         {
877             /*
878                 "#extension GL_KHR_shader_subgroup_basic: enable\n"
879                 "#extension GL_EXT_tessellation_shader : require\n"
880                 "layout(vertices = 2) out;\n"
881                 "layout(location = 0) out vec4 out_color[];\n"
882                 "void main (void)\n"
883                 "{\n"
884                 "  if (gl_InvocationID == 0)\n"
885                 "  {\n"
886                 "    gl_TessLevelOuter[0] = 1.0f;\n"
887                 "    gl_TessLevelOuter[1] = 1.0f;\n"
888                 "  }\n"
889                 "  if (subgroupElect())\n"
890                 "  {\n"
891                 "    out_color[gl_InvocationID].r = " << ELECTED_VALUE << ";\n"
892                 "    out_color[gl_InvocationID].g = 1.0f;\n"
893                 "  }\n"
894                 "  else\n"
895                 "  {\n"
896                 "    out_color[gl_InvocationID].r = " << UNELECTED_VALUE << ";\n"
897                 "    out_color[gl_InvocationID].g = 0.0f;\n"
898                 "  }\n"
899                 "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
900                 "}\n";
901             */
902             const string controlSource = "; SPIR-V\n"
903                                          "; Version: 1.3\n"
904                                          "; Generator: Khronos Glslang Reference Front End; 2\n"
905                                          "; Bound: 66\n"
906                                          "; Schema: 0\n"
907                                          "OpCapability Tessellation\n"
908                                          "OpCapability GroupNonUniform\n"
909                                          "%1 = OpExtInstImport \"GLSL.std.450\"\n"
910                                          "OpMemoryModel Logical GLSL450\n"
911                                          "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %34 %53 %59\n"
912                                          "OpExecutionMode %4 OutputVertices 2\n"
913                                          "OpDecorate %8 BuiltIn InvocationId\n"
914                                          "OpDecorate %20 Patch\n"
915                                          "OpDecorate %20 BuiltIn TessLevelOuter\n"
916                                          "OpDecorate %34 Location 0\n"
917                                          "OpMemberDecorate %50 0 BuiltIn Position\n"
918                                          "OpMemberDecorate %50 1 BuiltIn PointSize\n"
919                                          "OpMemberDecorate %50 2 BuiltIn ClipDistance\n"
920                                          "OpMemberDecorate %50 3 BuiltIn CullDistance\n"
921                                          "OpDecorate %50 Block\n"
922                                          "OpMemberDecorate %55 0 BuiltIn Position\n"
923                                          "OpMemberDecorate %55 1 BuiltIn PointSize\n"
924                                          "OpMemberDecorate %55 2 BuiltIn ClipDistance\n"
925                                          "OpMemberDecorate %55 3 BuiltIn CullDistance\n"
926                                          "OpDecorate %55 Block\n"
927                                          "%2 = OpTypeVoid\n"
928                                          "%3 = OpTypeFunction %2\n"
929                                          "%6 = OpTypeInt 32 1\n"
930                                          "%7 = OpTypePointer Input %6\n"
931                                          "%8 = OpVariable %7 Input\n"
932                                          "%10 = OpConstant %6 0\n"
933                                          "%11 = OpTypeBool\n"
934                                          "%15 = OpTypeFloat 32\n"
935                                          "%16 = OpTypeInt 32 0\n"
936                                          "%17 = OpConstant %16 4\n"
937                                          "%18 = OpTypeArray %15 %17\n"
938                                          "%19 = OpTypePointer Output %18\n"
939                                          "%20 = OpVariable %19 Output\n"
940                                          "%21 = OpConstant %15 1\n"
941                                          "%22 = OpTypePointer Output %15\n"
942                                          "%24 = OpConstant %6 1\n"
943                                          "%26 = OpConstant %16 3\n"
944                                          "%30 = OpTypeVector %15 4\n"
945                                          "%31 = OpConstant %16 2\n"
946                                          "%32 = OpTypeArray %30 %31\n"
947                                          "%33 = OpTypePointer Output %32\n"
948                                          "%34 = OpVariable %33 Output\n"
949                                          "%36 = OpConstant %15 " +
950                                          electedValue.str() +
951                                          "\n"
952                                          "%37 = OpConstant %16 0\n"
953                                          "%40 = OpConstant %16 1\n"
954                                          "%44 = OpConstant %15 " +
955                                          unelectedValue.str() +
956                                          "\n"
957                                          "%47 = OpConstant %15 0\n"
958                                          "%49 = OpTypeArray %15 %40\n"
959                                          "%50 = OpTypeStruct %30 %15 %49 %49\n"
960                                          "%51 = OpTypeArray %50 %31\n"
961                                          "%52 = OpTypePointer Output %51\n"
962                                          "%53 = OpVariable %52 Output\n"
963                                          "%55 = OpTypeStruct %30 %15 %49 %49\n"
964                                          "%56 = OpConstant %16 32\n"
965                                          "%57 = OpTypeArray %55 %56\n"
966                                          "%58 = OpTypePointer Input %57\n"
967                                          "%59 = OpVariable %58 Input\n"
968                                          "%61 = OpTypePointer Input %30\n"
969                                          "%64 = OpTypePointer Output %30\n"
970                                          "%4 = OpFunction %2 None %3\n"
971                                          "%5 = OpLabel\n"
972                                          "%9 = OpLoad %6 %8\n"
973                                          "%12 = OpIEqual %11 %9 %10\n"
974                                          "OpSelectionMerge %14 None\n"
975                                          "OpBranchConditional %12 %13 %14\n"
976                                          "%13 = OpLabel\n"
977                                          "%23 = OpAccessChain %22 %20 %10\n"
978                                          "OpStore %23 %21\n"
979                                          "%25 = OpAccessChain %22 %20 %24\n"
980                                          "OpStore %25 %21\n"
981                                          "OpBranch %14\n"
982                                          "%14 = OpLabel\n"
983                                          "%27 = OpGroupNonUniformElect %11 %26\n"
984                                          "OpSelectionMerge %29 None\n"
985                                          "OpBranchConditional %27 %28 %42\n"
986                                          "%28 = OpLabel\n"
987                                          "%35 = OpLoad %6 %8\n"
988                                          "%38 = OpAccessChain %22 %34 %35 %37\n"
989                                          "OpStore %38 %36\n"
990                                          "%39 = OpLoad %6 %8\n"
991                                          "%41 = OpAccessChain %22 %34 %39 %40\n"
992                                          "OpStore %41 %21\n"
993                                          "OpBranch %29\n"
994                                          "%42 = OpLabel\n"
995                                          "%43 = OpLoad %6 %8\n"
996                                          "%45 = OpAccessChain %22 %34 %43 %37\n"
997                                          "OpStore %45 %44\n"
998                                          "%46 = OpLoad %6 %8\n"
999                                          "%48 = OpAccessChain %22 %34 %46 %40\n"
1000                                          "OpStore %48 %47\n"
1001                                          "OpBranch %29\n"
1002                                          "%29 = OpLabel\n"
1003                                          "%54 = OpLoad %6 %8\n"
1004                                          "%60 = OpLoad %6 %8\n"
1005                                          "%62 = OpAccessChain %61 %59 %60 %10\n"
1006                                          "%63 = OpLoad %30 %62\n"
1007                                          "%65 = OpAccessChain %64 %53 %54 %10\n"
1008                                          "OpStore %65 %63\n"
1009                                          "OpReturn\n"
1010                                          "OpFunctionEnd\n";
1011 
1012             programCollection.spirvAsmSources.add("tesc") << controlSource << buildOptionsSpr;
1013 
1014             /*
1015                 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
1016                 "#extension GL_EXT_tessellation_shader : require\n"
1017                 "layout(isolines, equal_spacing, ccw ) in;\n"
1018                 "layout(location = 0) in vec4 in_color[];\n"
1019                 "layout(location = 0) out vec4 out_color;\n"
1020                 "\n"
1021                 "void main (void)\n"
1022                 "{\n"
1023                 "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
1024                 "  out_color = in_color[0];\n"
1025                 "}\n";
1026             */
1027 
1028             const string evaluationSource = "; SPIR-V\n"
1029                                             "; Version: 1.3\n"
1030                                             "; Generator: Khronos Glslang Reference Front End; 2\n"
1031                                             "; Bound: 44\n"
1032                                             "; Schema: 0\n"
1033                                             "OpCapability Tessellation\n"
1034                                             "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1035                                             "OpMemoryModel Logical GLSL450\n"
1036                                             "OpEntryPoint TessellationEvaluation %4 \"main\" %13 %20 %29 %38 %41\n"
1037                                             "OpExecutionMode %4 Isolines\n"
1038                                             "OpExecutionMode %4 SpacingEqual\n"
1039                                             "OpExecutionMode %4 VertexOrderCcw\n"
1040                                             "OpMemberDecorate %11 0 BuiltIn Position\n"
1041                                             "OpMemberDecorate %11 1 BuiltIn PointSize\n"
1042                                             "OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
1043                                             "OpMemberDecorate %11 3 BuiltIn CullDistance\n"
1044                                             "OpDecorate %11 Block\n"
1045                                             "OpMemberDecorate %16 0 BuiltIn Position\n"
1046                                             "OpMemberDecorate %16 1 BuiltIn PointSize\n"
1047                                             "OpMemberDecorate %16 2 BuiltIn ClipDistance\n"
1048                                             "OpMemberDecorate %16 3 BuiltIn CullDistance\n"
1049                                             "OpDecorate %16 Block\n"
1050                                             "OpDecorate %29 BuiltIn TessCoord\n"
1051                                             "OpDecorate %38 Location 0\n"
1052                                             "OpDecorate %41 Location 0\n"
1053                                             "%2 = OpTypeVoid\n"
1054                                             "%3 = OpTypeFunction %2\n"
1055                                             "%6 = OpTypeFloat 32\n"
1056                                             "%7 = OpTypeVector %6 4\n"
1057                                             "%8 = OpTypeInt 32 0\n"
1058                                             "%9 = OpConstant %8 1\n"
1059                                             "%10 = OpTypeArray %6 %9\n"
1060                                             "%11 = OpTypeStruct %7 %6 %10 %10\n"
1061                                             "%12 = OpTypePointer Output %11\n"
1062                                             "%13 = OpVariable %12 Output\n"
1063                                             "%14 = OpTypeInt 32 1\n"
1064                                             "%15 = OpConstant %14 0\n"
1065                                             "%16 = OpTypeStruct %7 %6 %10 %10\n"
1066                                             "%17 = OpConstant %8 32\n"
1067                                             "%18 = OpTypeArray %16 %17\n"
1068                                             "%19 = OpTypePointer Input %18\n"
1069                                             "%20 = OpVariable %19 Input\n"
1070                                             "%21 = OpTypePointer Input %7\n"
1071                                             "%24 = OpConstant %14 1\n"
1072                                             "%27 = OpTypeVector %6 3\n"
1073                                             "%28 = OpTypePointer Input %27\n"
1074                                             "%29 = OpVariable %28 Input\n"
1075                                             "%30 = OpConstant %8 0\n"
1076                                             "%31 = OpTypePointer Input %6\n"
1077                                             "%36 = OpTypePointer Output %7\n"
1078                                             "%38 = OpVariable %36 Output\n"
1079                                             "%39 = OpTypeArray %7 %17\n"
1080                                             "%40 = OpTypePointer Input %39\n"
1081                                             "%41 = OpVariable %40 Input\n"
1082                                             "%4 = OpFunction %2 None %3\n"
1083                                             "%5 = OpLabel\n"
1084                                             "%22 = OpAccessChain %21 %20 %15 %15\n"
1085                                             "%23 = OpLoad %7 %22\n"
1086                                             "%25 = OpAccessChain %21 %20 %24 %15\n"
1087                                             "%26 = OpLoad %7 %25\n"
1088                                             "%32 = OpAccessChain %31 %29 %30\n"
1089                                             "%33 = OpLoad %6 %32\n"
1090                                             "%34 = OpCompositeConstruct %7 %33 %33 %33 %33\n"
1091                                             "%35 = OpExtInst %7 %1 FMix %23 %26 %34\n"
1092                                             "%37 = OpAccessChain %36 %13 %15\n"
1093                                             "OpStore %37 %35\n"
1094                                             "%42 = OpAccessChain %21 %41 %15\n"
1095                                             "%43 = OpLoad %7 %42\n"
1096                                             "OpStore %38 %43\n"
1097                                             "OpReturn\n"
1098                                             "OpFunctionEnd\n";
1099 
1100             programCollection.spirvAsmSources.add("tese") << evaluationSource << buildOptionsSpr;
1101         }
1102         else
1103             TCU_THROW(InternalError, "Unsupported shader stage");
1104     }
1105     else
1106     {
1107         const string color = (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage) ?
1108                                  "out_color[gl_InvocationID].b = 1.0f;\n" :
1109                                  "out_color.b = 1.0f;\n";
1110         ostringstream bdy;
1111 
1112         switch (caseDef.opType)
1113         {
1114         case OPTYPE_SUBGROUP_BARRIER:
1115         case OPTYPE_SUBGROUP_MEMORY_BARRIER:
1116         case OPTYPE_SUBGROUP_MEMORY_BARRIER_BUFFER:
1117         {
1118             bdy << " tempResult2 = tempBuffer[id];\n"
1119                 << "  if (subgroupElect())\n"
1120                 << "  {\n"
1121                 << "    tempResult = value;\n"
1122                 << "    " << color << "  }\n"
1123                 << "  else\n"
1124                 << "  {\n"
1125                 << "    tempResult = tempBuffer[id];\n"
1126                 << "  }\n"
1127                 << "  " << getOpTypeName(caseDef.opType) << "();\n";
1128             break;
1129         }
1130 
1131         case OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE:
1132             bdy << "tempResult2 = imageLoad(tempImage, ivec2(id, 0)).x;\n"
1133                 << "  if (subgroupElect())\n"
1134                 << "  {\n"
1135                 << "    tempResult = value;\n"
1136                 << "     " << color << "  }\n"
1137                 << "  else\n"
1138                 << "  {\n"
1139                 << "    tempResult = imageLoad(tempImage, ivec2(id, 0)).x;\n"
1140                 << "  }\n"
1141                 << "  subgroupMemoryBarrierImage();\n";
1142             break;
1143 
1144         default:
1145             TCU_THROW(InternalError, "Unhandled op type");
1146         }
1147 
1148         if (VK_SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage)
1149         {
1150             ostringstream fragment;
1151 
1152             fragment << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
1153                      << "#extension GL_KHR_shader_subgroup_basic: enable\n"
1154                      << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
1155                      << "layout(location = 0) out vec4 out_color;\n"
1156                      << "\n"
1157                      << "layout(set = 0, binding = 0) uniform Buffer1\n"
1158                      << "{\n"
1159                      << "  uint tempBuffer[" << SHADER_BUFFER_SIZE / 4ull << "];\n"
1160                      << "};\n"
1161                      << "\n"
1162                      << "layout(set = 0, binding = 1) uniform Buffer2\n"
1163                      << "{\n"
1164                      << "  uint value;\n"
1165                      << "};\n"
1166                      << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ?
1167                              "layout(set = 0, binding = 2, r32ui) readonly uniform highp uimage2D tempImage;\n" :
1168                              "\n")
1169                      << "void main (void)\n"
1170                      << "{\n"
1171                      << "  if (gl_HelperInvocation) return;\n"
1172                      << "  uint id = 0;\n"
1173                      << "  if (subgroupElect())\n"
1174                      << "  {\n"
1175                      << "    id = uint(gl_FragCoord.x);\n"
1176                      << "  }\n"
1177                      << "  id = subgroupBroadcastFirst(id);\n"
1178                      << "  uint localId = id;\n"
1179                      << "  uint tempResult = 0u;\n"
1180                      << "  uint tempResult2 = 0u;\n"
1181                      << "  out_color.b = 0.0f;\n"
1182                      << bdy.str() << "  out_color.r = float(tempResult);\n"
1183                      << "  out_color.g = float(value);\n"
1184                      << "  out_color.a = float(tempResult2);\n"
1185                      << "}\n";
1186 
1187             programCollection.glslSources.add("fragment") << glu::FragmentSource(fragment.str()) << buildOptions;
1188         }
1189         else if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
1190         {
1191             ostringstream vertex;
1192 
1193             vertex << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
1194                    << "#extension GL_KHR_shader_subgroup_basic: enable\n"
1195                    << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
1196                    << "\n"
1197                    << "layout(location = 0) out vec4 out_color;\n"
1198                    << "layout(location = 0) in highp vec4 in_position;\n"
1199                    << "\n"
1200                    << "layout(set = 0, binding = 0) uniform Buffer1\n"
1201                    << "{\n"
1202                    << "  uint tempBuffer[" << SHADER_BUFFER_SIZE / 4ull << "];\n"
1203                    << "};\n"
1204                    << "\n"
1205                    << "layout(set = 0, binding = 1) uniform Buffer2\n"
1206                    << "{\n"
1207                    << "  uint value;\n"
1208                    << "};\n"
1209                    << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ?
1210                            "layout(set = 0, binding = 2, r32ui) readonly uniform highp uimage2D tempImage;\n" :
1211                            "\n")
1212                    << "void main (void)\n"
1213                    << "{\n"
1214                    << "  uint id = 0;\n"
1215                    << "  if (subgroupElect())\n"
1216                    << "  {\n"
1217                    << "    id = gl_VertexIndex;\n"
1218                    << "  }\n"
1219                    << "  id = subgroupBroadcastFirst(id);\n"
1220                    << "  uint tempResult = 0u;\n"
1221                    << "  uint tempResult2 = 0u;\n"
1222                    << "  out_color.b = 0.0f;\n"
1223                    << bdy.str() << "  out_color.r = float(tempResult);\n"
1224                    << "  out_color.g = float(value);\n"
1225                    << "  out_color.a = float(tempResult2);\n"
1226                    << "  gl_Position = in_position;\n"
1227                    << "  gl_PointSize = 1.0f;\n"
1228                    << "}\n";
1229 
1230             programCollection.glslSources.add("vert") << glu::VertexSource(vertex.str()) << buildOptions;
1231         }
1232         else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
1233         {
1234             ostringstream geometry;
1235 
1236             geometry << "#version 450\n"
1237                      << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
1238                      << "#extension GL_KHR_shader_subgroup_basic: enable\n"
1239                      << "layout(points) in;\n"
1240                      << "layout(points, max_vertices = 1) out;\n"
1241                      << "layout(location = 0) out vec4 out_color;\n"
1242                      << "layout(set = 0, binding = 0) uniform Buffer1\n"
1243                      << "{\n"
1244                      << "  uint tempBuffer[" << SHADER_BUFFER_SIZE / 4ull << "];\n"
1245                      << "};\n"
1246                      << "\n"
1247                      << "layout(set = 0, binding = 1) uniform Buffer2\n"
1248                      << "{\n"
1249                      << "  uint value;\n"
1250                      << "};\n"
1251                      << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ?
1252                              "layout(set = 0, binding = 2, r32ui) readonly uniform highp uimage2D tempImage;\n" :
1253                              "\n")
1254                      << "void main (void)\n"
1255                      << "{\n"
1256                      << "  uint id = 0;\n"
1257                      << "  if (subgroupElect())\n"
1258                      << "  {\n"
1259                      << "    id = gl_InvocationID;\n"
1260                      << "  }\n"
1261                      << "  id = subgroupBroadcastFirst(id);\n"
1262                      << "  uint tempResult = 0u;\n"
1263                      << "  uint tempResult2 = 0u;\n"
1264                      << "  out_color.b = 0.0f;\n"
1265                      << bdy.str() << "  out_color.r = float(tempResult);\n"
1266                      << "  out_color.g = float(value);\n"
1267                      << "  out_color.a = float(tempResult2);\n"
1268                      << "  gl_Position = gl_in[0].gl_Position;\n"
1269                      << (*caseDef.geometryPointSizeSupported ? "  gl_PointSize = gl_in[0].gl_PointSize;\n" : "")
1270                      << "  EmitVertex();\n"
1271                      << "  EndPrimitive();\n"
1272                      << "}\n";
1273 
1274             programCollection.glslSources.add("geometry") << glu::GeometrySource(geometry.str()) << buildOptions;
1275         }
1276         else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
1277         {
1278             ostringstream controlSource;
1279             ostringstream evaluationSource;
1280 
1281             controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
1282                           << "#extension GL_EXT_tessellation_shader : require\n"
1283                           << "layout(vertices = 2) out;\n"
1284                           << "void main (void)\n"
1285                           << "{\n"
1286                           << "  if (gl_InvocationID == 0)\n"
1287                           << "  {\n"
1288                           << "    gl_TessLevelOuter[0] = 1.0f;\n"
1289                           << "    gl_TessLevelOuter[1] = 1.0f;\n"
1290                           << "  }\n"
1291                           << "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1292                           << (*caseDef.geometryPointSizeSupported ?
1293                                   "  gl_out[gl_InvocationID].gl_PointSize = gl_in[gl_InvocationID].gl_PointSize;\n" :
1294                                   "")
1295                           << "}\n";
1296 
1297             evaluationSource
1298                 << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
1299                 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
1300                 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
1301                 << "#extension GL_EXT_tessellation_shader : require\n"
1302                 << "layout(isolines, equal_spacing, ccw ) in;\n"
1303                 << "layout(location = 0) out vec4 out_color;\n"
1304                 << "layout(set = 0, binding = 0) uniform Buffer1\n"
1305                 << "{\n"
1306                 << "  uint tempBuffer[" << SHADER_BUFFER_SIZE / 4ull << "];\n"
1307                 << "};\n"
1308                 << "\n"
1309                 << "layout(set = 0, binding = 1) uniform Buffer2\n"
1310                 << "{\n"
1311                 << "  uint value;\n"
1312                 << "};\n"
1313                 << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ?
1314                         "layout(set = 0, binding = 2, r32ui) readonly uniform highp uimage2D tempImage;\n" :
1315                         "\n")
1316                 << "void main (void)\n"
1317                 << "{\n"
1318                 << "  uint id = 0;\n"
1319                 << "  if (subgroupElect())\n"
1320                 << "  {\n"
1321                 << "    id = gl_PrimitiveID;\n"
1322                 << "  }\n"
1323                 << "  id = subgroupBroadcastFirst(id);\n"
1324                 << "  uint tempResult = 0u;\n"
1325                 << "  uint tempResult2 = 0u;\n"
1326                 << "  out_color.b = 0.0f;\n"
1327                 << bdy.str() << "  out_color.r = float(tempResult);\n"
1328                 << "  out_color.g = float(value);\n"
1329                 << "  out_color.a = float(tempResult2);\n"
1330                 << "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
1331                 << (*caseDef.geometryPointSizeSupported ? "  gl_PointSize = gl_in[0].gl_PointSize;\n" : "") << "}\n";
1332 
1333             programCollection.glslSources.add("tesc")
1334                 << glu::TessellationControlSource(controlSource.str()) << buildOptions;
1335             programCollection.glslSources.add("tese")
1336                 << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
1337         }
1338         else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
1339         {
1340             ostringstream controlSource;
1341             ostringstream evaluationSource;
1342 
1343             controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
1344                           << "#extension GL_KHR_shader_subgroup_basic: enable\n"
1345                           << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
1346                           << "#extension GL_EXT_tessellation_shader : require\n"
1347                           << "layout(vertices = 2) out;\n"
1348                           << "layout(location = 0) out vec4 out_color[];\n"
1349                           << "layout(set = 0, binding = 0) uniform Buffer1\n"
1350                           << "{\n"
1351                           << "  uint tempBuffer[" << SHADER_BUFFER_SIZE / 4ull << "];\n"
1352                           << "};\n"
1353                           << "\n"
1354                           << "layout(set = 0, binding = 1) uniform Buffer2\n"
1355                           << "{\n"
1356                           << "  uint value;\n"
1357                           << "};\n"
1358                           << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ?
1359                                   "layout(set = 0, binding = 2, r32ui) readonly uniform highp uimage2D tempImage;\n" :
1360                                   "\n")
1361                           << "void main (void)\n"
1362                           << "{\n"
1363                           << "  uint id = 0;\n"
1364                           << "  if (gl_InvocationID == 0)\n"
1365                           << "  {\n"
1366                           << "    gl_TessLevelOuter[0] = 1.0f;\n"
1367                           << "    gl_TessLevelOuter[1] = 1.0f;\n"
1368                           << "  }\n"
1369                           << "  if (subgroupElect())\n"
1370                           << "  {\n"
1371                           << "    id = gl_InvocationID;\n"
1372                           << "  }\n"
1373                           << "  id = subgroupBroadcastFirst(id);\n"
1374                           << "  uint tempResult = 0u;\n"
1375                           << "  uint tempResult2 = 0u;\n"
1376                           << "  out_color[gl_InvocationID].b = 0.0f;\n"
1377                           << bdy.str() << "  out_color[gl_InvocationID].r = float(tempResult);\n"
1378                           << "  out_color[gl_InvocationID].g = float(value);\n"
1379                           << "  out_color[gl_InvocationID].a = float(tempResult2);\n"
1380                           << "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1381                           << (*caseDef.geometryPointSizeSupported ?
1382                                   "  gl_out[gl_InvocationID].gl_PointSize = gl_in[gl_InvocationID].gl_PointSize;\n" :
1383                                   "")
1384                           << "}\n";
1385 
1386             evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
1387                              << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
1388                              << "#extension GL_EXT_tessellation_shader : require\n"
1389                              << "layout(isolines, equal_spacing, ccw ) in;\n"
1390                              << "layout(location = 0) in vec4 in_color[];\n"
1391                              << "layout(location = 0) out vec4 out_color;\n"
1392                              << "\n"
1393                              << "void main (void)\n"
1394                              << "{\n"
1395                              << "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
1396                              << (*caseDef.geometryPointSizeSupported ? "  gl_PointSize = gl_in[0].gl_PointSize;\n" : "")
1397                              << "  out_color = in_color[0];\n"
1398                              << "}\n";
1399 
1400             programCollection.glslSources.add("tesc")
1401                 << glu::TessellationControlSource(controlSource.str()) << buildOptions;
1402             programCollection.glslSources.add("tese")
1403                 << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
1404         }
1405         else
1406             TCU_THROW(InternalError, "Unsupported shader stage");
1407     }
1408 }
1409 
getPerStageHeadDeclarations(const CaseDefinition & caseDef)1410 vector<string> getPerStageHeadDeclarations(const CaseDefinition &caseDef)
1411 {
1412     const uint32_t stageCount = subgroups::getStagesCount(caseDef.shaderStage);
1413     const bool fragment       = (caseDef.shaderStage & VK_SHADER_STAGE_FRAGMENT_BIT) != 0;
1414     vector<string> result(stageCount, string());
1415 
1416     if (fragment)
1417         result.resize(result.size() + 1);
1418 
1419     if (caseDef.opType == OPTYPE_ELECT)
1420     {
1421         for (size_t i = 0; i < result.size(); ++i)
1422         {
1423             const bool frag       = (i == stageCount);
1424             const size_t binding1 = i;
1425             const size_t binding2 = stageCount + i;
1426 
1427             if (frag)
1428             {
1429                 result[i] += "layout(location = 0) out uint result;\n";
1430             }
1431             else
1432             {
1433                 result[i] += "layout(set = 0, binding = " + de::toString(binding1) +
1434                              ", std430) buffer Buffer1\n"
1435                              "{\n"
1436                              "  uint result[];\n"
1437                              "};\n";
1438             }
1439 
1440             result[i] += "layout(set = 0, binding = " + de::toString(binding2) +
1441                          ", std430) buffer Buffer2\n"
1442                          "{\n"
1443                          "  uint numSubgroupsExecuted;\n"
1444                          "};\n";
1445         }
1446     }
1447     else
1448     {
1449         for (size_t i = 0; i < result.size(); ++i)
1450         {
1451             const bool frag       = (i == stageCount);
1452             const size_t binding1 = i;
1453             const size_t binding2 = stageCount + 4 * i;
1454             const size_t binding3 = stageCount + 4 * i + 1;
1455             const size_t binding4 = stageCount + 4 * i + 2;
1456             const size_t binding5 = stageCount + 4 * i + 3;
1457 
1458             if (frag)
1459             {
1460                 result[i] = "layout(location = 0) out uint result;\n";
1461             }
1462             else
1463             {
1464                 result[i] += "layout(set = 0, binding = " + de::toString(binding1) +
1465                              ", std430) buffer Buffer1\n"
1466                              "{\n"
1467                              "  uint result[];\n"
1468                              "};\n";
1469             }
1470 
1471             result[i] += "layout(set = 0, binding = " + de::toString(binding2) +
1472                          ", std430) buffer Buffer2\n"
1473                          "{\n"
1474                          "  uint tempBuffer[];\n"
1475                          "};\n"
1476                          "layout(set = 0, binding = " +
1477                          de::toString(binding3) +
1478                          ", std430) buffer Buffer3\n"
1479                          "{\n"
1480                          "  uint subgroupID;\n"
1481                          "};\n"
1482                          "layout(set = 0, binding = " +
1483                          de::toString(binding4) +
1484                          ", std430) buffer Buffer4\n"
1485                          "{\n"
1486                          "  uint value;\n"
1487                          "};\n"
1488                          "layout(set = 0, binding = " +
1489                          de::toString(binding5) + ", r32ui) uniform uimage2D tempImage;\n";
1490         }
1491     }
1492 
1493     return result;
1494 }
1495 
getTestString(const CaseDefinition & caseDef)1496 string getTestString(const CaseDefinition &caseDef)
1497 {
1498     stringstream body;
1499 
1500 #ifndef CTS_USES_VULKANSC
1501     if (caseDef.opType != OPTYPE_ELECT &&
1502         (isAllGraphicsStages(caseDef.shaderStage) || isAllRayTracingStages(caseDef.shaderStage)))
1503 #else
1504     if (caseDef.opType != OPTYPE_ELECT && (isAllGraphicsStages(caseDef.shaderStage)))
1505 #endif // CTS_USES_VULKANSC
1506     {
1507         body << "  uint id = 0;\n"
1508                 "  if (subgroupElect())\n"
1509                 "  {\n"
1510                 "    id = atomicAdd(subgroupID, 1);\n"
1511                 "  }\n"
1512                 "  id = subgroupBroadcastFirst(id);\n"
1513                 "  uint localId = id;\n"
1514                 "  uint tempResult = 0;\n";
1515     }
1516 
1517     switch (caseDef.opType)
1518     {
1519     case OPTYPE_ELECT:
1520         if (isAllComputeStages(caseDef.shaderStage))
1521         {
1522             body << "  uint value = " << UNELECTED_VALUE
1523                  << ";\n"
1524                     "  if (subgroupElect())\n"
1525                     "  {\n"
1526                     "    value = "
1527                  << ELECTED_VALUE
1528                  << ";\n"
1529                     "  }\n"
1530                     "  uvec4 bits = bitCount(sharedMemoryBallot(value == "
1531                  << ELECTED_VALUE
1532                  << "));\n"
1533                     "  tempRes = bits.x + bits.y + bits.z + bits.w;\n";
1534         }
1535         else
1536         {
1537             body << "  if (subgroupElect())\n"
1538                     "  {\n"
1539                     "    tempRes = "
1540                  << ELECTED_VALUE
1541                  << ";\n"
1542                     "    atomicAdd(numSubgroupsExecuted, 1);\n"
1543                     "  }\n"
1544                     "  else\n"
1545                     "  {\n"
1546                     "    tempRes = "
1547                  << UNELECTED_VALUE
1548                  << ";\n"
1549                     "  }\n";
1550         }
1551         break;
1552 
1553     case OPTYPE_SUBGROUP_BARRIER:
1554     case OPTYPE_SUBGROUP_MEMORY_BARRIER:
1555     case OPTYPE_SUBGROUP_MEMORY_BARRIER_BUFFER:
1556         body << "  if (subgroupElect())\n"
1557                 "  {\n"
1558                 "    tempBuffer[id] = value;\n"
1559                 "  }\n"
1560                 "  "
1561              << getOpTypeName(caseDef.opType)
1562              << "();\n"
1563                 "  tempResult = tempBuffer[id];\n";
1564         break;
1565 
1566     case OPTYPE_SUBGROUP_MEMORY_BARRIER_SHARED:
1567         body << "  if (subgroupElect())\n"
1568                 "  {\n"
1569                 "    tempShared[localId] = value;\n"
1570                 "  }\n"
1571                 "  subgroupMemoryBarrierShared();\n"
1572                 "  tempResult = tempShared[localId];\n";
1573         break;
1574 
1575     case OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE:
1576         body << "  if (subgroupElect())\n"
1577                 "  {\n"
1578                 "    imageStore(tempImage, ivec2(id, 0), ivec4(value));\n"
1579                 "  }\n"
1580                 "  subgroupMemoryBarrierImage();\n"
1581                 "  tempResult = imageLoad(tempImage, ivec2(id, 0)).x;\n";
1582         break;
1583 
1584     default:
1585         TCU_THROW(InternalError, "Unhandled op type!");
1586     }
1587 
1588 #ifndef CTS_USES_VULKANSC
1589     if (caseDef.opType != OPTYPE_ELECT &&
1590         (isAllGraphicsStages(caseDef.shaderStage) || isAllRayTracingStages(caseDef.shaderStage)))
1591 #else
1592     if (caseDef.opType != OPTYPE_ELECT && (isAllGraphicsStages(caseDef.shaderStage)))
1593 #endif // CTS_USES_VULKANSC
1594     {
1595         body << "  tempRes = tempResult;\n";
1596     }
1597 
1598     return body.str();
1599 }
1600 
getExtHeader(const CaseDefinition & caseDef)1601 string getExtHeader(const CaseDefinition &caseDef)
1602 {
1603     const string extensions = (caseDef.opType == OPTYPE_ELECT) ? "#extension GL_KHR_shader_subgroup_basic: enable\n" :
1604                                                                  "#extension GL_KHR_shader_subgroup_basic: enable\n"
1605                                                                  "#extension GL_KHR_shader_subgroup_ballot: enable\n";
1606     return extensions;
1607 }
1608 
initComputeOrMeshPrograms(SourceCollections & programCollection,CaseDefinition & caseDef,const string & extensions,const string & testSrc,const ShaderBuildOptions & buildOptions)1609 void initComputeOrMeshPrograms(SourceCollections &programCollection, CaseDefinition &caseDef, const string &extensions,
1610                                const string &testSrc, const ShaderBuildOptions &buildOptions)
1611 {
1612     std::ostringstream electTemplateStream;
1613     electTemplateStream << "#version 450\n"
1614                         << extensions << "${EXTENSIONS:opt}"
1615                         << "layout (local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n"
1616                         << "${LAYOUTS:opt}"
1617                         << "layout (set = 0, binding = 0, std430) buffer Buffer1\n"
1618                         << "{\n"
1619                         << "  uint result[];\n"
1620                         << "};\n"
1621                         << "\n"
1622                         << subgroups::getSharedMemoryBallotHelper() << "void main (void)\n"
1623                         << "{\n"
1624                         << "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1625                         << "  highp uint offset = globalSize.x * ((globalSize.y * gl_GlobalInvocationID.z) + "
1626                            "gl_GlobalInvocationID.y) + gl_GlobalInvocationID.x;\n"
1627                         << "  uint value = " << UNELECTED_VALUE << ";\n"
1628                         << "  if (subgroupElect())\n"
1629                         << "  {\n"
1630                         << "    value = " << ELECTED_VALUE << ";\n"
1631                         << "  }\n"
1632                         << "  uvec4 bits = bitCount(sharedMemoryBallot(value == " << ELECTED_VALUE << "));\n"
1633                         << "  result[offset] = bits.x + bits.y + bits.z + bits.w;\n"
1634                         << "${BODY:opt}"
1635                         << "}\n";
1636     ;
1637     const tcu::StringTemplate electTemplate(electTemplateStream.str());
1638 
1639     std::ostringstream nonElectTemplateStream;
1640     nonElectTemplateStream << "#version 450\n"
1641                            << "#extension GL_KHR_shader_subgroup_basic: enable\n"
1642                            << "${EXTENSIONS:opt}"
1643                            << "layout (local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n"
1644                            << "${LAYOUTS:opt}"
1645                            << "layout (set = 0, binding = 0, std430) buffer Buffer1\n"
1646                            << "{\n"
1647                            << "  uint result[];\n"
1648                            << "};\n"
1649                            << "layout (set = 0, binding = 1, std430) buffer Buffer2\n"
1650                            << "{\n"
1651                            << "  uint tempBuffer[];\n"
1652                            << "};\n"
1653                            << "layout (set = 0, binding = 2, std430) buffer Buffer3\n"
1654                            << "{\n"
1655                            << "  uint value;\n"
1656                            << "};\n"
1657                            << "layout (set = 0, binding = 3, r32ui) uniform uimage2D tempImage;\n"
1658                            << "shared uint tempShared[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
1659                            << "\n"
1660                            << "void main (void)\n"
1661                            << "{\n"
1662                            << "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1663                            << "  highp uint offset = globalSize.x * ((globalSize.y * gl_GlobalInvocationID.z) + "
1664                               "gl_GlobalInvocationID.y) + gl_GlobalInvocationID.x;\n"
1665                            << "  uint localId = gl_SubgroupID;\n"
1666                            << "  uint id = globalSize.x * ((globalSize.y * gl_WorkGroupID.z) + gl_WorkGroupID.y) + "
1667                               "gl_WorkGroupID.x + localId;\n"
1668                            << "  uint tempResult = 0;\n"
1669                            << testSrc << "  result[offset] = tempResult;\n"
1670                            << "${BODY:opt}"
1671                            << "}\n";
1672     ;
1673     const tcu::StringTemplate nonElectTemplate(nonElectTemplateStream.str());
1674 
1675     if (isAllComputeStages(caseDef.shaderStage))
1676     {
1677         const std::map<std::string, std::string> emptyMap;
1678 
1679         if (OPTYPE_ELECT == caseDef.opType)
1680         {
1681             const auto programSource = electTemplate.specialize(emptyMap);
1682             programCollection.glslSources.add("comp") << glu::ComputeSource(programSource) << buildOptions;
1683         }
1684         else
1685         {
1686             const auto programSource = nonElectTemplate.specialize(emptyMap);
1687             programCollection.glslSources.add("comp") << glu::ComputeSource(programSource) << buildOptions;
1688         }
1689     }
1690 #ifndef CTS_USES_VULKANSC
1691     else if (isAllMeshShadingStages(caseDef.shaderStage))
1692     {
1693         const bool testMesh       = ((caseDef.shaderStage & VK_SHADER_STAGE_MESH_BIT_EXT) != 0u);
1694         const bool testTask       = ((caseDef.shaderStage & VK_SHADER_STAGE_TASK_BIT_EXT) != 0u);
1695         const tcu::UVec3 emitSize = (testMesh ? tcu::UVec3(1u, 1u, 1u) : tcu::UVec3(0u, 0u, 0u));
1696 
1697         const std::map<std::string, std::string> meshMap{
1698             std::make_pair("EXTENSIONS", "#extension GL_EXT_mesh_shader : enable\n"),
1699             std::make_pair("LAYOUTS", "layout (points) out;\nlayout (max_vertices=1, max_primitives=1) out;\n"),
1700             std::make_pair("BODY", "  SetMeshOutputsEXT(0u, 0u);\n")};
1701 
1702         const std::map<std::string, std::string> taskMap{
1703             std::make_pair("EXTENSIONS", "#extension GL_EXT_mesh_shader : enable\n"),
1704             std::make_pair("BODY", "  EmitMeshTasksEXT(" + std::to_string(emitSize.x()) + ", " +
1705                                        std::to_string(emitSize.y()) + ", " + std::to_string(emitSize.z()) + ");\n")};
1706 
1707         if (testMesh)
1708         {
1709             if (OPTYPE_ELECT == caseDef.opType)
1710             {
1711                 const auto programSource = electTemplate.specialize(meshMap);
1712                 programCollection.glslSources.add("mesh") << glu::MeshSource(programSource) << buildOptions;
1713             }
1714             else
1715             {
1716                 const auto programSource = nonElectTemplate.specialize(meshMap);
1717                 programCollection.glslSources.add("mesh") << glu::MeshSource(programSource) << buildOptions;
1718             }
1719         }
1720         else
1721         {
1722             const std::string meshShaderNoSubgroups =
1723                 "#version 450\n"
1724                 "#extension GL_EXT_mesh_shader : enable\n"
1725                 "\n"
1726                 "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
1727                 "layout (points) out;\n"
1728                 "layout (max_vertices = 1, max_primitives = 1) out;\n"
1729                 "\n"
1730                 "void main (void)\n"
1731                 "{\n"
1732                 "  SetMeshOutputsEXT(0u, 0u);\n"
1733                 "}\n";
1734             programCollection.glslSources.add("mesh") << glu::MeshSource(meshShaderNoSubgroups) << buildOptions;
1735         }
1736 
1737         if (testTask)
1738         {
1739             if (OPTYPE_ELECT == caseDef.opType)
1740             {
1741                 const auto programSource = electTemplate.specialize(taskMap);
1742                 programCollection.glslSources.add("task") << glu::TaskSource(programSource) << buildOptions;
1743             }
1744             else
1745             {
1746                 const auto programSource = nonElectTemplate.specialize(taskMap);
1747                 programCollection.glslSources.add("task") << glu::TaskSource(programSource) << buildOptions;
1748             }
1749         }
1750     }
1751 #endif // CTS_USES_VULKANSC
1752     else
1753     {
1754         DE_ASSERT(false);
1755     }
1756 }
1757 
initPrograms(SourceCollections & programCollection,CaseDefinition caseDef)1758 void initPrograms(SourceCollections &programCollection, CaseDefinition caseDef)
1759 {
1760 #ifndef CTS_USES_VULKANSC
1761     const bool spirv14required =
1762         (isAllRayTracingStages(caseDef.shaderStage) || isAllMeshShadingStages(caseDef.shaderStage));
1763 #else
1764     const bool spirv14required = false;
1765 #endif // CTS_USES_VULKANSC
1766     const SpirvVersion spirvVersion = (spirv14required ? SPIRV_VERSION_1_4 : SPIRV_VERSION_1_3);
1767     const ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, spirvVersion, 0u, spirv14required);
1768     const string extHeader                = getExtHeader(caseDef);
1769     const string testSrc                  = getTestString(caseDef);
1770     const vector<string> headDeclarations = getPerStageHeadDeclarations(caseDef);
1771     const bool pointSizeSupport           = *caseDef.geometryPointSizeSupported;
1772     const bool isComp                     = isAllComputeStages(caseDef.shaderStage);
1773 #ifndef CTS_USES_VULKANSC
1774     const bool isMesh = isAllMeshShadingStages(caseDef.shaderStage);
1775 #else
1776     const bool isMesh          = false;
1777 #endif // CTS_USES_VULKANSC
1778 
1779     if (isComp || isMesh)
1780         initComputeOrMeshPrograms(programCollection, caseDef, extHeader, testSrc, buildOptions);
1781     else
1782         subgroups::initStdPrograms(programCollection, buildOptions, caseDef.shaderStage, VK_FORMAT_R32_UINT,
1783                                    pointSizeSupport, extHeader, testSrc, "", headDeclarations, true);
1784 }
1785 
supportedCheck(Context & context,CaseDefinition caseDef)1786 void supportedCheck(Context &context, CaseDefinition caseDef)
1787 {
1788     if (!subgroups::isSubgroupSupported(context))
1789         TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
1790 
1791     if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_BASIC_BIT))
1792         TCU_FAIL("supportedOperations will have the VK_SUBGROUP_FEATURE_BASIC_BIT bit set if any of the physical "
1793                  "device's queues support VK_QUEUE_GRAPHICS_BIT or VK_QUEUE_COMPUTE_BIT.");
1794 
1795     if (caseDef.requiredSubgroupSize)
1796     {
1797         context.requireDeviceFunctionality("VK_EXT_subgroup_size_control");
1798 
1799 #ifndef CTS_USES_VULKANSC
1800         const VkPhysicalDeviceSubgroupSizeControlFeatures &subgroupSizeControlFeatures =
1801             context.getSubgroupSizeControlFeatures();
1802         const VkPhysicalDeviceSubgroupSizeControlProperties &subgroupSizeControlProperties =
1803             context.getSubgroupSizeControlProperties();
1804 #else
1805         const VkPhysicalDeviceSubgroupSizeControlFeaturesEXT &subgroupSizeControlFeatures =
1806             context.getSubgroupSizeControlFeaturesEXT();
1807         const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT &subgroupSizeControlProperties =
1808             context.getSubgroupSizeControlPropertiesEXT();
1809 #endif // CTS_USES_VULKANSC
1810 
1811         if (subgroupSizeControlFeatures.subgroupSizeControl == false)
1812             TCU_THROW(NotSupportedError, "Device does not support varying subgroup sizes nor required subgroup size");
1813 
1814         if (subgroupSizeControlFeatures.computeFullSubgroups == false)
1815             TCU_THROW(NotSupportedError, "Device does not support full subgroups in compute shaders");
1816 
1817         if ((subgroupSizeControlProperties.requiredSubgroupSizeStages & caseDef.shaderStage) != caseDef.shaderStage)
1818             TCU_THROW(NotSupportedError, "Required subgroup size is not supported for shader stage");
1819     }
1820 
1821     *caseDef.geometryPointSizeSupported = subgroups::isTessellationAndGeometryPointSizeSupported(context);
1822 
1823     subgroups::supportedCheckShader(context, caseDef.shaderStage);
1824 
1825     if (OPTYPE_ELECT != caseDef.opType && VK_SHADER_STAGE_COMPUTE_BIT != caseDef.shaderStage)
1826     {
1827         if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_BALLOT_BIT))
1828         {
1829             TCU_THROW(NotSupportedError,
1830                       "Subgroup basic operation non-compute stage test required that ballot operations are supported!");
1831         }
1832     }
1833 
1834 #ifndef CTS_USES_VULKANSC
1835     if (isAllRayTracingStages(caseDef.shaderStage))
1836     {
1837         context.requireDeviceFunctionality("VK_KHR_ray_tracing_pipeline");
1838     }
1839     else if (isAllMeshShadingStages(caseDef.shaderStage))
1840     {
1841         context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_VERTEX_PIPELINE_STORES_AND_ATOMICS);
1842         context.requireDeviceFunctionality("VK_EXT_mesh_shader");
1843 
1844         if ((caseDef.shaderStage & VK_SHADER_STAGE_TASK_BIT_EXT) != 0u)
1845         {
1846             const auto &features = context.getMeshShaderFeaturesEXT();
1847             if (!features.taskShader)
1848                 TCU_THROW(NotSupportedError, "Task shaders not supported");
1849         }
1850     }
1851 #endif // CTS_USES_VULKANSC
1852 }
1853 
noSSBOtest(Context & context,const CaseDefinition caseDef)1854 TestStatus noSSBOtest(Context &context, const CaseDefinition caseDef)
1855 {
1856     const uint32_t inputDatasCount = OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? 3u : 2u;
1857     vector<subgroups::SSBOData> inputDatas(inputDatasCount);
1858 
1859     inputDatas[0].format         = VK_FORMAT_R32_UINT;
1860     inputDatas[0].layout         = subgroups::SSBOData::LayoutStd140;
1861     inputDatas[0].numElements    = SHADER_BUFFER_SIZE / 4ull;
1862     inputDatas[0].initializeType = subgroups::SSBOData::InitializeNonZero;
1863     inputDatas[0].bindingType    = subgroups::SSBOData::BindingUBO;
1864 
1865     inputDatas[1].format         = VK_FORMAT_R32_UINT;
1866     inputDatas[1].layout         = subgroups::SSBOData::LayoutStd140;
1867     inputDatas[1].numElements    = 1ull;
1868     inputDatas[1].initializeType = subgroups::SSBOData::InitializeNonZero;
1869     inputDatas[1].bindingType    = subgroups::SSBOData::BindingUBO;
1870 
1871     if (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType)
1872     {
1873         inputDatas[2].format         = VK_FORMAT_R32_UINT;
1874         inputDatas[2].layout         = subgroups::SSBOData::LayoutPacked;
1875         inputDatas[2].numElements    = SHADER_BUFFER_SIZE;
1876         inputDatas[2].initializeType = subgroups::SSBOData::InitializeNone;
1877         inputDatas[2].bindingType    = subgroups::SSBOData::BindingImage;
1878     }
1879 
1880     if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
1881     {
1882         if (OPTYPE_ELECT == caseDef.opType)
1883             return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32G32_SFLOAT, DE_NULL, 0u, DE_NULL,
1884                                                         checkVertexPipelineStagesSubgroupElectNoSSBO);
1885         else
1886             return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32G32B32A32_SFLOAT, &inputDatas[0],
1887                                                         inputDatasCount, DE_NULL,
1888                                                         (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType) ?
1889                                                             checkVertexPipelineStagesSubgroupBarriersWithImageNoSSBO :
1890                                                             checkVertexPipelineStagesSubgroupBarriersNoSSBO);
1891     }
1892     else if (VK_SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage)
1893     {
1894         return subgroups::makeFragmentFrameBufferTest(
1895             context, VK_FORMAT_R32G32B32A32_SFLOAT, &inputDatas[0], inputDatasCount, DE_NULL,
1896             (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType) ? checkFragmentSubgroupBarriersWithImageNoSSBO :
1897                                                                        checkFragmentSubgroupBarriersNoSSBO);
1898     }
1899     else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
1900     {
1901         if (OPTYPE_ELECT == caseDef.opType)
1902             return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32G32_SFLOAT, DE_NULL, 0u, DE_NULL,
1903                                                           checkVertexPipelineStagesSubgroupElectNoSSBO);
1904         else
1905             return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32G32B32A32_SFLOAT, &inputDatas[0],
1906                                                           inputDatasCount, DE_NULL,
1907                                                           (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType) ?
1908                                                               checkVertexPipelineStagesSubgroupBarriersWithImageNoSSBO :
1909                                                               checkVertexPipelineStagesSubgroupBarriersNoSSBO);
1910     }
1911 
1912     if (OPTYPE_ELECT == caseDef.opType)
1913         return subgroups::makeTessellationEvaluationFrameBufferTest(
1914             context, VK_FORMAT_R32G32_SFLOAT, DE_NULL, 0u, DE_NULL, checkVertexPipelineStagesSubgroupElectNoSSBO,
1915             caseDef.shaderStage);
1916 
1917     return subgroups::makeTessellationEvaluationFrameBufferTest(
1918         context, VK_FORMAT_R32G32B32A32_SFLOAT, &inputDatas[0], inputDatasCount, DE_NULL,
1919         (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage) ?
1920             ((OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType) ?
1921                  checkVertexPipelineStagesSubgroupBarriersWithImageNoSSBO :
1922                  checkVertexPipelineStagesSubgroupBarriersNoSSBO) :
1923             ((OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType) ?
1924                  checkTessellationEvaluationSubgroupBarriersWithImageNoSSBO :
1925                  checkTessellationEvaluationSubgroupBarriersNoSSBO),
1926         caseDef.shaderStage);
1927 }
1928 
test(Context & context,const CaseDefinition caseDef)1929 TestStatus test(Context &context, const CaseDefinition caseDef)
1930 {
1931     const bool isCompute = isAllComputeStages(caseDef.shaderStage);
1932 #ifndef CTS_USES_VULKANSC
1933     const bool isMesh = isAllMeshShadingStages(caseDef.shaderStage);
1934 #else
1935     const bool isMesh = false;
1936 #endif // CTS_USES_VULKANSC
1937     DE_ASSERT(!(isCompute && isMesh));
1938 
1939     if (isCompute || isMesh)
1940     {
1941 #ifndef CTS_USES_VULKANSC
1942         const VkPhysicalDeviceSubgroupSizeControlProperties &subgroupSizeControlProperties =
1943             context.getSubgroupSizeControlProperties();
1944 #else
1945         const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT &subgroupSizeControlProperties =
1946             context.getSubgroupSizeControlPropertiesEXT();
1947 #endif // CTS_USES_VULKANSC
1948         TestLog &log = context.getTestContext().getLog();
1949 
1950         if (OPTYPE_ELECT == caseDef.opType)
1951         {
1952             if (caseDef.requiredSubgroupSize == false)
1953             {
1954                 if (isCompute)
1955                     return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, DE_NULL,
1956                                                       checkComputeOrMeshSubgroupElect);
1957                 else
1958                     return subgroups::makeMeshTest(context, VK_FORMAT_R32_UINT, nullptr, 0, nullptr,
1959                                                    checkComputeOrMeshSubgroupElect);
1960             }
1961 
1962             log << TestLog::Message << "Testing required subgroup size range ["
1963                 << subgroupSizeControlProperties.minSubgroupSize << ", "
1964                 << subgroupSizeControlProperties.maxSubgroupSize << "]" << TestLog::EndMessage;
1965 
1966             // According to the spec, requiredSubgroupSize must be a power-of-two integer.
1967             for (uint32_t size = subgroupSizeControlProperties.minSubgroupSize;
1968                  size <= subgroupSizeControlProperties.maxSubgroupSize; size *= 2)
1969             {
1970                 TestStatus result(QP_TEST_RESULT_INTERNAL_ERROR, "Internal Error");
1971 
1972                 if (isCompute)
1973                     result = subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0u, DE_NULL,
1974                                                         checkComputeOrMeshSubgroupElect, size);
1975                 else
1976                     result = subgroups::makeMeshTest(context, VK_FORMAT_R32_UINT, nullptr, 0u, nullptr,
1977                                                      checkComputeOrMeshSubgroupElect, size);
1978 
1979                 if (result.getCode() != QP_TEST_RESULT_PASS)
1980                 {
1981                     log << TestLog::Message << "subgroupSize " << size << " failed" << TestLog::EndMessage;
1982                     return result;
1983                 }
1984             }
1985 
1986             return TestStatus::pass("OK");
1987         }
1988         else
1989         {
1990             const uint32_t inputDatasCount                        = 3;
1991             const subgroups::SSBOData inputDatas[inputDatasCount] = {
1992                 {
1993                     subgroups::SSBOData::InitializeNone, //  InputDataInitializeType initializeType;
1994                     subgroups::SSBOData::LayoutStd430,   //  InputDataLayoutType layout;
1995                     VK_FORMAT_R32_UINT,                  //  vk::VkFormat format;
1996                     SHADER_BUFFER_SIZE,                  //  vk::VkDeviceSize numElements;
1997                 },
1998                 {
1999                     subgroups::SSBOData::InitializeNonZero, //  InputDataInitializeType initializeType;
2000                     subgroups::SSBOData::LayoutStd430,      //  InputDataLayoutType layout;
2001                     VK_FORMAT_R32_UINT,                     //  vk::VkFormat format;
2002                     1,                                      //  vk::VkDeviceSize numElements;
2003                 },
2004                 {
2005                     subgroups::SSBOData::InitializeNone, //  InputDataInitializeType initializeType;
2006                     subgroups::SSBOData::LayoutPacked,   //  InputDataLayoutType layout;
2007                     VK_FORMAT_R32_UINT,                  //  vk::VkFormat format;
2008                     SHADER_BUFFER_SIZE,                  //  vk::VkDeviceSize numElements;
2009                     subgroups::SSBOData::BindingImage,   //  bool isImage;
2010                 },
2011             };
2012 
2013             if (caseDef.requiredSubgroupSize == false)
2014             {
2015                 if (isCompute)
2016                     return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, inputDatas, inputDatasCount, DE_NULL,
2017                                                       checkComputeOrMeshSubgroupBarriers);
2018                 else
2019                     return subgroups::makeMeshTest(context, VK_FORMAT_R32_UINT, inputDatas, inputDatasCount, nullptr,
2020                                                    checkComputeOrMeshSubgroupBarriers);
2021             }
2022 
2023             log << TestLog::Message << "Testing required subgroup size range ["
2024                 << subgroupSizeControlProperties.minSubgroupSize << ", "
2025                 << subgroupSizeControlProperties.maxSubgroupSize << "]" << TestLog::EndMessage;
2026 
2027             // According to the spec, requiredSubgroupSize must be a power-of-two integer.
2028             for (uint32_t size = subgroupSizeControlProperties.minSubgroupSize;
2029                  size <= subgroupSizeControlProperties.maxSubgroupSize; size *= 2)
2030             {
2031                 TestStatus result(QP_TEST_RESULT_INTERNAL_ERROR, "Internal Error");
2032 
2033                 if (isCompute)
2034                     result = subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, inputDatas, inputDatasCount,
2035                                                         DE_NULL, checkComputeOrMeshSubgroupBarriers, size);
2036                 else
2037                     result = subgroups::makeMeshTest(context, VK_FORMAT_R32_UINT, inputDatas, inputDatasCount, nullptr,
2038                                                      checkComputeOrMeshSubgroupBarriers, size);
2039 
2040                 if (result.getCode() != QP_TEST_RESULT_PASS)
2041                 {
2042                     log << TestLog::Message << "subgroupSize " << size << " failed" << TestLog::EndMessage;
2043                     return result;
2044                 }
2045             }
2046 
2047             return TestStatus::pass("OK");
2048         }
2049     }
2050     else if (isAllGraphicsStages(caseDef.shaderStage))
2051     {
2052         if (!subgroups::isFragmentSSBOSupportedForDevice(context))
2053         {
2054             TCU_THROW(NotSupportedError,
2055                       "Subgroup basic operation require that the fragment stage be able to write to SSBOs!");
2056         }
2057 
2058         const VkShaderStageFlags stages = subgroups::getPossibleGraphicsSubgroupStages(context, caseDef.shaderStage);
2059         const VkShaderStageFlags stagesBits[] = {
2060             VK_SHADER_STAGE_VERTEX_BIT,
2061             VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
2062             VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
2063             VK_SHADER_STAGE_GEOMETRY_BIT,
2064             VK_SHADER_STAGE_FRAGMENT_BIT,
2065         };
2066 
2067         if (OPTYPE_ELECT == caseDef.opType)
2068         {
2069             const uint32_t inputCount = DE_LENGTH_OF_ARRAY(stagesBits);
2070             subgroups::SSBOData inputData[inputCount];
2071 
2072             for (uint32_t ndx = 0; ndx < DE_LENGTH_OF_ARRAY(stagesBits); ++ndx)
2073             {
2074                 inputData[ndx] = {
2075                     subgroups::SSBOData::InitializeZero, //  InputDataInitializeType initializeType;
2076                     subgroups::SSBOData::LayoutStd430,   //  InputDataLayoutType layout;
2077                     VK_FORMAT_R32_UINT,                  //  vk::VkFormat format;
2078                     1,                                   //  vk::VkDeviceSize numElements;
2079                     subgroups::SSBOData::BindingSSBO,    //  bool isImage;
2080                     4 + ndx,                             //  uint32_t binding;
2081                     stagesBits[ndx],                     //  vk::VkShaderStageFlags stages;
2082                 };
2083             }
2084 
2085             return subgroups::allStages(context, VK_FORMAT_R32_UINT, inputData, inputCount, DE_NULL,
2086                                         checkVertexPipelineStagesSubgroupElect, stages);
2087         }
2088         else
2089         {
2090             const uint32_t inputDatasCount = DE_LENGTH_OF_ARRAY(stagesBits) * 4u;
2091             subgroups::SSBOData inputDatas[inputDatasCount];
2092 
2093             for (int ndx = 0; ndx < DE_LENGTH_OF_ARRAY(stagesBits); ++ndx)
2094             {
2095                 const uint32_t index = ndx * 4;
2096 
2097                 inputDatas[index].format         = VK_FORMAT_R32_UINT;
2098                 inputDatas[index].layout         = subgroups::SSBOData::LayoutStd430;
2099                 inputDatas[index].numElements    = SHADER_BUFFER_SIZE;
2100                 inputDatas[index].initializeType = subgroups::SSBOData::InitializeNonZero;
2101                 inputDatas[index].binding        = index + 4u;
2102                 inputDatas[index].stages         = stagesBits[ndx];
2103 
2104                 inputDatas[index + 1].format         = VK_FORMAT_R32_UINT;
2105                 inputDatas[index + 1].layout         = subgroups::SSBOData::LayoutStd430;
2106                 inputDatas[index + 1].numElements    = 1;
2107                 inputDatas[index + 1].initializeType = subgroups::SSBOData::InitializeZero;
2108                 inputDatas[index + 1].binding        = index + 5u;
2109                 inputDatas[index + 1].stages         = stagesBits[ndx];
2110 
2111                 inputDatas[index + 2].format         = VK_FORMAT_R32_UINT;
2112                 inputDatas[index + 2].layout         = subgroups::SSBOData::LayoutStd430;
2113                 inputDatas[index + 2].numElements    = 1;
2114                 inputDatas[index + 2].initializeType = subgroups::SSBOData::InitializeNonZero;
2115                 inputDatas[index + 2].binding        = index + 6u;
2116                 inputDatas[index + 2].stages         = stagesBits[ndx];
2117 
2118                 inputDatas[index + 3].format         = VK_FORMAT_R32_UINT;
2119                 inputDatas[index + 3].layout         = subgroups::SSBOData::LayoutStd430;
2120                 inputDatas[index + 3].numElements    = SHADER_BUFFER_SIZE;
2121                 inputDatas[index + 3].initializeType = subgroups::SSBOData::InitializeNone;
2122                 inputDatas[index + 3].bindingType    = subgroups::SSBOData::BindingImage;
2123                 inputDatas[index + 3].binding        = index + 7u;
2124                 inputDatas[index + 3].stages         = stagesBits[ndx];
2125             }
2126 
2127             return subgroups::allStages(context, VK_FORMAT_R32_UINT, inputDatas, inputDatasCount, DE_NULL,
2128                                         checkVertexPipelineStagesSubgroupBarriers, stages);
2129         }
2130     }
2131 #ifndef CTS_USES_VULKANSC
2132     else if (isAllRayTracingStages(caseDef.shaderStage))
2133     {
2134         const VkShaderStageFlags stages = subgroups::getPossibleRayTracingSubgroupStages(context, caseDef.shaderStage);
2135         const VkShaderStageFlags stagesBits[] = {
2136             VK_SHADER_STAGE_RAYGEN_BIT_KHR, VK_SHADER_STAGE_ANY_HIT_BIT_KHR,      VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,
2137             VK_SHADER_STAGE_MISS_BIT_KHR,   VK_SHADER_STAGE_INTERSECTION_BIT_KHR, VK_SHADER_STAGE_CALLABLE_BIT_KHR,
2138         };
2139         const uint32_t stagesCount = DE_LENGTH_OF_ARRAY(stagesBits);
2140 
2141         if (OPTYPE_ELECT == caseDef.opType)
2142         {
2143             const uint32_t inputDataCount = stagesCount;
2144             subgroups::SSBOData inputData[inputDataCount];
2145 
2146             for (uint32_t ndx = 0; ndx < inputDataCount; ++ndx)
2147             {
2148                 inputData[ndx].format         = VK_FORMAT_R32_UINT;
2149                 inputData[ndx].layout         = subgroups::SSBOData::LayoutStd430;
2150                 inputData[ndx].numElements    = 1;
2151                 inputData[ndx].initializeType = subgroups::SSBOData::InitializeZero;
2152                 inputData[ndx].binding        = stagesCount + ndx;
2153                 inputData[ndx].stages         = stagesBits[ndx];
2154             }
2155 
2156             return subgroups::allRayTracingStages(context, VK_FORMAT_R32_UINT, inputData, inputDataCount, DE_NULL,
2157                                                   checkVertexPipelineStagesSubgroupElect, stages);
2158         }
2159         else
2160         {
2161             const uint32_t datasPerStage   = 4u;
2162             const uint32_t inputDatasCount = datasPerStage * stagesCount;
2163             subgroups::SSBOData inputDatas[inputDatasCount];
2164 
2165             for (uint32_t ndx = 0; ndx < stagesCount; ++ndx)
2166             {
2167                 const uint32_t index = datasPerStage * ndx;
2168 
2169                 for (uint32_t perStageNdx = 0; perStageNdx < datasPerStage; ++perStageNdx)
2170                 {
2171                     inputDatas[index + perStageNdx].format      = VK_FORMAT_R32_UINT;
2172                     inputDatas[index + perStageNdx].layout      = subgroups::SSBOData::LayoutStd430;
2173                     inputDatas[index + perStageNdx].stages      = stagesBits[ndx];
2174                     inputDatas[index + perStageNdx].bindingType = subgroups::SSBOData::BindingSSBO;
2175                 }
2176 
2177                 inputDatas[index + 0].numElements    = SHADER_BUFFER_SIZE;
2178                 inputDatas[index + 0].initializeType = subgroups::SSBOData::InitializeNonZero;
2179                 inputDatas[index + 0].binding        = index + stagesCount;
2180 
2181                 inputDatas[index + 1].numElements    = 1;
2182                 inputDatas[index + 1].initializeType = subgroups::SSBOData::InitializeZero;
2183                 inputDatas[index + 1].binding        = index + stagesCount + 1u;
2184 
2185                 inputDatas[index + 2].numElements    = 1;
2186                 inputDatas[index + 2].initializeType = subgroups::SSBOData::InitializeNonZero;
2187                 inputDatas[index + 2].binding        = index + stagesCount + 2u;
2188 
2189                 inputDatas[index + 3].numElements    = SHADER_BUFFER_SIZE;
2190                 inputDatas[index + 3].initializeType = subgroups::SSBOData::InitializeNone;
2191                 inputDatas[index + 3].bindingType    = subgroups::SSBOData::BindingImage;
2192                 inputDatas[index + 3].binding        = index + stagesCount + 3u;
2193             }
2194 
2195             return subgroups::allRayTracingStages(context, VK_FORMAT_R32_UINT, inputDatas, inputDatasCount, DE_NULL,
2196                                                   checkVertexPipelineStagesSubgroupBarriers, stages);
2197         }
2198     }
2199 #endif // CTS_USES_VULKANSC
2200     else
2201         TCU_THROW(InternalError, "Unknown stage or invalid stage set");
2202 }
2203 } // namespace
2204 
2205 namespace vkt
2206 {
2207 namespace subgroups
2208 {
createSubgroupsBasicTests(TestContext & testCtx)2209 TestCaseGroup *createSubgroupsBasicTests(TestContext &testCtx)
2210 {
2211     de::MovePtr<TestCaseGroup> group(new TestCaseGroup(testCtx, "basic"));
2212     de::MovePtr<TestCaseGroup> graphicGroup(new TestCaseGroup(testCtx, "graphics"));
2213     de::MovePtr<TestCaseGroup> computeGroup(new TestCaseGroup(testCtx, "compute"));
2214     de::MovePtr<TestCaseGroup> meshGroup(new TestCaseGroup(testCtx, "mesh"));
2215     de::MovePtr<TestCaseGroup> framebufferGroup(new TestCaseGroup(testCtx, "framebuffer"));
2216     de::MovePtr<TestCaseGroup> raytracingGroup(new TestCaseGroup(testCtx, "ray_tracing"));
2217     const VkShaderStageFlags fbStages[] = {
2218         VK_SHADER_STAGE_FRAGMENT_BIT,
2219         VK_SHADER_STAGE_VERTEX_BIT,
2220         VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
2221         VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
2222         VK_SHADER_STAGE_GEOMETRY_BIT,
2223     };
2224 #ifndef CTS_USES_VULKANSC
2225     const VkShaderStageFlags meshStages[] = {
2226         VK_SHADER_STAGE_MESH_BIT_EXT,
2227         VK_SHADER_STAGE_TASK_BIT_EXT,
2228     };
2229 #endif // CTS_USES_VULKANSC
2230     const bool boolValues[] = {false, true};
2231 
2232     for (int opTypeIndex = 0; opTypeIndex < OPTYPE_LAST; ++opTypeIndex)
2233     {
2234         const OpType opType = static_cast<OpType>(opTypeIndex);
2235         const string op     = de::toLower(getOpTypeName(opType));
2236 
2237         for (size_t groupSizeNdx = 0; groupSizeNdx < DE_LENGTH_OF_ARRAY(boolValues); ++groupSizeNdx)
2238         {
2239             const bool requiredSubgroupSize = boolValues[groupSizeNdx];
2240             const string testNameSuffix     = requiredSubgroupSize ? "_requiredsubgroupsize" : "";
2241             const CaseDefinition caseDef    = {
2242                 opType,                        //  OpType opType;
2243                 VK_SHADER_STAGE_COMPUTE_BIT,   //  VkShaderStageFlags shaderStage;
2244                 de::SharedPtr<bool>(new bool), //  de::SharedPtr<bool> geometryPointSizeSupported;
2245                 requiredSubgroupSize           //  bool requiredSubgroupSize;
2246             };
2247             const string testName = op + testNameSuffix;
2248 
2249             addFunctionCaseWithPrograms(computeGroup.get(), testName, supportedCheck, initPrograms, test, caseDef);
2250         }
2251 
2252 #ifndef CTS_USES_VULKANSC
2253         for (size_t groupSizeNdx = 0; groupSizeNdx < DE_LENGTH_OF_ARRAY(boolValues); ++groupSizeNdx)
2254         {
2255             for (const auto &stage : meshStages)
2256             {
2257                 const bool requiredSubgroupSize = boolValues[groupSizeNdx];
2258                 const string testNameSuffix     = requiredSubgroupSize ? "_requiredsubgroupsize" : "";
2259                 const CaseDefinition caseDef    = {
2260                     opType,                        //  OpType opType;
2261                     stage,                         //  VkShaderStageFlags shaderStage;
2262                     de::SharedPtr<bool>(new bool), //  de::SharedPtr<bool> geometryPointSizeSupported;
2263                     requiredSubgroupSize           //  bool requiredSubgroupSize;
2264                 };
2265                 const string testName = op + testNameSuffix + "_" + getShaderStageName(stage);
2266 
2267                 addFunctionCaseWithPrograms(meshGroup.get(), testName, supportedCheck, initPrograms, test, caseDef);
2268             }
2269         }
2270 #endif // CTS_USES_VULKANSC
2271 
2272         if (OPTYPE_SUBGROUP_MEMORY_BARRIER_SHARED == opType)
2273         {
2274             // Shared isn't available in non compute shaders.
2275             continue;
2276         }
2277 
2278         {
2279             const CaseDefinition caseDef = {
2280                 opType,                        //  OpType opType;
2281                 VK_SHADER_STAGE_ALL_GRAPHICS,  //  VkShaderStageFlags shaderStage;
2282                 de::SharedPtr<bool>(new bool), //  de::SharedPtr<bool> geometryPointSizeSupported;
2283                 false                          //  bool requiredSubgroupSize;
2284             };
2285 
2286             addFunctionCaseWithPrograms(graphicGroup.get(), op, supportedCheck, initPrograms, test, caseDef);
2287         }
2288 
2289 #ifndef CTS_USES_VULKANSC
2290         {
2291             const CaseDefinition caseDef = {
2292                 opType,                        //  OpType opType;
2293                 SHADER_STAGE_ALL_RAY_TRACING,  //  VkShaderStageFlags shaderStage;
2294                 de::SharedPtr<bool>(new bool), //  de::SharedPtr<bool> geometryPointSizeSupported;
2295                 false                          //  bool requiredSubgroupSize;
2296             };
2297 
2298             addFunctionCaseWithPrograms(raytracingGroup.get(), op, supportedCheck, initPrograms, test, caseDef);
2299         }
2300 #endif // CTS_USES_VULKANSC
2301 
2302         for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(fbStages); ++stageIndex)
2303         {
2304             if (OPTYPE_ELECT == opType && fbStages[stageIndex] == VK_SHADER_STAGE_FRAGMENT_BIT)
2305                 continue; // This is not tested. I don't know why.
2306 
2307             const CaseDefinition caseDef = {
2308                 opType,                        //  OpType opType;
2309                 fbStages[stageIndex],          //  VkShaderStageFlags shaderStage;
2310                 de::SharedPtr<bool>(new bool), //  de::SharedPtr<bool> geometryPointSizeSupported;
2311                 false                          //  bool requiredSubgroupSize;
2312             };
2313             const string testName = op + "_" + getShaderStageName(caseDef.shaderStage);
2314 
2315             addFunctionCaseWithPrograms(framebufferGroup.get(), testName, supportedCheck, initFrameBufferPrograms,
2316                                         noSSBOtest, caseDef);
2317         }
2318     }
2319 
2320     group->addChild(graphicGroup.release());
2321     group->addChild(computeGroup.release());
2322     group->addChild(framebufferGroup.release());
2323     group->addChild(raytracingGroup.release());
2324     group->addChild(meshGroup.release());
2325 
2326     return group.release();
2327 }
2328 
2329 } // namespace subgroups
2330 } // namespace vkt
2331