1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2019 The Khronos Group Inc.
6  * Copyright (c) 2019 Valve Corporation.
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  */ /*!
21  * \file
22  * \brief Subgroups Tests
23  */ /*--------------------------------------------------------------------*/
24 
25 #include "vktSubgroupsBallotMasksTests.hpp"
26 #include "vktSubgroupsTestsUtils.hpp"
27 
28 #include <string>
29 #include <vector>
30 
31 using namespace tcu;
32 using namespace std;
33 using namespace vk;
34 using namespace vkt;
35 
36 namespace
37 {
38 
// Selects which GL_ARB_shader_ballot subgroup mask built-in a test case verifies.
// The per-enumerator notes describe what the generated shader (see getBodySource())
// expects of the mask, relative to the invocation's gl_SubGroupInvocationARB.
enum MaskType
{
    MASKTYPE_EQ = 0, // gl_SubGroupEqMaskARB: the bit at the invocation's own index must be set.
    MASKTYPE_GE,     // gl_SubGroupGeMaskARB: bits at indices >= own index set, lower bits clear.
    MASKTYPE_GT,     // gl_SubGroupGtMaskARB: bits at indices > own index set, bits <= own index clear.
    MASKTYPE_LE,     // gl_SubGroupLeMaskARB: bits at indices <= own index set, higher bits clear.
    MASKTYPE_LT,     // gl_SubGroupLtMaskARB: bits at indices < own index set, bits >= own index clear.
    MASKTYPE_LAST    // Not a real mask type: getMaskTypeName()/getBodySource() throw InternalError for it.
                     // Presumably used as the end marker when iterating over mask types - confirm at call sites.
};
48 
// Parameters describing a single ballot-mask test case.
struct CaseDefinition
{
    // Mask built-in under test; drives both the built-in name and the generated check code.
    MaskType maskType;
    // Shader stage(s) the case runs in; used to count stages and to select the program flavour.
    VkShaderStageFlags shaderStage;
    // Shared flag dereferenced when programs are built and forwarded as the point-size support argument.
    // NOTE(review): presumably filled in by a support check before program creation - confirm.
    de::SharedPtr<bool> geometryPointSizeSupported;
    // NOTE(review): not read anywhere in the visible part of the file; by its name it looks like
    // "the case requires an explicit subgroup size" - confirm against the test-creation code.
    bool requiredSubgroupSize;
};
56 
checkVertexPipelineStages(const void * internalData,vector<const void * > datas,uint32_t width,uint32_t)57 static bool checkVertexPipelineStages(const void *internalData, vector<const void *> datas, uint32_t width, uint32_t)
58 {
59     DE_UNREF(internalData);
60 
61     return subgroups::check(datas, width, 0xf);
62 }
63 
checkComputeOrMesh(const void * internalData,vector<const void * > datas,const uint32_t numWorkgroups[3],const uint32_t localSize[3],uint32_t)64 static bool checkComputeOrMesh(const void *internalData, vector<const void *> datas, const uint32_t numWorkgroups[3],
65                                const uint32_t localSize[3], uint32_t)
66 {
67     DE_UNREF(internalData);
68 
69     return subgroups::checkComputeOrMesh(datas, numWorkgroups, localSize, 0xf);
70 }
71 
getMaskTypeName(const MaskType maskType)72 string getMaskTypeName(const MaskType maskType)
73 {
74     switch (maskType)
75     {
76     case MASKTYPE_EQ:
77         return "gl_SubGroupEqMaskARB";
78     case MASKTYPE_GE:
79         return "gl_SubGroupGeMaskARB";
80     case MASKTYPE_GT:
81         return "gl_SubGroupGtMaskARB";
82     case MASKTYPE_LE:
83         return "gl_SubGroupLeMaskARB";
84     case MASKTYPE_LT:
85         return "gl_SubGroupLtMaskARB";
86     default:
87         TCU_THROW(InternalError, "Unsupported mask type");
88     }
89 }
90 
getBodySource(const CaseDefinition & caseDef)91 string getBodySource(const CaseDefinition &caseDef)
92 {
93     string body = "  uint64_t value = " + getMaskTypeName(caseDef.maskType) +
94                   ";\n"
95                   "  bool temp = true;\n";
96 
97     switch (caseDef.maskType)
98     {
99     case MASKTYPE_EQ:
100         body += "  uint64_t mask = uint64_t(1) << gl_SubGroupInvocationARB;\n"
101                 "  temp = (value & mask) != 0;\n";
102         break;
103     case MASKTYPE_GE:
104         body += "  for (uint i = 0; i < gl_SubGroupSizeARB; i++) {\n"
105                 "    uint64_t mask = uint64_t(1) << i;\n"
106                 "    if (i >= gl_SubGroupInvocationARB && (value & mask) == 0)\n"
107                 "       temp = false;\n"
108                 "    if (i < gl_SubGroupInvocationARB && (value & mask) != 0)\n"
109                 "       temp = false;\n"
110                 "  };\n";
111         break;
112     case MASKTYPE_GT:
113         body += "  for (uint i = 0; i < gl_SubGroupSizeARB; i++) {\n"
114                 "    uint64_t mask = uint64_t(1) << i;\n"
115                 "    if (i > gl_SubGroupInvocationARB && (value & mask) == 0)\n"
116                 "       temp = false;\n"
117                 "    if (i <= gl_SubGroupInvocationARB && (value & mask) != 0)\n"
118                 "       temp = false;\n"
119                 "  };\n";
120         break;
121     case MASKTYPE_LE:
122         body += "  for (uint i = 0; i < gl_SubGroupSizeARB; i++) {\n"
123                 "    uint64_t mask = uint64_t(1) << i;\n"
124                 "    if (i <= gl_SubGroupInvocationARB && (value & mask) == 0)\n"
125                 "       temp = false;\n"
126                 "    if (i > gl_SubGroupInvocationARB && (value & mask) != 0)\n"
127                 "       temp = false;\n"
128                 "  };\n";
129         break;
130     case MASKTYPE_LT:
131         body += "  for (uint i = 0; i < gl_SubGroupSizeARB; i++) {\n"
132                 "    uint64_t mask = uint64_t(1) << i;\n"
133                 "    if (i < gl_SubGroupInvocationARB && (value & mask) == 0)\n"
134                 "       temp = false;\n"
135                 "    if (i >= gl_SubGroupInvocationARB && (value & mask) != 0)\n"
136                 "       temp = false;\n"
137                 "  };\n";
138         break;
139     default:
140         TCU_THROW(InternalError, "Unknown mask type");
141     }
142 
143     body += "  uint tempResult = temp ? 0xf : 0x2;\n";
144     body += "  tempRes = tempResult;\n";
145 
146     return body;
147 }
148 
getExtHeader(const CaseDefinition &)149 string getExtHeader(const CaseDefinition &)
150 {
151     return "#extension GL_ARB_shader_ballot: enable\n"
152            "#extension GL_ARB_gpu_shader_int64: enable\n";
153 }
154 
getPerStageHeadDeclarations(const CaseDefinition & caseDef)155 vector<string> getPerStageHeadDeclarations(const CaseDefinition &caseDef)
156 {
157     const uint32_t stageCount = subgroups::getStagesCount(caseDef.shaderStage);
158     const bool fragment       = (caseDef.shaderStage & VK_SHADER_STAGE_FRAGMENT_BIT) != 0;
159     vector<string> result(stageCount, string());
160 
161     if (fragment)
162         result.reserve(result.size() + 1);
163 
164     for (size_t i = 0; i < result.size(); ++i)
165     {
166         result[i] = "layout(set = 0, binding = " + de::toString(i) +
167                     ", std430) buffer Buffer1\n"
168                     "{\n"
169                     "  uint result[];\n"
170                     "};\n";
171     }
172 
173     if (fragment)
174     {
175         const string fragPart = "layout(location = 0) out uint result;\n";
176 
177         result.push_back(fragPart);
178     }
179 
180     return result;
181 }
182 
getFramebufferPerStageHeadDeclarations(const CaseDefinition & caseDef)183 vector<string> getFramebufferPerStageHeadDeclarations(const CaseDefinition &caseDef)
184 {
185     vector<string> result;
186 
187     DE_UNREF(caseDef);
188 
189     result.push_back("layout(location = 0) out float result;\n");
190     result.push_back("layout(location = 0) out float out_color;\n");
191     result.push_back("layout(location = 0) out float out_color[];\n");
192     result.push_back("layout(location = 0) out float out_color;\n");
193 
194     return result;
195 }
196 
initFrameBufferPrograms(SourceCollections & programCollection,CaseDefinition caseDef)197 void initFrameBufferPrograms(SourceCollections &programCollection, CaseDefinition caseDef)
198 {
199     const ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3, 0u);
200     const string extHeader                = getExtHeader(caseDef);
201     const string testSrc                  = getBodySource(caseDef);
202     const vector<string> headDeclarations = getFramebufferPerStageHeadDeclarations(caseDef);
203     const bool pointSizeSupported         = *caseDef.geometryPointSizeSupported;
204 
205     subgroups::initStdFrameBufferPrograms(programCollection, buildOptions, caseDef.shaderStage, VK_FORMAT_R32_UINT,
206                                           pointSizeSupported, extHeader, testSrc, "", headDeclarations);
207 }
208 
initPrograms(SourceCollections & programCollection,CaseDefinition caseDef)209 void initPrograms(SourceCollections &programCollection, CaseDefinition caseDef)
210 {
211 #ifndef CTS_USES_VULKANSC
212     const bool spirv14required =
213         (isAllRayTracingStages(caseDef.shaderStage) || isAllMeshShadingStages(caseDef.shaderStage));
214 #else
215     const bool spirv14required = false;
216 #endif // CTS_USES_VULKANSC
217     const SpirvVersion spirvVersion = (spirv14required ? SPIRV_VERSION_1_4 : SPIRV_VERSION_1_3);
218     const ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, spirvVersion, 0u, spirv14required);
219     const string extHeader                = getExtHeader(caseDef);
220     const string testSrc                  = getBodySource(caseDef);
221     const vector<string> headDeclarations = getPerStageHeadDeclarations(caseDef);
222     const bool pointSizeSupport           = *caseDef.geometryPointSizeSupported;
223     const SpirVAsmBuildOptions buildOptionsSpr(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3);
224 
225     if (isAllComputeStages(caseDef.shaderStage))
226     {
227         string compute = "";
228         switch (caseDef.maskType)
229         {
230         case MASKTYPE_EQ:
231             compute += "; SPIR-V\n"
232                        "; Version: 1.6\n"
233                        "; Generator: Khronos SPIR-V Tools Assembler; 0\n"
234                        "; Bound: 98\n"
235                        "; Schema: 0\n"
236                        "OpCapability Shader\n"
237                        "OpCapability Int64\n"
238                        "OpCapability SubgroupBallotKHR\n"
239                        "OpExtension \"SPV_KHR_shader_ballot\"\n"
240                        "%1 = OpExtInstImport \"GLSL.std.450\"\n"
241                        "OpMemoryModel Logical GLSL450\n"
242                        "OpEntryPoint GLCompute %main \"main\" %gl_NumWorkGroups %gl_GlobalInvocationID "
243                        "%gl_SubGroupEqMaskARB %gl_SubGroupInvocationARB\n"
244                        "OpExecutionMode %main LocalSize 1 1 1\n"
245                        "OpSource GLSL 450\n"
246                        "OpSourceExtension \"GL_ARB_gpu_shader_int64\"\n"
247                        "OpSourceExtension \"GL_ARB_shader_ballot\"\n"
248                        "OpName %main \"main\"\n"
249                        "OpName %globalSize \"globalSize\"\n"
250                        "OpName %gl_NumWorkGroups \"gl_NumWorkGroups\"\n"
251                        "OpName %offset \"offset\"\n"
252                        "OpName %gl_GlobalInvocationID \"gl_GlobalInvocationID\"\n"
253                        "OpName %bitmask \"bitmask\"\n"
254                        "OpName %gl_SubGroupEqMaskARB \"gl_SubGroupEqMaskARB\"\n"
255                        "OpName %temp \"temp\"\n"
256                        "OpName %elementIndex \"elementIndex\"\n"
257                        "OpName %gl_SubGroupInvocationARB \"gl_SubGroupInvocationARB\"\n"
258                        "OpName %bitPosition \"bitPosition\"\n"
259                        "OpName %mask \"mask\"\n"
260                        "OpName %element \"element\"\n"
261                        "OpName %tempResult \"tempResult\"\n"
262                        "OpName %tempRes \"tempRes\"\n"
263                        "OpName %Buffer1 \"Buffer1\"\n"
264                        "OpMemberName %Buffer1 0 \"result\"\n"
265                        "OpName %_ \"\"\n"
266                        "OpDecorate %gl_NumWorkGroups BuiltIn NumWorkgroups\n"
267                        "OpDecorate %19 SpecId 0\n"
268                        "OpDecorate %20 SpecId 1\n"
269                        "OpDecorate %21 SpecId 2\n"
270                        "OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize\n"
271                        "OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId\n"
272                        "OpDecorate %gl_SubGroupEqMaskARB BuiltIn SubgroupEqMask\n"
273                        "OpDecorate %gl_SubGroupInvocationARB BuiltIn SubgroupLocalInvocationId\n"
274                        "OpDecorate %_runtimearr_uint ArrayStride 4\n"
275                        "OpMemberDecorate %Buffer1 0 Offset 0\n"
276                        "OpDecorate %Buffer1 BufferBlock\n"
277                        "OpDecorate %_ DescriptorSet 0\n"
278                        "OpDecorate %_ Binding 0\n"
279                        "%void = OpTypeVoid\n"
280                        "%25 = OpTypeFunction %void\n"
281                        "%uint = OpTypeInt 32 0\n"
282                        "%v3uint = OpTypeVector %uint 3\n"
283                        "%_ptr_Function_v3uint = OpTypePointer Function %v3uint\n"
284                        "%_ptr_Input_v3uint = OpTypePointer Input %v3uint\n"
285                        "%gl_NumWorkGroups = OpVariable %_ptr_Input_v3uint Input\n"
286                        "%19 = OpSpecConstant %uint 1\n"
287                        "%20 = OpSpecConstant %uint 1\n"
288                        "%21 = OpSpecConstant %uint 1\n"
289                        "%gl_WorkGroupSize = OpSpecConstantComposite %v3uint %19 %20 %21\n"
290                        "%_ptr_Function_uint = OpTypePointer Function %uint\n"
291                        "%uint_0 = OpConstant %uint 0\n"
292                        "%uint_1 = OpConstant %uint 1\n"
293                        "%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input\n"
294                        "%uint_2 = OpConstant %uint 2\n"
295                        "%_ptr_Input_uint = OpTypePointer Input %uint\n"
296                        "%int = OpTypeInt 32 1\n"
297                        "%v4uint = OpTypeVector %uint 4\n"
298                        "%uint_4 = OpConstant %uint 4\n"
299                        "%_arr_uint_uint_4 = OpTypeArray %uint %uint_4\n"
300                        "%_ptr_Function_v4uint = OpTypePointer Function %v4uint\n"
301                        "%_ptr_Function__arr_uint_uint_4 = OpTypePointer Function %_arr_uint_uint_4\n"
302                        "%ulong = OpTypeInt 64 0\n"
303                        "%_ptr_Input_ulong = OpTypePointer Input %ulong\n"
304                        "%_ptr_Input_v4uint = OpTypePointer Input %v4uint\n"
305                        "%gl_SubGroupEqMaskARB = OpVariable %_ptr_Input_v4uint Input\n"
306                        "%bool = OpTypeBool\n"
307                        "%_ptr_Function_bool = OpTypePointer Function %bool\n"
308                        "%true = OpConstantTrue %bool\n"
309                        "%gl_SubGroupInvocationARB = OpVariable %_ptr_Input_uint Input\n"
310                        "%uint_32 = OpConstant %uint 32\n"
311                        "%_ptr_Function_int = OpTypePointer Function %int\n"
312                        "%int_15 = OpConstant %int 15\n"
313                        "%int_2 = OpConstant %int 2\n"
314                        "%_runtimearr_uint = OpTypeRuntimeArray %uint\n"
315                        "%Buffer1 = OpTypeStruct %_runtimearr_uint\n"
316                        "%_ptr_Uniform_Buffer1 = OpTypePointer Uniform %Buffer1\n"
317                        "%_ = OpVariable %_ptr_Uniform_Buffer1 Uniform\n"
318                        "%int_0 = OpConstant %int 0\n"
319                        "%_ptr_Uniform_uint = OpTypePointer Uniform %uint\n"
320                        "%main = OpFunction %void None %25\n"
321                        "%54 = OpLabel\n"
322                        "%globalSize = OpVariable %_ptr_Function_v3uint Function\n"
323                        "%offset = OpVariable %_ptr_Function_uint Function\n"
324                        "%bitmask = OpVariable %_ptr_Function__arr_uint_uint_4 Function\n"
325                        "%temp = OpVariable %_ptr_Function_bool Function\n"
326                        "%elementIndex = OpVariable %_ptr_Function_uint Function\n"
327                        "%bitPosition = OpVariable %_ptr_Function_uint Function\n"
328                        "%mask = OpVariable %_ptr_Function_uint Function\n"
329                        "%element = OpVariable %_ptr_Function_uint Function\n"
330                        "%tempResult = OpVariable %_ptr_Function_uint Function\n"
331                        "%tempRes = OpVariable %_ptr_Function_uint Function\n"
332                        "%55 = OpLoad %v3uint %gl_NumWorkGroups\n"
333                        "%56 = OpIMul %v3uint %55 %gl_WorkGroupSize\n"
334                        "OpStore %globalSize %56\n"
335                        "%57 = OpAccessChain %_ptr_Function_uint %globalSize %uint_0\n"
336                        "%58 = OpLoad %uint %57\n"
337                        "%59 = OpAccessChain %_ptr_Function_uint %globalSize %uint_1\n"
338                        "%60 = OpLoad %uint %59\n"
339                        "%61 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_2\n"
340                        "%62 = OpLoad %uint %61\n"
341                        "%63 = OpIMul %uint %60 %62\n"
342                        "%64 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_1\n"
343                        "%65 = OpLoad %uint %64\n"
344                        "%66 = OpIAdd %uint %63 %65\n"
345                        "%67 = OpIMul %uint %58 %66\n"
346                        "%68 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0\n"
347                        "%69 = OpLoad %uint %68\n"
348                        "%70 = OpIAdd %uint %67 %69\n"
349                        "OpStore %offset %70\n"
350                        "%71 = OpLoad %v4uint %gl_SubGroupEqMaskARB\n"
351                        "%72 = OpCompositeExtract %uint %71 0\n"
352                        "%73 = OpCompositeExtract %uint %71 1\n"
353                        "%74 = OpCompositeExtract %uint %71 2\n"
354                        "%75 = OpCompositeExtract %uint %71 3\n"
355                        "%76 = OpCompositeConstruct %_arr_uint_uint_4 %72 %73 %74 %75\n"
356                        "OpStore %bitmask %76\n"
357                        "OpStore %temp %true\n"
358                        "%77 = OpLoad %uint %gl_SubGroupInvocationARB\n"
359                        "%78 = OpUDiv %uint %77 %uint_32\n"
360                        "OpStore %elementIndex %78\n"
361                        "%79 = OpLoad %uint %gl_SubGroupInvocationARB\n"
362                        "%80 = OpUMod %uint %79 %uint_32\n"
363                        "OpStore %bitPosition %80\n"
364                        "%81 = OpLoad %uint %bitPosition\n"
365                        "%82 = OpShiftLeftLogical %uint %uint_1 %81\n"
366                        "OpStore %mask %82\n"
367                        "%83 = OpLoad %uint %elementIndex\n"
368                        "%84 = OpAccessChain %_ptr_Function_uint %bitmask %83\n"
369                        "%85 = OpLoad %uint %84\n"
370                        "OpStore %element %85\n"
371                        "%87 = OpLoad %uint %element\n"
372                        "%88 = OpLoad %uint %mask\n"
373                        "%89 = OpBitwiseAnd %uint %87 %88\n"
374                        "%90 = OpINotEqual %bool %89 %uint_0\n"
375                        "OpStore %temp %90\n"
376                        "%91 = OpLoad %bool %temp\n"
377                        "%92 = OpSelect %int %91 %int_15 %int_2\n"
378                        "%93 = OpBitcast %uint %92\n"
379                        "OpStore %tempResult %93\n"
380                        "%94 = OpLoad %uint %tempResult\n"
381                        "OpStore %tempRes %94\n"
382                        "%95 = OpLoad %uint %offset\n"
383                        "%96 = OpLoad %uint %tempRes\n"
384                        "%97 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %95\n"
385                        "OpStore %97 %96\n"
386                        "OpReturn\n"
387                        "OpFunctionEnd\n";
388             break;
389         case MASKTYPE_GE:
390             compute += "; SPIR-V\n"
391                        "; Version: 1.6\n"
392                        "; Generator: Khronos SPIR-V Tools Assembler; 0\n"
393                        "; Bound: 128\n"
394                        "; Schema: 0\n"
395                        "OpCapability Shader\n"
396                        "OpCapability Int64\n"
397                        "OpCapability SubgroupBallotKHR\n"
398                        "OpExtension \"SPV_KHR_shader_ballot\"\n"
399                        "%1 = OpExtInstImport \"GLSL.std.450\"\n"
400                        "OpMemoryModel Logical GLSL450\n"
401                        "OpEntryPoint GLCompute %main \"main\" %gl_NumWorkGroups %gl_GlobalInvocationID "
402                        "%gl_SubGroupGeMaskARB %gl_SubGroupSizeARB %gl_SubGroupInvocationARB\n"
403                        "OpExecutionMode %main LocalSize 1 1 1\n"
404                        "OpSource GLSL 450\n"
405                        "OpSourceExtension \"GL_ARB_gpu_shader_int64\"\n"
406                        "OpSourceExtension \"GL_ARB_shader_ballot\"\n"
407                        "OpName %main \"main\"\n"
408                        "OpName %globalSize \"globalSize\"\n"
409                        "OpName %gl_NumWorkGroups \"gl_NumWorkGroups\"\n"
410                        "OpName %offset \"offset\"\n"
411                        "OpName %gl_GlobalInvocationID \"gl_GlobalInvocationID\"\n"
412                        "OpName %bitmask \"bitmask\"\n"
413                        "OpName %gl_SubGroupGeMaskARB \"gl_SubGroupGeMaskARB\"\n"
414                        "OpName %temp \"temp\"\n"
415                        "OpName %i \"i\"\n"
416                        "OpName %gl_SubGroupSizeARB \"gl_SubGroupSizeARB\"\n"
417                        "OpName %elementIndex \"elementIndex\"\n"
418                        "OpName %bitPosition \"bitPosition\"\n"
419                        "OpName %mask \"mask\"\n"
420                        "OpName %element \"element\"\n"
421                        "OpName %gl_SubGroupInvocationARB \"gl_SubGroupInvocationARB\"\n"
422                        "OpName %tempResult \"tempResult\"\n"
423                        "OpName %tempRes \"tempRes\"\n"
424                        "OpName %Buffer1 \"Buffer1\"\n"
425                        "OpMemberName %Buffer1 0 \"result\"\n"
426                        "OpName %_ \"\"\n"
427                        "OpDecorate %gl_NumWorkGroups BuiltIn NumWorkgroups\n"
428                        "OpDecorate %21 SpecId 0\n"
429                        "OpDecorate %22 SpecId 1\n"
430                        "OpDecorate %23 SpecId 2\n"
431                        "OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize\n"
432                        "OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId\n"
433                        "OpDecorate %gl_SubGroupGeMaskARB BuiltIn SubgroupGeMask\n"
434                        "OpDecorate %gl_SubGroupSizeARB BuiltIn SubgroupSize\n"
435                        "OpDecorate %gl_SubGroupInvocationARB BuiltIn SubgroupLocalInvocationId\n"
436                        "OpDecorate %_runtimearr_uint ArrayStride 4\n"
437                        "OpMemberDecorate %Buffer1 0 Offset 0\n"
438                        "OpDecorate %Buffer1 BufferBlock\n"
439                        "OpDecorate %_ DescriptorSet 0\n"
440                        "OpDecorate %_ Binding 0\n"
441                        "%void = OpTypeVoid\n"
442                        "%27 = OpTypeFunction %void\n"
443                        "%uint = OpTypeInt 32 0\n"
444                        "%v3uint = OpTypeVector %uint 3\n"
445                        "%_ptr_Function_v3uint = OpTypePointer Function %v3uint\n"
446                        "%_ptr_Input_v3uint = OpTypePointer Input %v3uint\n"
447                        "%gl_NumWorkGroups = OpVariable %_ptr_Input_v3uint Input\n"
448                        "%21 = OpSpecConstant %uint 1\n"
449                        "%22 = OpSpecConstant %uint 1\n"
450                        "%23 = OpSpecConstant %uint 1\n"
451                        "%gl_WorkGroupSize = OpSpecConstantComposite %v3uint %21 %22 %23\n"
452                        "%_ptr_Function_uint = OpTypePointer Function %uint\n"
453                        "%uint_0 = OpConstant %uint 0\n"
454                        "%uint_1 = OpConstant %uint 1\n"
455                        "%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input\n"
456                        "%uint_2 = OpConstant %uint 2\n"
457                        "%_ptr_Input_uint = OpTypePointer Input %uint\n"
458                        "%v4uint = OpTypeVector %uint 4\n"
459                        "%uint_4 = OpConstant %uint 4\n"
460                        "%_arr_uint_uint_4 = OpTypeArray %uint %uint_4\n"
461                        "%_ptr_Function_v4uint = OpTypePointer Function %v4uint\n"
462                        "%_ptr_Function__arr_uint_uint_4 = OpTypePointer Function %_arr_uint_uint_4\n"
463                        "%_ptr_Input_v4uint = OpTypePointer Input %v4uint\n"
464                        "%gl_SubGroupGeMaskARB = OpVariable %_ptr_Input_v4uint Input\n"
465                        "%bool = OpTypeBool\n"
466                        "%_ptr_Function_bool = OpTypePointer Function %bool\n"
467                        "%true = OpConstantTrue %bool\n"
468                        "%gl_SubGroupSizeARB = OpVariable %_ptr_Input_uint Input\n"
469                        "%uint_32 = OpConstant %uint 32\n"
470                        "%gl_SubGroupInvocationARB = OpVariable %_ptr_Input_uint Input\n"
471                        "%false = OpConstantFalse %bool\n"
472                        "%int = OpTypeInt 32 1\n"
473                        "%int_1 = OpConstant %int 1\n"
474                        "%int_15 = OpConstant %int 15\n"
475                        "%int_2 = OpConstant %int 2\n"
476                        "%_runtimearr_uint = OpTypeRuntimeArray %uint\n"
477                        "%Buffer1 = OpTypeStruct %_runtimearr_uint\n"
478                        "%_ptr_Uniform_Buffer1 = OpTypePointer Uniform %Buffer1\n"
479                        "%_ = OpVariable %_ptr_Uniform_Buffer1 Uniform\n"
480                        "%int_0 = OpConstant %int 0\n"
481                        "%_ptr_Uniform_uint = OpTypePointer Uniform %uint\n"
482                        "%main = OpFunction %void None %27\n"
483                        "%55 = OpLabel\n"
484                        "%globalSize = OpVariable %_ptr_Function_v3uint Function\n"
485                        "%offset = OpVariable %_ptr_Function_uint Function\n"
486                        "%bitmask = OpVariable %_ptr_Function__arr_uint_uint_4 Function\n"
487                        "%temp = OpVariable %_ptr_Function_bool Function\n"
488                        "%i = OpVariable %_ptr_Function_uint Function\n"
489                        "%elementIndex = OpVariable %_ptr_Function_uint Function\n"
490                        "%bitPosition = OpVariable %_ptr_Function_uint Function\n"
491                        "%mask = OpVariable %_ptr_Function_uint Function\n"
492                        "%element = OpVariable %_ptr_Function_uint Function\n"
493                        "%tempResult = OpVariable %_ptr_Function_uint Function\n"
494                        "%tempRes = OpVariable %_ptr_Function_uint Function\n"
495                        "%56 = OpLoad %v3uint %gl_NumWorkGroups\n"
496                        "%57 = OpIMul %v3uint %56 %gl_WorkGroupSize\n"
497                        "OpStore %globalSize %57\n"
498                        "%58 = OpAccessChain %_ptr_Function_uint %globalSize %uint_0\n"
499                        "%59 = OpLoad %uint %58\n"
500                        "%60 = OpAccessChain %_ptr_Function_uint %globalSize %uint_1\n"
501                        "%61 = OpLoad %uint %60\n"
502                        "%62 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_2\n"
503                        "%63 = OpLoad %uint %62\n"
504                        "%64 = OpIMul %uint %61 %63\n"
505                        "%65 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_1\n"
506                        "%66 = OpLoad %uint %65\n"
507                        "%67 = OpIAdd %uint %64 %66\n"
508                        "%68 = OpIMul %uint %59 %67\n"
509                        "%69 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0\n"
510                        "%70 = OpLoad %uint %69\n"
511                        "%71 = OpIAdd %uint %68 %70\n"
512                        "OpStore %offset %71\n"
513                        "%72 = OpLoad %v4uint %gl_SubGroupGeMaskARB\n"
514                        "%73 = OpCompositeExtract %uint %72 0\n"
515                        "%74 = OpCompositeExtract %uint %72 1\n"
516                        "%75 = OpCompositeExtract %uint %72 2\n"
517                        "%76 = OpCompositeExtract %uint %72 3\n"
518                        "%77 = OpCompositeConstruct %_arr_uint_uint_4 %73 %74 %75 %76\n"
519                        "OpStore %bitmask %77\n"
520                        "OpStore %temp %true\n"
521                        "OpStore %i %uint_0\n"
522                        "OpBranch %78\n"
523                        "%78 = OpLabel\n"
524                        "OpLoopMerge %79 %80 None\n"
525                        "OpBranch %81\n"
526                        "%81 = OpLabel\n"
527                        "%82 = OpLoad %uint %i\n"
528                        "%83 = OpLoad %uint %gl_SubGroupSizeARB\n"
529                        "%84 = OpULessThan %bool %82 %83\n"
530                        "OpBranchConditional %84 %85 %79\n"
531                        "%85 = OpLabel\n"
532                        "%86 = OpLoad %uint %i\n"
533                        "%87 = OpUDiv %uint %86 %uint_32\n"
534                        "OpStore %elementIndex %87\n"
535                        "%88 = OpLoad %uint %i\n"
536                        "%89 = OpUMod %uint %88 %uint_32\n"
537                        "OpStore %bitPosition %89\n"
538                        "%90 = OpLoad %uint %bitPosition\n"
539                        "%91 = OpShiftLeftLogical %uint %uint_1 %90\n"
540                        "OpStore %mask %91\n"
541                        "%92 = OpLoad %uint %elementIndex\n"
542                        "%93 = OpAccessChain %_ptr_Function_uint %bitmask %92\n"
543                        "%94 = OpLoad %uint %93\n"
544                        "OpStore %element %94\n"
545                        "%95 = OpLoad %uint %i\n"
546                        "%96 = OpLoad %uint %gl_SubGroupInvocationARB\n"
547                        "%97 = OpUGreaterThanEqual %bool %95 %96\n"
548                        "OpSelectionMerge %98 None\n"
549                        "OpBranchConditional %97 %99 %98\n"
550                        "%99 = OpLabel\n"
551                        "%100 = OpLoad %uint %element\n"
552                        "%101 = OpLoad %uint %mask\n"
553                        "%102 = OpBitwiseAnd %uint %100 %101\n"
554                        "%103 = OpIEqual %bool %102 %uint_0\n"
555                        "OpBranch %98\n"
556                        "%98 = OpLabel\n"
557                        "%104 = OpPhi %bool %97 %85 %103 %99\n"
558                        "OpSelectionMerge %105 None\n"
559                        "OpBranchConditional %104 %106 %105\n"
560                        "%106 = OpLabel\n"
561                        "OpStore %temp %false\n"
562                        "OpBranch %105\n"
563                        "%105 = OpLabel\n"
564                        "%107 = OpLoad %uint %i\n"
565                        "%108 = OpLoad %uint %gl_SubGroupInvocationARB\n"
566                        "%109 = OpULessThan %bool %107 %108\n"
567                        "OpSelectionMerge %110 None\n"
568                        "OpBranchConditional %109 %111 %110\n"
569                        "%111 = OpLabel\n"
570                        "%112 = OpLoad %uint %element\n"
571                        "%113 = OpLoad %uint %mask\n"
572                        "%114 = OpBitwiseAnd %uint %112 %113\n"
573                        "%115 = OpINotEqual %bool %114 %uint_0\n"
574                        "OpBranch %110\n"
575                        "%110 = OpLabel\n"
576                        "%116 = OpPhi %bool %109 %105 %115 %111\n"
577                        "OpSelectionMerge %117 None\n"
578                        "OpBranchConditional %116 %118 %117\n"
579                        "%118 = OpLabel\n"
580                        "OpStore %temp %false\n"
581                        "OpBranch %117\n"
582                        "%117 = OpLabel\n"
583                        "OpBranch %80\n"
584                        "%80 = OpLabel\n"
585                        "%119 = OpLoad %uint %i\n"
586                        "%120 = OpIAdd %uint %119 %int_1\n"
587                        "OpStore %i %120\n"
588                        "OpBranch %78\n"
589                        "%79 = OpLabel\n"
590                        "%121 = OpLoad %bool %temp\n"
591                        "%122 = OpSelect %int %121 %int_15 %int_2\n"
592                        "%123 = OpBitcast %uint %122\n"
593                        "OpStore %tempResult %123\n"
594                        "%124 = OpLoad %uint %tempResult\n"
595                        "OpStore %tempRes %124\n"
596                        "%125 = OpLoad %uint %offset\n"
597                        "%126 = OpLoad %uint %tempRes\n"
598                        "%127 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %125\n"
599                        "OpStore %127 %126\n"
600                        "OpReturn\n"
601                        "OpFunctionEnd\n";
602             break;
603         case MASKTYPE_GT:
                // Appends the SPIR-V assembly of the MASKTYPE_GT compute shader to `compute`.
                // The listing declares a GLCompute entry point "main" that:
                //   1. computes globalSize = gl_NumWorkGroups * gl_WorkGroupSize and the linear index
                //      offset = globalSize.x * (globalSize.y * gid.z + gid.y) + gid.x,
                //      where gid is gl_GlobalInvocationID;
                //   2. copies the four components of gl_SubGroupGtMaskARB (BuiltIn SubgroupGtMask)
                //      into the function-local array `bitmask`;
                //   3. for every i in [0, gl_SubGroupSizeARB) tests bit (i % 32) of bitmask[i / 32] and
                //      sets `temp` to false when the bit is 0 for i > gl_SubGroupInvocationARB, or
                //      non-zero for i <= gl_SubGroupInvocationARB;
                //   4. stores 15 into result[offset] when `temp` is still true, otherwise 2.
                // NOTE(review): 15 presumably corresponds to the 0xf expected by the host-side
                // subgroups::check call — confirm against the compute check function.
                // NOTE(review): %ulong / %_ptr_Input_ulong are declared (hence OpCapability Int64) but
                // no instruction in this listing references them.
604             compute += "; SPIR-V\n"
605                        "; Version: 1.6\n"
606                        "; Generator: Khronos SPIR-V Tools Assembler; 0\n"
607                        "; Bound: 130\n"
608                        "; Schema: 0\n"
609                        "OpCapability Shader\n"
610                        "OpCapability Int64\n"
611                        "OpCapability SubgroupBallotKHR\n"
612                        "OpExtension \"SPV_KHR_shader_ballot\"\n"
613                        "%1 = OpExtInstImport \"GLSL.std.450\"\n"
614                        "OpMemoryModel Logical GLSL450\n"
615                        "OpEntryPoint GLCompute %main \"main\" %gl_NumWorkGroups %gl_GlobalInvocationID "
616                        "%gl_SubGroupGtMaskARB %gl_SubGroupSizeARB %gl_SubGroupInvocationARB\n"
617                        "OpExecutionMode %main LocalSize 1 1 1\n"
618                        "OpSource GLSL 450\n"
619                        "OpSourceExtension \"GL_ARB_gpu_shader_int64\"\n"
620                        "OpSourceExtension \"GL_ARB_shader_ballot\"\n"
621                        "OpName %main \"main\"\n"
622                        "OpName %globalSize \"globalSize\"\n"
623                        "OpName %gl_NumWorkGroups \"gl_NumWorkGroups\"\n"
624                        "OpName %offset \"offset\"\n"
625                        "OpName %gl_GlobalInvocationID \"gl_GlobalInvocationID\"\n"
626                        "OpName %bitmask \"bitmask\"\n"
627                        "OpName %gl_SubGroupGtMaskARB \"gl_SubGroupGtMaskARB\"\n"
628                        "OpName %temp \"temp\"\n"
629                        "OpName %i \"i\"\n"
630                        "OpName %gl_SubGroupSizeARB \"gl_SubGroupSizeARB\"\n"
631                        "OpName %elementIndex \"elementIndex\"\n"
632                        "OpName %bitPosition \"bitPosition\"\n"
633                        "OpName %mask \"mask\"\n"
634                        "OpName %element \"element\"\n"
635                        "OpName %gl_SubGroupInvocationARB \"gl_SubGroupInvocationARB\"\n"
636                        "OpName %tempResult \"tempResult\"\n"
637                        "OpName %tempRes \"tempRes\"\n"
638                        "OpName %Buffer1 \"Buffer1\"\n"
639                        "OpMemberName %Buffer1 0 \"result\"\n"
640                        "OpName %_ \"\"\n"
641                        "OpDecorate %gl_NumWorkGroups BuiltIn NumWorkgroups\n"
642                        "OpDecorate %21 SpecId 0\n"
643                        "OpDecorate %22 SpecId 1\n"
644                        "OpDecorate %23 SpecId 2\n"
645                        "OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize\n"
646                        "OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId\n"
647                        "OpDecorate %gl_SubGroupGtMaskARB BuiltIn SubgroupGtMask\n"
648                        "OpDecorate %gl_SubGroupSizeARB BuiltIn SubgroupSize\n"
649                        "OpDecorate %gl_SubGroupInvocationARB BuiltIn SubgroupLocalInvocationId\n"
650                        "OpDecorate %_runtimearr_uint ArrayStride 4\n"
651                        "OpMemberDecorate %Buffer1 0 Offset 0\n"
652                        "OpDecorate %Buffer1 BufferBlock\n"
653                        "OpDecorate %_ DescriptorSet 0\n"
654                        "OpDecorate %_ Binding 0\n"
655                        "%void = OpTypeVoid\n"
656                        "%27 = OpTypeFunction %void\n"
657                        "%uint = OpTypeInt 32 0\n"
658                        "%v3uint = OpTypeVector %uint 3\n"
659                        "%_ptr_Function_v3uint = OpTypePointer Function %v3uint\n"
660                        "%_ptr_Input_v3uint = OpTypePointer Input %v3uint\n"
661                        "%gl_NumWorkGroups = OpVariable %_ptr_Input_v3uint Input\n"
662                        "%21 = OpSpecConstant %uint 1\n"
663                        "%22 = OpSpecConstant %uint 1\n"
664                        "%23 = OpSpecConstant %uint 1\n"
665                        "%gl_WorkGroupSize = OpSpecConstantComposite %v3uint %21 %22 %23\n"
666                        "%_ptr_Function_uint = OpTypePointer Function %uint\n"
667                        "%uint_0 = OpConstant %uint 0\n"
668                        "%uint_1 = OpConstant %uint 1\n"
669                        "%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input\n"
670                        "%uint_2 = OpConstant %uint 2\n"
671                        "%_ptr_Input_uint = OpTypePointer Input %uint\n"
672                        "%v4uint = OpTypeVector %uint 4\n"
673                        "%uint_4 = OpConstant %uint 4\n"
674                        "%_arr_uint_uint_4 = OpTypeArray %uint %uint_4\n"
675                        "%_ptr_Function_v4uint = OpTypePointer Function %v4uint\n"
676                        "%_ptr_Function__arr_uint_uint_4 = OpTypePointer Function %_arr_uint_uint_4\n"
677                        "%ulong = OpTypeInt 64 0\n"
678                        "%_ptr_Input_ulong = OpTypePointer Input %ulong\n"
679                        "%_ptr_Input_v4uint = OpTypePointer Input %v4uint\n"
680                        "%gl_SubGroupGtMaskARB = OpVariable %_ptr_Input_v4uint Input\n"
681                        "%bool = OpTypeBool\n"
682                        "%_ptr_Function_bool = OpTypePointer Function %bool\n"
683                        "%true = OpConstantTrue %bool\n"
684                        "%gl_SubGroupSizeARB = OpVariable %_ptr_Input_uint Input\n"
685                        "%uint_32 = OpConstant %uint 32\n"
686                        "%gl_SubGroupInvocationARB = OpVariable %_ptr_Input_uint Input\n"
687                        "%false = OpConstantFalse %bool\n"
688                        "%int = OpTypeInt 32 1\n"
689                        "%int_1 = OpConstant %int 1\n"
690                        "%int_15 = OpConstant %int 15\n"
691                        "%int_2 = OpConstant %int 2\n"
692                        "%_runtimearr_uint = OpTypeRuntimeArray %uint\n"
693                        "%Buffer1 = OpTypeStruct %_runtimearr_uint\n"
694                        "%_ptr_Uniform_Buffer1 = OpTypePointer Uniform %Buffer1\n"
695                        "%_ = OpVariable %_ptr_Uniform_Buffer1 Uniform\n"
696                        "%int_0 = OpConstant %int 0\n"
697                        "%_ptr_Uniform_uint = OpTypePointer Uniform %uint\n"
                           // Entry point body: function-local variables first.
698                        "%main = OpFunction %void None %27\n"
699                        "%57 = OpLabel\n"
700                        "%globalSize = OpVariable %_ptr_Function_v3uint Function\n"
701                        "%offset = OpVariable %_ptr_Function_uint Function\n"
702                        "%bitmask = OpVariable %_ptr_Function__arr_uint_uint_4 Function\n"
703                        "%temp = OpVariable %_ptr_Function_bool Function\n"
704                        "%i = OpVariable %_ptr_Function_uint Function\n"
705                        "%elementIndex = OpVariable %_ptr_Function_uint Function\n"
706                        "%bitPosition = OpVariable %_ptr_Function_uint Function\n"
707                        "%mask = OpVariable %_ptr_Function_uint Function\n"
708                        "%element = OpVariable %_ptr_Function_uint Function\n"
709                        "%tempResult = OpVariable %_ptr_Function_uint Function\n"
710                        "%tempRes = OpVariable %_ptr_Function_uint Function\n"
                           // globalSize = gl_NumWorkGroups * gl_WorkGroupSize;
                           // offset = globalSize.x * (globalSize.y * gid.z + gid.y) + gid.x.
711                        "%58 = OpLoad %v3uint %gl_NumWorkGroups\n"
712                        "%59 = OpIMul %v3uint %58 %gl_WorkGroupSize\n"
713                        "OpStore %globalSize %59\n"
714                        "%60 = OpAccessChain %_ptr_Function_uint %globalSize %uint_0\n"
715                        "%61 = OpLoad %uint %60\n"
716                        "%62 = OpAccessChain %_ptr_Function_uint %globalSize %uint_1\n"
717                        "%63 = OpLoad %uint %62\n"
718                        "%64 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_2\n"
719                        "%65 = OpLoad %uint %64\n"
720                        "%66 = OpIMul %uint %63 %65\n"
721                        "%67 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_1\n"
722                        "%68 = OpLoad %uint %67\n"
723                        "%69 = OpIAdd %uint %66 %68\n"
724                        "%70 = OpIMul %uint %61 %69\n"
725                        "%71 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0\n"
726                        "%72 = OpLoad %uint %71\n"
727                        "%73 = OpIAdd %uint %70 %72\n"
728                        "OpStore %offset %73\n"
                           // Copy the uvec4 built-in mask into the indexable local array `bitmask`.
729                        "%74 = OpLoad %v4uint %gl_SubGroupGtMaskARB\n"
730                        "%75 = OpCompositeExtract %uint %74 0\n"
731                        "%76 = OpCompositeExtract %uint %74 1\n"
732                        "%77 = OpCompositeExtract %uint %74 2\n"
733                        "%78 = OpCompositeExtract %uint %74 3\n"
734                        "%79 = OpCompositeConstruct %_arr_uint_uint_4 %75 %76 %77 %78\n"
735                        "OpStore %bitmask %79\n"
                           // temp = true; loop header %80 runs while i < gl_SubGroupSizeARB
                           // (merge block %81, continue block %82).
736                        "OpStore %temp %true\n"
737                        "OpStore %i %uint_0\n"
738                        "OpBranch %80\n"
739                        "%80 = OpLabel\n"
740                        "OpLoopMerge %81 %82 None\n"
741                        "OpBranch %83\n"
742                        "%83 = OpLabel\n"
743                        "%84 = OpLoad %uint %i\n"
744                        "%85 = OpLoad %uint %gl_SubGroupSizeARB\n"
745                        "%86 = OpULessThan %bool %84 %85\n"
746                        "OpBranchConditional %86 %87 %81\n"
                           // elementIndex = i / 32; bitPosition = i % 32; mask = 1 << bitPosition;
                           // element = bitmask[elementIndex].
747                        "%87 = OpLabel\n"
748                        "%88 = OpLoad %uint %i\n"
749                        "%89 = OpUDiv %uint %88 %uint_32\n"
750                        "OpStore %elementIndex %89\n"
751                        "%90 = OpLoad %uint %i\n"
752                        "%91 = OpUMod %uint %90 %uint_32\n"
753                        "OpStore %bitPosition %91\n"
754                        "%92 = OpLoad %uint %bitPosition\n"
755                        "%93 = OpShiftLeftLogical %uint %uint_1 %92\n"
756                        "OpStore %mask %93\n"
757                        "%94 = OpLoad %uint %elementIndex\n"
758                        "%95 = OpAccessChain %_ptr_Function_uint %bitmask %94\n"
759                        "%96 = OpLoad %uint %95\n"
760                        "OpStore %element %96\n"
                           // First check (short-circuit AND via OpPhi):
                           // if (i > gl_SubGroupInvocationARB && (element & mask) == 0) temp = false.
761                        "%97 = OpLoad %uint %i\n"
762                        "%98 = OpLoad %uint %gl_SubGroupInvocationARB\n"
763                        "%99 = OpUGreaterThan %bool %97 %98\n"
764                        "OpSelectionMerge %100 None\n"
765                        "OpBranchConditional %99 %101 %100\n"
766                        "%101 = OpLabel\n"
767                        "%102 = OpLoad %uint %element\n"
768                        "%103 = OpLoad %uint %mask\n"
769                        "%104 = OpBitwiseAnd %uint %102 %103\n"
770                        "%105 = OpIEqual %bool %104 %uint_0\n"
771                        "OpBranch %100\n"
772                        "%100 = OpLabel\n"
773                        "%106 = OpPhi %bool %99 %87 %105 %101\n"
774                        "OpSelectionMerge %107 None\n"
775                        "OpBranchConditional %106 %108 %107\n"
776                        "%108 = OpLabel\n"
777                        "OpStore %temp %false\n"
778                        "OpBranch %107\n"
                           // Second check:
                           // if (i <= gl_SubGroupInvocationARB && (element & mask) != 0) temp = false.
779                        "%107 = OpLabel\n"
780                        "%109 = OpLoad %uint %i\n"
781                        "%110 = OpLoad %uint %gl_SubGroupInvocationARB\n"
782                        "%111 = OpULessThanEqual %bool %109 %110\n"
783                        "OpSelectionMerge %112 None\n"
784                        "OpBranchConditional %111 %113 %112\n"
785                        "%113 = OpLabel\n"
786                        "%114 = OpLoad %uint %element\n"
787                        "%115 = OpLoad %uint %mask\n"
788                        "%116 = OpBitwiseAnd %uint %114 %115\n"
789                        "%117 = OpINotEqual %bool %116 %uint_0\n"
790                        "OpBranch %112\n"
791                        "%112 = OpLabel\n"
792                        "%118 = OpPhi %bool %111 %107 %117 %113\n"
793                        "OpSelectionMerge %119 None\n"
794                        "OpBranchConditional %118 %120 %119\n"
795                        "%120 = OpLabel\n"
796                        "OpStore %temp %false\n"
797                        "OpBranch %119\n"
798                        "%119 = OpLabel\n"
799                        "OpBranch %82\n"
                           // Continue block: i += 1 (the addend is the signed constant %int_1).
800                        "%82 = OpLabel\n"
801                        "%121 = OpLoad %uint %i\n"
802                        "%122 = OpIAdd %uint %121 %int_1\n"
803                        "OpStore %i %122\n"
804                        "OpBranch %80\n"
                           // Loop exit: result[offset] = temp ? 15 : 2 (selected as int, bitcast to uint).
805                        "%81 = OpLabel\n"
806                        "%123 = OpLoad %bool %temp\n"
807                        "%124 = OpSelect %int %123 %int_15 %int_2\n"
808                        "%125 = OpBitcast %uint %124\n"
809                        "OpStore %tempResult %125\n"
810                        "%126 = OpLoad %uint %tempResult\n"
811                        "OpStore %tempRes %126\n"
812                        "%127 = OpLoad %uint %offset\n"
813                        "%128 = OpLoad %uint %tempRes\n"
814                        "%129 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %127\n"
815                        "OpStore %129 %128\n"
816                        "OpReturn\n"
817                        "OpFunctionEnd\n";
818             break;
819         case MASKTYPE_LE:
                // Appends the SPIR-V assembly of the MASKTYPE_LE compute shader to `compute`.
                // The listing declares a GLCompute entry point "main" that:
                //   1. computes globalSize = gl_NumWorkGroups * gl_WorkGroupSize and the linear index
                //      offset = globalSize.x * (globalSize.y * gid.z + gid.y) + gid.x,
                //      where gid is gl_GlobalInvocationID;
                //   2. copies the four components of gl_SubGroupLeMaskARB (BuiltIn SubgroupLeMask)
                //      into the function-local array `bitmask`;
                //   3. for every i in [0, gl_SubGroupSizeARB) tests bit (i % 32) of bitmask[i / 32] and
                //      sets `temp` to false when the bit is 0 for i <= gl_SubGroupInvocationARB, or
                //      non-zero for i > gl_SubGroupInvocationARB;
                //   4. stores 15 into result[offset] when `temp` is still true, otherwise 2.
                // NOTE(review): 15 presumably corresponds to the 0xf expected by the host-side
                // subgroups::check call — confirm against the compute check function.
                // NOTE(review): %ulong / %_ptr_Input_ulong are declared (hence OpCapability Int64) but
                // no instruction in this listing references them.
820             compute += "; SPIR-V\n"
821                        "; Version: 1.6\n"
822                        "; Generator: Khronos SPIR-V Tools Assembler; 0\n"
823                        "; Bound: 130\n"
824                        "; Schema: 0\n"
825                        "OpCapability Shader\n"
826                        "OpCapability Int64\n"
827                        "OpCapability SubgroupBallotKHR\n"
828                        "OpExtension \"SPV_KHR_shader_ballot\"\n"
829                        "%1 = OpExtInstImport \"GLSL.std.450\"\n"
830                        "OpMemoryModel Logical GLSL450\n"
831                        "OpEntryPoint GLCompute %main \"main\" %gl_NumWorkGroups %gl_GlobalInvocationID "
832                        "%gl_SubGroupLeMaskARB %gl_SubGroupSizeARB %gl_SubGroupInvocationARB\n"
833                        "OpExecutionMode %main LocalSize 1 1 1\n"
834                        "OpSource GLSL 450\n"
835                        "OpSourceExtension \"GL_ARB_gpu_shader_int64\"\n"
836                        "OpSourceExtension \"GL_ARB_shader_ballot\"\n"
837                        "OpName %main \"main\"\n"
838                        "OpName %globalSize \"globalSize\"\n"
839                        "OpName %gl_NumWorkGroups \"gl_NumWorkGroups\"\n"
840                        "OpName %offset \"offset\"\n"
841                        "OpName %gl_GlobalInvocationID \"gl_GlobalInvocationID\"\n"
842                        "OpName %bitmask \"bitmask\"\n"
843                        "OpName %gl_SubGroupLeMaskARB \"gl_SubGroupLeMaskARB\"\n"
844                        "OpName %temp \"temp\"\n"
845                        "OpName %i \"i\"\n"
846                        "OpName %gl_SubGroupSizeARB \"gl_SubGroupSizeARB\"\n"
847                        "OpName %elementIndex \"elementIndex\"\n"
848                        "OpName %bitPosition \"bitPosition\"\n"
849                        "OpName %mask \"mask\"\n"
850                        "OpName %element \"element\"\n"
851                        "OpName %gl_SubGroupInvocationARB \"gl_SubGroupInvocationARB\"\n"
852                        "OpName %tempResult \"tempResult\"\n"
853                        "OpName %tempRes \"tempRes\"\n"
854                        "OpName %Buffer1 \"Buffer1\"\n"
855                        "OpMemberName %Buffer1 0 \"result\"\n"
856                        "OpName %_ \"\"\n"
857                        "OpDecorate %gl_NumWorkGroups BuiltIn NumWorkgroups\n"
858                        "OpDecorate %21 SpecId 0\n"
859                        "OpDecorate %22 SpecId 1\n"
860                        "OpDecorate %23 SpecId 2\n"
861                        "OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize\n"
862                        "OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId\n"
863                        "OpDecorate %gl_SubGroupLeMaskARB BuiltIn SubgroupLeMask\n"
864                        "OpDecorate %gl_SubGroupSizeARB BuiltIn SubgroupSize\n"
865                        "OpDecorate %gl_SubGroupInvocationARB BuiltIn SubgroupLocalInvocationId\n"
866                        "OpDecorate %_runtimearr_uint ArrayStride 4\n"
867                        "OpMemberDecorate %Buffer1 0 Offset 0\n"
868                        "OpDecorate %Buffer1 BufferBlock\n"
869                        "OpDecorate %_ DescriptorSet 0\n"
870                        "OpDecorate %_ Binding 0\n"
871                        "%void = OpTypeVoid\n"
872                        "%27 = OpTypeFunction %void\n"
873                        "%uint = OpTypeInt 32 0\n"
874                        "%v3uint = OpTypeVector %uint 3\n"
875                        "%_ptr_Function_v3uint = OpTypePointer Function %v3uint\n"
876                        "%_ptr_Input_v3uint = OpTypePointer Input %v3uint\n"
877                        "%gl_NumWorkGroups = OpVariable %_ptr_Input_v3uint Input\n"
878                        "%21 = OpSpecConstant %uint 1\n"
879                        "%22 = OpSpecConstant %uint 1\n"
880                        "%23 = OpSpecConstant %uint 1\n"
881                        "%gl_WorkGroupSize = OpSpecConstantComposite %v3uint %21 %22 %23\n"
882                        "%_ptr_Function_uint = OpTypePointer Function %uint\n"
883                        "%uint_0 = OpConstant %uint 0\n"
884                        "%uint_1 = OpConstant %uint 1\n"
885                        "%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input\n"
886                        "%uint_2 = OpConstant %uint 2\n"
887                        "%_ptr_Input_uint = OpTypePointer Input %uint\n"
888                        "%v4uint = OpTypeVector %uint 4\n"
889                        "%uint_4 = OpConstant %uint 4\n"
890                        "%_arr_uint_uint_4 = OpTypeArray %uint %uint_4\n"
891                        "%_ptr_Function_v4uint = OpTypePointer Function %v4uint\n"
892                        "%_ptr_Function__arr_uint_uint_4 = OpTypePointer Function %_arr_uint_uint_4\n"
893                        "%ulong = OpTypeInt 64 0\n"
894                        "%_ptr_Input_ulong = OpTypePointer Input %ulong\n"
895                        "%_ptr_Input_v4uint = OpTypePointer Input %v4uint\n"
896                        "%gl_SubGroupLeMaskARB = OpVariable %_ptr_Input_v4uint Input\n"
897                        "%bool = OpTypeBool\n"
898                        "%_ptr_Function_bool = OpTypePointer Function %bool\n"
899                        "%true = OpConstantTrue %bool\n"
900                        "%gl_SubGroupSizeARB = OpVariable %_ptr_Input_uint Input\n"
901                        "%uint_32 = OpConstant %uint 32\n"
902                        "%gl_SubGroupInvocationARB = OpVariable %_ptr_Input_uint Input\n"
903                        "%false = OpConstantFalse %bool\n"
904                        "%int = OpTypeInt 32 1\n"
905                        "%int_1 = OpConstant %int 1\n"
906                        "%int_15 = OpConstant %int 15\n"
907                        "%int_2 = OpConstant %int 2\n"
908                        "%_runtimearr_uint = OpTypeRuntimeArray %uint\n"
909                        "%Buffer1 = OpTypeStruct %_runtimearr_uint\n"
910                        "%_ptr_Uniform_Buffer1 = OpTypePointer Uniform %Buffer1\n"
911                        "%_ = OpVariable %_ptr_Uniform_Buffer1 Uniform\n"
912                        "%int_0 = OpConstant %int 0\n"
913                        "%_ptr_Uniform_uint = OpTypePointer Uniform %uint\n"
                           // Entry point body: function-local variables first.
914                        "%main = OpFunction %void None %27\n"
915                        "%57 = OpLabel\n"
916                        "%globalSize = OpVariable %_ptr_Function_v3uint Function\n"
917                        "%offset = OpVariable %_ptr_Function_uint Function\n"
918                        "%bitmask = OpVariable %_ptr_Function__arr_uint_uint_4 Function\n"
919                        "%temp = OpVariable %_ptr_Function_bool Function\n"
920                        "%i = OpVariable %_ptr_Function_uint Function\n"
921                        "%elementIndex = OpVariable %_ptr_Function_uint Function\n"
922                        "%bitPosition = OpVariable %_ptr_Function_uint Function\n"
923                        "%mask = OpVariable %_ptr_Function_uint Function\n"
924                        "%element = OpVariable %_ptr_Function_uint Function\n"
925                        "%tempResult = OpVariable %_ptr_Function_uint Function\n"
926                        "%tempRes = OpVariable %_ptr_Function_uint Function\n"
                           // globalSize = gl_NumWorkGroups * gl_WorkGroupSize;
                           // offset = globalSize.x * (globalSize.y * gid.z + gid.y) + gid.x.
927                        "%58 = OpLoad %v3uint %gl_NumWorkGroups\n"
928                        "%59 = OpIMul %v3uint %58 %gl_WorkGroupSize\n"
929                        "OpStore %globalSize %59\n"
930                        "%60 = OpAccessChain %_ptr_Function_uint %globalSize %uint_0\n"
931                        "%61 = OpLoad %uint %60\n"
932                        "%62 = OpAccessChain %_ptr_Function_uint %globalSize %uint_1\n"
933                        "%63 = OpLoad %uint %62\n"
934                        "%64 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_2\n"
935                        "%65 = OpLoad %uint %64\n"
936                        "%66 = OpIMul %uint %63 %65\n"
937                        "%67 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_1\n"
938                        "%68 = OpLoad %uint %67\n"
939                        "%69 = OpIAdd %uint %66 %68\n"
940                        "%70 = OpIMul %uint %61 %69\n"
941                        "%71 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0\n"
942                        "%72 = OpLoad %uint %71\n"
943                        "%73 = OpIAdd %uint %70 %72\n"
944                        "OpStore %offset %73\n"
                           // Copy the uvec4 built-in mask into the indexable local array `bitmask`.
945                        "%74 = OpLoad %v4uint %gl_SubGroupLeMaskARB\n"
946                        "%75 = OpCompositeExtract %uint %74 0\n"
947                        "%76 = OpCompositeExtract %uint %74 1\n"
948                        "%77 = OpCompositeExtract %uint %74 2\n"
949                        "%78 = OpCompositeExtract %uint %74 3\n"
950                        "%79 = OpCompositeConstruct %_arr_uint_uint_4 %75 %76 %77 %78\n"
951                        "OpStore %bitmask %79\n"
                           // temp = true; loop header %80 runs while i < gl_SubGroupSizeARB
                           // (merge block %81, continue block %82).
952                        "OpStore %temp %true\n"
953                        "OpStore %i %uint_0\n"
954                        "OpBranch %80\n"
955                        "%80 = OpLabel\n"
956                        "OpLoopMerge %81 %82 None\n"
957                        "OpBranch %83\n"
958                        "%83 = OpLabel\n"
959                        "%84 = OpLoad %uint %i\n"
960                        "%85 = OpLoad %uint %gl_SubGroupSizeARB\n"
961                        "%86 = OpULessThan %bool %84 %85\n"
962                        "OpBranchConditional %86 %87 %81\n"
                           // elementIndex = i / 32; bitPosition = i % 32; mask = 1 << bitPosition;
                           // element = bitmask[elementIndex].
963                        "%87 = OpLabel\n"
964                        "%88 = OpLoad %uint %i\n"
965                        "%89 = OpUDiv %uint %88 %uint_32\n"
966                        "OpStore %elementIndex %89\n"
967                        "%90 = OpLoad %uint %i\n"
968                        "%91 = OpUMod %uint %90 %uint_32\n"
969                        "OpStore %bitPosition %91\n"
970                        "%92 = OpLoad %uint %bitPosition\n"
971                        "%93 = OpShiftLeftLogical %uint %uint_1 %92\n"
972                        "OpStore %mask %93\n"
973                        "%94 = OpLoad %uint %elementIndex\n"
974                        "%95 = OpAccessChain %_ptr_Function_uint %bitmask %94\n"
975                        "%96 = OpLoad %uint %95\n"
976                        "OpStore %element %96\n"
                           // First check (short-circuit AND via OpPhi):
                           // if (i <= gl_SubGroupInvocationARB && (element & mask) == 0) temp = false.
977                        "%97 = OpLoad %uint %i\n"
978                        "%98 = OpLoad %uint %gl_SubGroupInvocationARB\n"
979                        "%99 = OpULessThanEqual %bool %97 %98\n"
980                        "OpSelectionMerge %100 None\n"
981                        "OpBranchConditional %99 %101 %100\n"
982                        "%101 = OpLabel\n"
983                        "%102 = OpLoad %uint %element\n"
984                        "%103 = OpLoad %uint %mask\n"
985                        "%104 = OpBitwiseAnd %uint %102 %103\n"
986                        "%105 = OpIEqual %bool %104 %uint_0\n"
987                        "OpBranch %100\n"
988                        "%100 = OpLabel\n"
989                        "%106 = OpPhi %bool %99 %87 %105 %101\n"
990                        "OpSelectionMerge %107 None\n"
991                        "OpBranchConditional %106 %108 %107\n"
992                        "%108 = OpLabel\n"
993                        "OpStore %temp %false\n"
994                        "OpBranch %107\n"
                           // Second check:
                           // if (i > gl_SubGroupInvocationARB && (element & mask) != 0) temp = false.
995                        "%107 = OpLabel\n"
996                        "%109 = OpLoad %uint %i\n"
997                        "%110 = OpLoad %uint %gl_SubGroupInvocationARB\n"
998                        "%111 = OpUGreaterThan %bool %109 %110\n"
999                        "OpSelectionMerge %112 None\n"
1000                        "OpBranchConditional %111 %113 %112\n"
1001                        "%113 = OpLabel\n"
1002                        "%114 = OpLoad %uint %element\n"
1003                        "%115 = OpLoad %uint %mask\n"
1004                        "%116 = OpBitwiseAnd %uint %114 %115\n"
1005                        "%117 = OpINotEqual %bool %116 %uint_0\n"
1006                        "OpBranch %112\n"
1007                        "%112 = OpLabel\n"
1008                        "%118 = OpPhi %bool %111 %107 %117 %113\n"
1009                        "OpSelectionMerge %119 None\n"
1010                        "OpBranchConditional %118 %120 %119\n"
1011                        "%120 = OpLabel\n"
1012                        "OpStore %temp %false\n"
1013                        "OpBranch %119\n"
1014                        "%119 = OpLabel\n"
1015                        "OpBranch %82\n"
                            // Continue block: i += 1 (the addend is the signed constant %int_1).
1016                        "%82 = OpLabel\n"
1017                        "%121 = OpLoad %uint %i\n"
1018                        "%122 = OpIAdd %uint %121 %int_1\n"
1019                        "OpStore %i %122\n"
1020                        "OpBranch %80\n"
                            // Loop exit: result[offset] = temp ? 15 : 2 (selected as int, bitcast to uint).
1021                        "%81 = OpLabel\n"
1022                        "%123 = OpLoad %bool %temp\n"
1023                        "%124 = OpSelect %int %123 %int_15 %int_2\n"
1024                        "%125 = OpBitcast %uint %124\n"
1025                        "OpStore %tempResult %125\n"
1026                        "%126 = OpLoad %uint %tempResult\n"
1027                        "OpStore %tempRes %126\n"
1028                        "%127 = OpLoad %uint %offset\n"
1029                        "%128 = OpLoad %uint %tempRes\n"
1030                        "%129 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %127\n"
1031                        "OpStore %129 %128\n"
1032                        "OpReturn\n"
1033                        "OpFunctionEnd\n";
1034             break;
1035         case MASKTYPE_LT:
1036             compute += "; SPIR-V\n"
1037                        "; Version: 1.6\n"
1038                        "; Generator: Khronos SPIR-V Tools Assembler; 0\n"
1039                        "; Bound: 130\n"
1040                        "; Schema: 0\n"
1041                        "OpCapability Shader\n"
1042                        "OpCapability Int64\n"
1043                        "OpCapability SubgroupBallotKHR\n"
1044                        "OpExtension \"SPV_KHR_shader_ballot\"\n"
1045                        "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1046                        "OpMemoryModel Logical GLSL450\n"
1047                        "OpEntryPoint GLCompute %main \"main\" %gl_NumWorkGroups %gl_GlobalInvocationID "
1048                        "%gl_SubGroupLtMaskARB %gl_SubGroupSizeARB %gl_SubGroupInvocationARB\n"
1049                        "OpExecutionMode %main LocalSize 1 1 1\n"
1050                        "OpSource GLSL 450\n"
1051                        "OpSourceExtension \"GL_ARB_gpu_shader_int64\"\n"
1052                        "OpSourceExtension \"GL_ARB_shader_ballot\"\n"
1053                        "OpName %main \"main\"\n"
1054                        "OpName %globalSize \"globalSize\"\n"
1055                        "OpName %gl_NumWorkGroups \"gl_NumWorkGroups\"\n"
1056                        "OpName %offset \"offset\"\n"
1057                        "OpName %gl_GlobalInvocationID \"gl_GlobalInvocationID\"\n"
1058                        "OpName %bitmask \"bitmask\"\n"
1059                        "OpName %gl_SubGroupLtMaskARB \"gl_SubGroupLtMaskARB\"\n"
1060                        "OpName %temp \"temp\"\n"
1061                        "OpName %i \"i\"\n"
1062                        "OpName %gl_SubGroupSizeARB \"gl_SubGroupSizeARB\"\n"
1063                        "OpName %elementIndex \"elementIndex\"\n"
1064                        "OpName %bitPosition \"bitPosition\"\n"
1065                        "OpName %mask \"mask\"\n"
1066                        "OpName %element \"element\"\n"
1067                        "OpName %gl_SubGroupInvocationARB \"gl_SubGroupInvocationARB\"\n"
1068                        "OpName %tempResult \"tempResult\"\n"
1069                        "OpName %tempRes \"tempRes\"\n"
1070                        "OpName %Buffer1 \"Buffer1\"\n"
1071                        "OpMemberName %Buffer1 0 \"result\"\n"
1072                        "OpName %_ \"\"\n"
1073                        "OpDecorate %gl_NumWorkGroups BuiltIn NumWorkgroups\n"
1074                        "OpDecorate %21 SpecId 0\n"
1075                        "OpDecorate %22 SpecId 1\n"
1076                        "OpDecorate %23 SpecId 2\n"
1077                        "OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize\n"
1078                        "OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId\n"
1079                        "OpDecorate %gl_SubGroupLtMaskARB BuiltIn SubgroupLtMask\n"
1080                        "OpDecorate %gl_SubGroupSizeARB BuiltIn SubgroupSize\n"
1081                        "OpDecorate %gl_SubGroupInvocationARB BuiltIn SubgroupLocalInvocationId\n"
1082                        "OpDecorate %_runtimearr_uint ArrayStride 4\n"
1083                        "OpMemberDecorate %Buffer1 0 Offset 0\n"
1084                        "OpDecorate %Buffer1 BufferBlock\n"
1085                        "OpDecorate %_ DescriptorSet 0\n"
1086                        "OpDecorate %_ Binding 0\n"
1087                        "%void = OpTypeVoid\n"
1088                        "%27 = OpTypeFunction %void\n"
1089                        "%uint = OpTypeInt 32 0\n"
1090                        "%v3uint = OpTypeVector %uint 3\n"
1091                        "%_ptr_Function_v3uint = OpTypePointer Function %v3uint\n"
1092                        "%_ptr_Input_v3uint = OpTypePointer Input %v3uint\n"
1093                        "%gl_NumWorkGroups = OpVariable %_ptr_Input_v3uint Input\n"
1094                        "%21 = OpSpecConstant %uint 1\n"
1095                        "%22 = OpSpecConstant %uint 1\n"
1096                        "%23 = OpSpecConstant %uint 1\n"
1097                        "%gl_WorkGroupSize = OpSpecConstantComposite %v3uint %21 %22 %23\n"
1098                        "%_ptr_Function_uint = OpTypePointer Function %uint\n"
1099                        "%uint_0 = OpConstant %uint 0\n"
1100                        "%uint_1 = OpConstant %uint 1\n"
1101                        "%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input\n"
1102                        "%uint_2 = OpConstant %uint 2\n"
1103                        "%_ptr_Input_uint = OpTypePointer Input %uint\n"
1104                        "%v4uint = OpTypeVector %uint 4\n"
1105                        "%uint_4 = OpConstant %uint 4\n"
1106                        "%_arr_uint_uint_4 = OpTypeArray %uint %uint_4\n"
1107                        "%_ptr_Function_v4uint = OpTypePointer Function %v4uint\n"
1108                        "%_ptr_Function__arr_uint_uint_4 = OpTypePointer Function %_arr_uint_uint_4\n"
1109                        "%ulong = OpTypeInt 64 0\n"
1110                        "%_ptr_Input_ulong = OpTypePointer Input %ulong\n"
1111                        "%_ptr_Input_v4uint = OpTypePointer Input %v4uint\n"
1112                        "%gl_SubGroupLtMaskARB = OpVariable %_ptr_Input_v4uint Input\n"
1113                        "%bool = OpTypeBool\n"
1114                        "%_ptr_Function_bool = OpTypePointer Function %bool\n"
1115                        "%true = OpConstantTrue %bool\n"
1116                        "%gl_SubGroupSizeARB = OpVariable %_ptr_Input_uint Input\n"
1117                        "%uint_32 = OpConstant %uint 32\n"
1118                        "%gl_SubGroupInvocationARB = OpVariable %_ptr_Input_uint Input\n"
1119                        "%false = OpConstantFalse %bool\n"
1120                        "%int = OpTypeInt 32 1\n"
1121                        "%int_1 = OpConstant %int 1\n"
1122                        "%int_15 = OpConstant %int 15\n"
1123                        "%int_2 = OpConstant %int 2\n"
1124                        "%_runtimearr_uint = OpTypeRuntimeArray %uint\n"
1125                        "%Buffer1 = OpTypeStruct %_runtimearr_uint\n"
1126                        "%_ptr_Uniform_Buffer1 = OpTypePointer Uniform %Buffer1\n"
1127                        "%_ = OpVariable %_ptr_Uniform_Buffer1 Uniform\n"
1128                        "%int_0 = OpConstant %int 0\n"
1129                        "%_ptr_Uniform_uint = OpTypePointer Uniform %uint\n"
1130                        "%main = OpFunction %void None %27\n"
1131                        "%57 = OpLabel\n"
1132                        "%globalSize = OpVariable %_ptr_Function_v3uint Function\n"
1133                        "%offset = OpVariable %_ptr_Function_uint Function\n"
1134                        "%bitmask = OpVariable %_ptr_Function__arr_uint_uint_4 Function\n"
1135                        "%temp = OpVariable %_ptr_Function_bool Function\n"
1136                        "%i = OpVariable %_ptr_Function_uint Function\n"
1137                        "%elementIndex = OpVariable %_ptr_Function_uint Function\n"
1138                        "%bitPosition = OpVariable %_ptr_Function_uint Function\n"
1139                        "%mask = OpVariable %_ptr_Function_uint Function\n"
1140                        "%element = OpVariable %_ptr_Function_uint Function\n"
1141                        "%tempResult = OpVariable %_ptr_Function_uint Function\n"
1142                        "%tempRes = OpVariable %_ptr_Function_uint Function\n"
1143                        "%58 = OpLoad %v3uint %gl_NumWorkGroups\n"
1144                        "%59 = OpIMul %v3uint %58 %gl_WorkGroupSize\n"
1145                        "OpStore %globalSize %59\n"
1146                        "%60 = OpAccessChain %_ptr_Function_uint %globalSize %uint_0\n"
1147                        "%61 = OpLoad %uint %60\n"
1148                        "%62 = OpAccessChain %_ptr_Function_uint %globalSize %uint_1\n"
1149                        "%63 = OpLoad %uint %62\n"
1150                        "%64 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_2\n"
1151                        "%65 = OpLoad %uint %64\n"
1152                        "%66 = OpIMul %uint %63 %65\n"
1153                        "%67 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_1\n"
1154                        "%68 = OpLoad %uint %67\n"
1155                        "%69 = OpIAdd %uint %66 %68\n"
1156                        "%70 = OpIMul %uint %61 %69\n"
1157                        "%71 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0\n"
1158                        "%72 = OpLoad %uint %71\n"
1159                        "%73 = OpIAdd %uint %70 %72\n"
1160                        "OpStore %offset %73\n"
1161                        "%74 = OpLoad %v4uint %gl_SubGroupLtMaskARB\n"
1162                        "%75 = OpCompositeExtract %uint %74 0\n"
1163                        "%76 = OpCompositeExtract %uint %74 1\n"
1164                        "%77 = OpCompositeExtract %uint %74 2\n"
1165                        "%78 = OpCompositeExtract %uint %74 3\n"
1166                        "%79 = OpCompositeConstruct %_arr_uint_uint_4 %75 %76 %77 %78\n"
1167                        "OpStore %bitmask %79\n"
1168                        "OpStore %temp %true\n"
1169                        "OpStore %i %uint_0\n"
1170                        "OpBranch %80\n"
1171                        "%80 = OpLabel\n"
1172                        "OpLoopMerge %81 %82 None\n"
1173                        "OpBranch %83\n"
1174                        "%83 = OpLabel\n"
1175                        "%84 = OpLoad %uint %i\n"
1176                        "%85 = OpLoad %uint %gl_SubGroupSizeARB\n"
1177                        "%86 = OpULessThan %bool %84 %85\n"
1178                        "OpBranchConditional %86 %87 %81\n"
1179                        "%87 = OpLabel\n"
1180                        "%88 = OpLoad %uint %i\n"
1181                        "%89 = OpUDiv %uint %88 %uint_32\n"
1182                        "OpStore %elementIndex %89\n"
1183                        "%90 = OpLoad %uint %i\n"
1184                        "%91 = OpUMod %uint %90 %uint_32\n"
1185                        "OpStore %bitPosition %91\n"
1186                        "%92 = OpLoad %uint %bitPosition\n"
1187                        "%93 = OpShiftLeftLogical %uint %uint_1 %92\n"
1188                        "OpStore %mask %93\n"
1189                        "%94 = OpLoad %uint %elementIndex\n"
1190                        "%95 = OpAccessChain %_ptr_Function_uint %bitmask %94\n"
1191                        "%96 = OpLoad %uint %95\n"
1192                        "OpStore %element %96\n"
1193                        "%97 = OpLoad %uint %i\n"
1194                        "%98 = OpLoad %uint %gl_SubGroupInvocationARB\n"
1195                        "%99 = OpULessThan %bool %97 %98\n"
1196                        "OpSelectionMerge %100 None\n"
1197                        "OpBranchConditional %99 %101 %100\n"
1198                        "%101 = OpLabel\n"
1199                        "%102 = OpLoad %uint %element\n"
1200                        "%103 = OpLoad %uint %mask\n"
1201                        "%104 = OpBitwiseAnd %uint %102 %103\n"
1202                        "%105 = OpIEqual %bool %104 %uint_0\n"
1203                        "OpBranch %100\n"
1204                        "%100 = OpLabel\n"
1205                        "%106 = OpPhi %bool %99 %87 %105 %101\n"
1206                        "OpSelectionMerge %107 None\n"
1207                        "OpBranchConditional %106 %108 %107\n"
1208                        "%108 = OpLabel\n"
1209                        "OpStore %temp %false\n"
1210                        "OpBranch %107\n"
1211                        "%107 = OpLabel\n"
1212                        "%109 = OpLoad %uint %i\n"
1213                        "%110 = OpLoad %uint %gl_SubGroupInvocationARB\n"
1214                        "%111 = OpUGreaterThanEqual %bool %109 %110\n"
1215                        "OpSelectionMerge %112 None\n"
1216                        "OpBranchConditional %111 %113 %112\n"
1217                        "%113 = OpLabel\n"
1218                        "%114 = OpLoad %uint %element\n"
1219                        "%115 = OpLoad %uint %mask\n"
1220                        "%116 = OpBitwiseAnd %uint %114 %115\n"
1221                        "%117 = OpINotEqual %bool %116 %uint_0\n"
1222                        "OpBranch %112\n"
1223                        "%112 = OpLabel\n"
1224                        "%118 = OpPhi %bool %111 %107 %117 %113\n"
1225                        "OpSelectionMerge %119 None\n"
1226                        "OpBranchConditional %118 %120 %119\n"
1227                        "%120 = OpLabel\n"
1228                        "OpStore %temp %false\n"
1229                        "OpBranch %119\n"
1230                        "%119 = OpLabel\n"
1231                        "OpBranch %82\n"
1232                        "%82 = OpLabel\n"
1233                        "%121 = OpLoad %uint %i\n"
1234                        "%122 = OpIAdd %uint %121 %int_1\n"
1235                        "OpStore %i %122\n"
1236                        "OpBranch %80\n"
1237                        "%81 = OpLabel\n"
1238                        "%123 = OpLoad %bool %temp\n"
1239                        "%124 = OpSelect %int %123 %int_15 %int_2\n"
1240                        "%125 = OpBitcast %uint %124\n"
1241                        "OpStore %tempResult %125\n"
1242                        "%126 = OpLoad %uint %tempResult\n"
1243                        "OpStore %tempRes %126\n"
1244                        "%127 = OpLoad %uint %offset\n"
1245                        "%128 = OpLoad %uint %tempRes\n"
1246                        "%129 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %127\n"
1247                        "OpStore %129 %128\n"
1248                        "OpReturn\n"
1249                        "OpFunctionEnd\n";
1250             break;
1251         default:
1252             TCU_THROW(InternalError, "Unknown mask type");
1253         }
1254         programCollection.spirvAsmSources.add("comp") << compute << buildOptionsSpr;
1255     }
1256     else
1257     {
1258         subgroups::initStdPrograms(programCollection, buildOptions, caseDef.shaderStage, VK_FORMAT_R32_UINT,
1259                                    pointSizeSupport, extHeader, testSrc, "", headDeclarations);
1260     }
1261 }
1262 
supportedCheck(Context & context,CaseDefinition caseDef)1263 void supportedCheck(Context &context, CaseDefinition caseDef)
1264 {
1265     if (!subgroups::isSubgroupSupported(context))
1266         TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
1267 
1268     if (!context.requireDeviceFunctionality("VK_EXT_shader_subgroup_ballot"))
1269     {
1270         TCU_THROW(NotSupportedError, "Device does not support VK_EXT_shader_subgroup_ballot extension");
1271     }
1272 
1273     if (!subgroups::isInt64SupportedForDevice(context))
1274         TCU_THROW(NotSupportedError, "Int64 is not supported");
1275 
1276     if (caseDef.requiredSubgroupSize)
1277     {
1278         context.requireDeviceFunctionality("VK_EXT_subgroup_size_control");
1279 
1280 #ifndef CTS_USES_VULKANSC
1281         const VkPhysicalDeviceSubgroupSizeControlFeatures &subgroupSizeControlFeatures =
1282             context.getSubgroupSizeControlFeatures();
1283         const VkPhysicalDeviceSubgroupSizeControlProperties &subgroupSizeControlProperties =
1284             context.getSubgroupSizeControlProperties();
1285 #else
1286         const VkPhysicalDeviceSubgroupSizeControlFeaturesEXT &subgroupSizeControlFeatures =
1287             context.getSubgroupSizeControlFeaturesEXT();
1288         const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT &subgroupSizeControlProperties =
1289             context.getSubgroupSizeControlPropertiesEXT();
1290 #endif // CTS_USES_VULKANSC
1291 
1292         if (subgroupSizeControlFeatures.subgroupSizeControl == false)
1293             TCU_THROW(NotSupportedError, "Device does not support varying subgroup sizes nor required subgroup size");
1294 
1295         if (subgroupSizeControlFeatures.computeFullSubgroups == false)
1296             TCU_THROW(NotSupportedError, "Device does not support full subgroups in compute shaders");
1297 
1298         if ((subgroupSizeControlProperties.requiredSubgroupSizeStages & caseDef.shaderStage) != caseDef.shaderStage)
1299             TCU_THROW(NotSupportedError, "Required subgroup size is not supported for shader stage");
1300     }
1301 
1302     *caseDef.geometryPointSizeSupported = subgroups::isTessellationAndGeometryPointSizeSupported(context);
1303 
1304 #ifndef CTS_USES_VULKANSC
1305     if (isAllRayTracingStages(caseDef.shaderStage))
1306     {
1307         context.requireDeviceFunctionality("VK_KHR_ray_tracing_pipeline");
1308     }
1309     else if (isAllMeshShadingStages(caseDef.shaderStage))
1310     {
1311         context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_VERTEX_PIPELINE_STORES_AND_ATOMICS);
1312         context.requireDeviceFunctionality("VK_EXT_mesh_shader");
1313 
1314         if ((caseDef.shaderStage & VK_SHADER_STAGE_TASK_BIT_EXT) != 0u)
1315         {
1316             const auto &features = context.getMeshShaderFeaturesEXT();
1317             if (!features.taskShader)
1318                 TCU_THROW(NotSupportedError, "Task shaders not supported");
1319         }
1320     }
1321 #endif // CTS_USES_VULKANSC
1322 
1323     subgroups::supportedCheckShader(context, caseDef.shaderStage);
1324 }
1325 
noSSBOtest(Context & context,const CaseDefinition caseDef)1326 TestStatus noSSBOtest(Context &context, const CaseDefinition caseDef)
1327 {
1328     switch (caseDef.shaderStage)
1329     {
1330     case VK_SHADER_STAGE_VERTEX_BIT:
1331         return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, DE_NULL,
1332                                                     checkVertexPipelineStages);
1333     case VK_SHADER_STAGE_GEOMETRY_BIT:
1334         return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, DE_NULL,
1335                                                       checkVertexPipelineStages);
1336     case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
1337         return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, DE_NULL,
1338                                                                     checkVertexPipelineStages);
1339     case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
1340         return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, DE_NULL,
1341                                                                     checkVertexPipelineStages);
1342     default:
1343         TCU_THROW(InternalError, "Unhandled shader stage");
1344     }
1345 }
1346 
test(Context & context,const CaseDefinition caseDef)1347 TestStatus test(Context &context, const CaseDefinition caseDef)
1348 {
1349     const bool isCompute = isAllComputeStages(caseDef.shaderStage);
1350 #ifndef CTS_USES_VULKANSC
1351     const bool isMesh = isAllMeshShadingStages(caseDef.shaderStage);
1352 #else
1353     const bool isMesh = false;
1354 #endif // CTS_USES_VULKANSC
1355     DE_ASSERT(!(isCompute && isMesh));
1356 
1357     if (isCompute || isMesh)
1358     {
1359 #ifndef CTS_USES_VULKANSC
1360         const VkPhysicalDeviceSubgroupSizeControlProperties &subgroupSizeControlProperties =
1361             context.getSubgroupSizeControlProperties();
1362 #else
1363         const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT &subgroupSizeControlProperties =
1364             context.getSubgroupSizeControlPropertiesEXT();
1365 #endif // CTS_USES_VULKANSC
1366         TestLog &log = context.getTestContext().getLog();
1367 
1368         if (caseDef.requiredSubgroupSize == false)
1369         {
1370             if (isCompute)
1371                 return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, DE_NULL, checkComputeOrMesh);
1372             else
1373                 return subgroups::makeMeshTest(context, VK_FORMAT_R32_UINT, nullptr, 0, nullptr, checkComputeOrMesh);
1374         }
1375 
1376         // gl_SubGroup*MaskARB are uint64_t, so we limit max subgroup size to 64 for this test
1377         uint32_t maxSubgroupSize = min(subgroupSizeControlProperties.maxSubgroupSize, 64U);
1378 
1379         log << TestLog::Message << "Testing required subgroup size range ["
1380             << subgroupSizeControlProperties.minSubgroupSize << ", " << maxSubgroupSize
1381             << "]" << TestLog::EndMessage;
1382 
1383         // According to the spec, requiredSubgroupSize must be a power-of-two integer.
1384         for (uint32_t size = subgroupSizeControlProperties.minSubgroupSize;
1385              size <= maxSubgroupSize; size *= 2)
1386         {
1387             TestStatus result(QP_TEST_RESULT_INTERNAL_ERROR, "Internal Error");
1388 
1389             if (isCompute)
1390                 result = subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0u, DE_NULL,
1391                                                     checkComputeOrMesh, size);
1392             else
1393                 result = subgroups::makeMeshTest(context, VK_FORMAT_R32_UINT, nullptr, 0u, nullptr, checkComputeOrMesh,
1394                                                  size);
1395 
1396             if (result.getCode() != QP_TEST_RESULT_PASS)
1397             {
1398                 log << TestLog::Message << "subgroupSize " << size << " failed" << TestLog::EndMessage;
1399                 return result;
1400             }
1401         }
1402 
1403         return TestStatus::pass("OK");
1404     }
1405     else if (isAllGraphicsStages(caseDef.shaderStage))
1406     {
1407         const VkShaderStageFlags stages = subgroups::getPossibleGraphicsSubgroupStages(context, caseDef.shaderStage);
1408 
1409         return subgroups::allStages(context, VK_FORMAT_R32_UINT, DE_NULL, 0, DE_NULL, checkVertexPipelineStages,
1410                                     stages);
1411     }
1412 #ifndef CTS_USES_VULKANSC
1413     else if (isAllRayTracingStages(caseDef.shaderStage))
1414     {
1415         const VkShaderStageFlags stages = subgroups::getPossibleRayTracingSubgroupStages(context, caseDef.shaderStage);
1416 
1417         return subgroups::allRayTracingStages(context, VK_FORMAT_R32_UINT, DE_NULL, 0, DE_NULL,
1418                                               checkVertexPipelineStages, stages);
1419     }
1420 #endif // CTS_USES_VULKANSC
1421     else
1422         TCU_THROW(InternalError, "Unknown stage or invalid stage set");
1423 }
1424 } // namespace
1425 
1426 namespace vkt
1427 {
1428 namespace subgroups
1429 {
createSubgroupsBallotMasksTests(TestContext & testCtx)1430 TestCaseGroup *createSubgroupsBallotMasksTests(TestContext &testCtx)
1431 {
1432     de::MovePtr<TestCaseGroup> group(new TestCaseGroup(testCtx, "ballot_mask"));
1433     de::MovePtr<TestCaseGroup> groupARB(new TestCaseGroup(testCtx, "ext_shader_subgroup_ballot"));
1434     de::MovePtr<TestCaseGroup> graphicGroup(new TestCaseGroup(testCtx, "graphics"));
1435     de::MovePtr<TestCaseGroup> computeGroup(new TestCaseGroup(testCtx, "compute"));
1436     de::MovePtr<TestCaseGroup> framebufferGroup(new TestCaseGroup(testCtx, "framebuffer"));
1437 #ifndef CTS_USES_VULKANSC
1438     de::MovePtr<TestCaseGroup> raytracingGroup(new TestCaseGroup(testCtx, "ray_tracing"));
1439     de::MovePtr<TestCaseGroup> meshGroup(new TestCaseGroup(testCtx, "mesh"));
1440 #endif // CTS_USES_VULKANSC
1441     const VkShaderStageFlags fbStages[] = {
1442         VK_SHADER_STAGE_VERTEX_BIT,
1443         VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
1444         VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
1445         VK_SHADER_STAGE_GEOMETRY_BIT,
1446     };
1447 #ifndef CTS_USES_VULKANSC
1448     const VkShaderStageFlags meshStages[] = {
1449         VK_SHADER_STAGE_MESH_BIT_EXT,
1450         VK_SHADER_STAGE_TASK_BIT_EXT,
1451     };
1452 #endif // CTS_USES_VULKANSC
1453     const bool boolValues[] = {false, true};
1454 
1455     for (int maskTypeIndex = 0; maskTypeIndex < MASKTYPE_LAST; ++maskTypeIndex)
1456     {
1457         const MaskType maskType = static_cast<MaskType>(maskTypeIndex);
1458         const string mask       = de::toLower(getMaskTypeName(maskType));
1459 
1460         for (size_t groupSizeNdx = 0; groupSizeNdx < DE_LENGTH_OF_ARRAY(boolValues); ++groupSizeNdx)
1461         {
1462             const bool requiredSubgroupSize = boolValues[groupSizeNdx];
1463             const string testName           = mask + (requiredSubgroupSize ? "_requiredsubgroupsize" : "");
1464             const CaseDefinition caseDef    = {
1465                 maskType,                      //  MaskType maskType;
1466                 VK_SHADER_STAGE_COMPUTE_BIT,   //  VkShaderStageFlags shaderStage;
1467                 de::SharedPtr<bool>(new bool), //  de::SharedPtr<bool> geometryPointSizeSupported;
1468                 requiredSubgroupSize,          //  bool requiredSubgroupSize;
1469             };
1470 
1471             addFunctionCaseWithPrograms(computeGroup.get(), testName, supportedCheck, initPrograms, test, caseDef);
1472         }
1473 
1474 #ifndef CTS_USES_VULKANSC
1475         for (size_t groupSizeNdx = 0; groupSizeNdx < DE_LENGTH_OF_ARRAY(boolValues); ++groupSizeNdx)
1476         {
1477             for (const auto &stage : meshStages)
1478             {
1479                 const bool requiredSubgroupSize = boolValues[groupSizeNdx];
1480                 const string testName           = mask + (requiredSubgroupSize ? "_requiredsubgroupsize" : "");
1481                 const CaseDefinition caseDef    = {
1482                     maskType,                      //  MaskType maskType;
1483                     stage,                         //  VkShaderStageFlags shaderStage;
1484                     de::SharedPtr<bool>(new bool), //  de::SharedPtr<bool> geometryPointSizeSupported;
1485                     requiredSubgroupSize,          //  bool requiredSubgroupSize;
1486                 };
1487 
1488                 addFunctionCaseWithPrograms(meshGroup.get(), testName + "_" + getShaderStageName(stage), supportedCheck,
1489                                             initPrograms, test, caseDef);
1490             }
1491         }
1492 #endif // CTS_USES_VULKANSC
1493 
1494         {
1495             const CaseDefinition caseDef = {
1496                 maskType,                      //  MaskType maskType;
1497                 VK_SHADER_STAGE_ALL_GRAPHICS,  //  VkShaderStageFlags shaderStage;
1498                 de::SharedPtr<bool>(new bool), //  de::SharedPtr<bool> geometryPointSizeSupported;
1499                 false                          //  bool requiredSubgroupSize;
1500             };
1501 
1502             addFunctionCaseWithPrograms(graphicGroup.get(), mask, supportedCheck, initPrograms, test, caseDef);
1503         }
1504 
1505 #ifndef CTS_USES_VULKANSC
1506         {
1507             const CaseDefinition caseDef = {
1508                 maskType,                      //  MaskType maskType;
1509                 SHADER_STAGE_ALL_RAY_TRACING,  //  VkShaderStageFlags shaderStage;
1510                 de::SharedPtr<bool>(new bool), //  de::SharedPtr<bool> geometryPointSizeSupported;
1511                 false                          //  bool requiredSubgroupSize;
1512             };
1513 
1514             addFunctionCaseWithPrograms(raytracingGroup.get(), mask, supportedCheck, initPrograms, test, caseDef);
1515         }
1516 #endif // CTS_USES_VULKANSC
1517 
1518         for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(fbStages); ++stageIndex)
1519         {
1520             const CaseDefinition caseDef = {
1521                 maskType,                      //  MaskType maskType;
1522                 fbStages[stageIndex],          //  VkShaderStageFlags shaderStage;
1523                 de::SharedPtr<bool>(new bool), //  de::SharedPtr<bool> geometryPointSizeSupported;
1524                 false                          //  bool requiredSubgroupSize;
1525             };
1526             const string testName = mask + "_" + getShaderStageName(caseDef.shaderStage);
1527 
1528             addFunctionCaseWithPrograms(framebufferGroup.get(), testName, supportedCheck, initFrameBufferPrograms,
1529                                         noSSBOtest, caseDef);
1530         }
1531     }
1532 
1533     groupARB->addChild(graphicGroup.release());
1534     groupARB->addChild(computeGroup.release());
1535     groupARB->addChild(framebufferGroup.release());
1536 #ifndef CTS_USES_VULKANSC
1537     groupARB->addChild(raytracingGroup.release());
1538     groupARB->addChild(meshGroup.release());
1539 #endif // CTS_USES_VULKANSC
1540     group->addChild(groupARB.release());
1541 
1542     return group.release();
1543 }
1544 
1545 } // namespace subgroups
1546 } // namespace vkt
1547