1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2019 The Khronos Group Inc.
6 * Copyright (c) 2019 Valve Corporation.
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 *
20 */ /*!
21 * \file
22 * \brief Subgroups Tests
23 */ /*--------------------------------------------------------------------*/
24
25 #include "vktSubgroupsBallotMasksTests.hpp"
26 #include "vktSubgroupsTestsUtils.hpp"
27
28 #include <string>
29 #include <vector>
30
31 using namespace tcu;
32 using namespace std;
33 using namespace vk;
34 using namespace vkt;
35
36 namespace
37 {
38
// Selects which GL_ARB_shader_ballot mask built-in a test case exercises.
// The enumerators are consecutive starting at zero; MASKTYPE_LAST is not a
// real mask type and is rejected by the shader-source generators below.
enum MaskType
{
    MASKTYPE_EQ = 0, // gl_SubGroupEqMaskARB
    MASKTYPE_GE,     // gl_SubGroupGeMaskARB
    MASKTYPE_GT,     // gl_SubGroupGtMaskARB
    MASKTYPE_LE,     // gl_SubGroupLeMaskARB
    MASKTYPE_LT,     // gl_SubGroupLtMaskARB
    MASKTYPE_LAST    // number of real mask types (sentinel)
};
48
49 struct CaseDefinition
50 {
51 MaskType maskType;
52 VkShaderStageFlags shaderStage;
53 de::SharedPtr<bool> geometryPointSizeSupported;
54 bool requiredSubgroupSize;
55 };
56
checkVertexPipelineStages(const void * internalData,vector<const void * > datas,uint32_t width,uint32_t)57 static bool checkVertexPipelineStages(const void *internalData, vector<const void *> datas, uint32_t width, uint32_t)
58 {
59 DE_UNREF(internalData);
60
61 return subgroups::check(datas, width, 0xf);
62 }
63
checkComputeOrMesh(const void * internalData,vector<const void * > datas,const uint32_t numWorkgroups[3],const uint32_t localSize[3],uint32_t)64 static bool checkComputeOrMesh(const void *internalData, vector<const void *> datas, const uint32_t numWorkgroups[3],
65 const uint32_t localSize[3], uint32_t)
66 {
67 DE_UNREF(internalData);
68
69 return subgroups::checkComputeOrMesh(datas, numWorkgroups, localSize, 0xf);
70 }
71
getMaskTypeName(const MaskType maskType)72 string getMaskTypeName(const MaskType maskType)
73 {
74 switch (maskType)
75 {
76 case MASKTYPE_EQ:
77 return "gl_SubGroupEqMaskARB";
78 case MASKTYPE_GE:
79 return "gl_SubGroupGeMaskARB";
80 case MASKTYPE_GT:
81 return "gl_SubGroupGtMaskARB";
82 case MASKTYPE_LE:
83 return "gl_SubGroupLeMaskARB";
84 case MASKTYPE_LT:
85 return "gl_SubGroupLtMaskARB";
86 default:
87 TCU_THROW(InternalError, "Unsupported mask type");
88 }
89 }
90
getBodySource(const CaseDefinition & caseDef)91 string getBodySource(const CaseDefinition &caseDef)
92 {
93 string body = " uint64_t value = " + getMaskTypeName(caseDef.maskType) +
94 ";\n"
95 " bool temp = true;\n";
96
97 switch (caseDef.maskType)
98 {
99 case MASKTYPE_EQ:
100 body += " uint64_t mask = uint64_t(1) << gl_SubGroupInvocationARB;\n"
101 " temp = (value & mask) != 0;\n";
102 break;
103 case MASKTYPE_GE:
104 body += " for (uint i = 0; i < gl_SubGroupSizeARB; i++) {\n"
105 " uint64_t mask = uint64_t(1) << i;\n"
106 " if (i >= gl_SubGroupInvocationARB && (value & mask) == 0)\n"
107 " temp = false;\n"
108 " if (i < gl_SubGroupInvocationARB && (value & mask) != 0)\n"
109 " temp = false;\n"
110 " };\n";
111 break;
112 case MASKTYPE_GT:
113 body += " for (uint i = 0; i < gl_SubGroupSizeARB; i++) {\n"
114 " uint64_t mask = uint64_t(1) << i;\n"
115 " if (i > gl_SubGroupInvocationARB && (value & mask) == 0)\n"
116 " temp = false;\n"
117 " if (i <= gl_SubGroupInvocationARB && (value & mask) != 0)\n"
118 " temp = false;\n"
119 " };\n";
120 break;
121 case MASKTYPE_LE:
122 body += " for (uint i = 0; i < gl_SubGroupSizeARB; i++) {\n"
123 " uint64_t mask = uint64_t(1) << i;\n"
124 " if (i <= gl_SubGroupInvocationARB && (value & mask) == 0)\n"
125 " temp = false;\n"
126 " if (i > gl_SubGroupInvocationARB && (value & mask) != 0)\n"
127 " temp = false;\n"
128 " };\n";
129 break;
130 case MASKTYPE_LT:
131 body += " for (uint i = 0; i < gl_SubGroupSizeARB; i++) {\n"
132 " uint64_t mask = uint64_t(1) << i;\n"
133 " if (i < gl_SubGroupInvocationARB && (value & mask) == 0)\n"
134 " temp = false;\n"
135 " if (i >= gl_SubGroupInvocationARB && (value & mask) != 0)\n"
136 " temp = false;\n"
137 " };\n";
138 break;
139 default:
140 TCU_THROW(InternalError, "Unknown mask type");
141 }
142
143 body += " uint tempResult = temp ? 0xf : 0x2;\n";
144 body += " tempRes = tempResult;\n";
145
146 return body;
147 }
148
getExtHeader(const CaseDefinition &)149 string getExtHeader(const CaseDefinition &)
150 {
151 return "#extension GL_ARB_shader_ballot: enable\n"
152 "#extension GL_ARB_gpu_shader_int64: enable\n";
153 }
154
getPerStageHeadDeclarations(const CaseDefinition & caseDef)155 vector<string> getPerStageHeadDeclarations(const CaseDefinition &caseDef)
156 {
157 const uint32_t stageCount = subgroups::getStagesCount(caseDef.shaderStage);
158 const bool fragment = (caseDef.shaderStage & VK_SHADER_STAGE_FRAGMENT_BIT) != 0;
159 vector<string> result(stageCount, string());
160
161 if (fragment)
162 result.reserve(result.size() + 1);
163
164 for (size_t i = 0; i < result.size(); ++i)
165 {
166 result[i] = "layout(set = 0, binding = " + de::toString(i) +
167 ", std430) buffer Buffer1\n"
168 "{\n"
169 " uint result[];\n"
170 "};\n";
171 }
172
173 if (fragment)
174 {
175 const string fragPart = "layout(location = 0) out uint result;\n";
176
177 result.push_back(fragPart);
178 }
179
180 return result;
181 }
182
getFramebufferPerStageHeadDeclarations(const CaseDefinition & caseDef)183 vector<string> getFramebufferPerStageHeadDeclarations(const CaseDefinition &caseDef)
184 {
185 vector<string> result;
186
187 DE_UNREF(caseDef);
188
189 result.push_back("layout(location = 0) out float result;\n");
190 result.push_back("layout(location = 0) out float out_color;\n");
191 result.push_back("layout(location = 0) out float out_color[];\n");
192 result.push_back("layout(location = 0) out float out_color;\n");
193
194 return result;
195 }
196
initFrameBufferPrograms(SourceCollections & programCollection,CaseDefinition caseDef)197 void initFrameBufferPrograms(SourceCollections &programCollection, CaseDefinition caseDef)
198 {
199 const ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3, 0u);
200 const string extHeader = getExtHeader(caseDef);
201 const string testSrc = getBodySource(caseDef);
202 const vector<string> headDeclarations = getFramebufferPerStageHeadDeclarations(caseDef);
203 const bool pointSizeSupported = *caseDef.geometryPointSizeSupported;
204
205 subgroups::initStdFrameBufferPrograms(programCollection, buildOptions, caseDef.shaderStage, VK_FORMAT_R32_UINT,
206 pointSizeSupported, extHeader, testSrc, "", headDeclarations);
207 }
208
initPrograms(SourceCollections & programCollection,CaseDefinition caseDef)209 void initPrograms(SourceCollections &programCollection, CaseDefinition caseDef)
210 {
211 #ifndef CTS_USES_VULKANSC
212 const bool spirv14required =
213 (isAllRayTracingStages(caseDef.shaderStage) || isAllMeshShadingStages(caseDef.shaderStage));
214 #else
215 const bool spirv14required = false;
216 #endif // CTS_USES_VULKANSC
217 const SpirvVersion spirvVersion = (spirv14required ? SPIRV_VERSION_1_4 : SPIRV_VERSION_1_3);
218 const ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, spirvVersion, 0u, spirv14required);
219 const string extHeader = getExtHeader(caseDef);
220 const string testSrc = getBodySource(caseDef);
221 const vector<string> headDeclarations = getPerStageHeadDeclarations(caseDef);
222 const bool pointSizeSupport = *caseDef.geometryPointSizeSupported;
223 const SpirVAsmBuildOptions buildOptionsSpr(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3);
224
225 if (isAllComputeStages(caseDef.shaderStage))
226 {
227 string compute = "";
228 switch (caseDef.maskType)
229 {
230 case MASKTYPE_EQ:
231 compute += "; SPIR-V\n"
232 "; Version: 1.6\n"
233 "; Generator: Khronos SPIR-V Tools Assembler; 0\n"
234 "; Bound: 98\n"
235 "; Schema: 0\n"
236 "OpCapability Shader\n"
237 "OpCapability Int64\n"
238 "OpCapability SubgroupBallotKHR\n"
239 "OpExtension \"SPV_KHR_shader_ballot\"\n"
240 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
241 "OpMemoryModel Logical GLSL450\n"
242 "OpEntryPoint GLCompute %main \"main\" %gl_NumWorkGroups %gl_GlobalInvocationID "
243 "%gl_SubGroupEqMaskARB %gl_SubGroupInvocationARB\n"
244 "OpExecutionMode %main LocalSize 1 1 1\n"
245 "OpSource GLSL 450\n"
246 "OpSourceExtension \"GL_ARB_gpu_shader_int64\"\n"
247 "OpSourceExtension \"GL_ARB_shader_ballot\"\n"
248 "OpName %main \"main\"\n"
249 "OpName %globalSize \"globalSize\"\n"
250 "OpName %gl_NumWorkGroups \"gl_NumWorkGroups\"\n"
251 "OpName %offset \"offset\"\n"
252 "OpName %gl_GlobalInvocationID \"gl_GlobalInvocationID\"\n"
253 "OpName %bitmask \"bitmask\"\n"
254 "OpName %gl_SubGroupEqMaskARB \"gl_SubGroupEqMaskARB\"\n"
255 "OpName %temp \"temp\"\n"
256 "OpName %elementIndex \"elementIndex\"\n"
257 "OpName %gl_SubGroupInvocationARB \"gl_SubGroupInvocationARB\"\n"
258 "OpName %bitPosition \"bitPosition\"\n"
259 "OpName %mask \"mask\"\n"
260 "OpName %element \"element\"\n"
261 "OpName %tempResult \"tempResult\"\n"
262 "OpName %tempRes \"tempRes\"\n"
263 "OpName %Buffer1 \"Buffer1\"\n"
264 "OpMemberName %Buffer1 0 \"result\"\n"
265 "OpName %_ \"\"\n"
266 "OpDecorate %gl_NumWorkGroups BuiltIn NumWorkgroups\n"
267 "OpDecorate %19 SpecId 0\n"
268 "OpDecorate %20 SpecId 1\n"
269 "OpDecorate %21 SpecId 2\n"
270 "OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize\n"
271 "OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId\n"
272 "OpDecorate %gl_SubGroupEqMaskARB BuiltIn SubgroupEqMask\n"
273 "OpDecorate %gl_SubGroupInvocationARB BuiltIn SubgroupLocalInvocationId\n"
274 "OpDecorate %_runtimearr_uint ArrayStride 4\n"
275 "OpMemberDecorate %Buffer1 0 Offset 0\n"
276 "OpDecorate %Buffer1 BufferBlock\n"
277 "OpDecorate %_ DescriptorSet 0\n"
278 "OpDecorate %_ Binding 0\n"
279 "%void = OpTypeVoid\n"
280 "%25 = OpTypeFunction %void\n"
281 "%uint = OpTypeInt 32 0\n"
282 "%v3uint = OpTypeVector %uint 3\n"
283 "%_ptr_Function_v3uint = OpTypePointer Function %v3uint\n"
284 "%_ptr_Input_v3uint = OpTypePointer Input %v3uint\n"
285 "%gl_NumWorkGroups = OpVariable %_ptr_Input_v3uint Input\n"
286 "%19 = OpSpecConstant %uint 1\n"
287 "%20 = OpSpecConstant %uint 1\n"
288 "%21 = OpSpecConstant %uint 1\n"
289 "%gl_WorkGroupSize = OpSpecConstantComposite %v3uint %19 %20 %21\n"
290 "%_ptr_Function_uint = OpTypePointer Function %uint\n"
291 "%uint_0 = OpConstant %uint 0\n"
292 "%uint_1 = OpConstant %uint 1\n"
293 "%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input\n"
294 "%uint_2 = OpConstant %uint 2\n"
295 "%_ptr_Input_uint = OpTypePointer Input %uint\n"
296 "%int = OpTypeInt 32 1\n"
297 "%v4uint = OpTypeVector %uint 4\n"
298 "%uint_4 = OpConstant %uint 4\n"
299 "%_arr_uint_uint_4 = OpTypeArray %uint %uint_4\n"
300 "%_ptr_Function_v4uint = OpTypePointer Function %v4uint\n"
301 "%_ptr_Function__arr_uint_uint_4 = OpTypePointer Function %_arr_uint_uint_4\n"
302 "%ulong = OpTypeInt 64 0\n"
303 "%_ptr_Input_ulong = OpTypePointer Input %ulong\n"
304 "%_ptr_Input_v4uint = OpTypePointer Input %v4uint\n"
305 "%gl_SubGroupEqMaskARB = OpVariable %_ptr_Input_v4uint Input\n"
306 "%bool = OpTypeBool\n"
307 "%_ptr_Function_bool = OpTypePointer Function %bool\n"
308 "%true = OpConstantTrue %bool\n"
309 "%gl_SubGroupInvocationARB = OpVariable %_ptr_Input_uint Input\n"
310 "%uint_32 = OpConstant %uint 32\n"
311 "%_ptr_Function_int = OpTypePointer Function %int\n"
312 "%int_15 = OpConstant %int 15\n"
313 "%int_2 = OpConstant %int 2\n"
314 "%_runtimearr_uint = OpTypeRuntimeArray %uint\n"
315 "%Buffer1 = OpTypeStruct %_runtimearr_uint\n"
316 "%_ptr_Uniform_Buffer1 = OpTypePointer Uniform %Buffer1\n"
317 "%_ = OpVariable %_ptr_Uniform_Buffer1 Uniform\n"
318 "%int_0 = OpConstant %int 0\n"
319 "%_ptr_Uniform_uint = OpTypePointer Uniform %uint\n"
320 "%main = OpFunction %void None %25\n"
321 "%54 = OpLabel\n"
322 "%globalSize = OpVariable %_ptr_Function_v3uint Function\n"
323 "%offset = OpVariable %_ptr_Function_uint Function\n"
324 "%bitmask = OpVariable %_ptr_Function__arr_uint_uint_4 Function\n"
325 "%temp = OpVariable %_ptr_Function_bool Function\n"
326 "%elementIndex = OpVariable %_ptr_Function_uint Function\n"
327 "%bitPosition = OpVariable %_ptr_Function_uint Function\n"
328 "%mask = OpVariable %_ptr_Function_uint Function\n"
329 "%element = OpVariable %_ptr_Function_uint Function\n"
330 "%tempResult = OpVariable %_ptr_Function_uint Function\n"
331 "%tempRes = OpVariable %_ptr_Function_uint Function\n"
332 "%55 = OpLoad %v3uint %gl_NumWorkGroups\n"
333 "%56 = OpIMul %v3uint %55 %gl_WorkGroupSize\n"
334 "OpStore %globalSize %56\n"
335 "%57 = OpAccessChain %_ptr_Function_uint %globalSize %uint_0\n"
336 "%58 = OpLoad %uint %57\n"
337 "%59 = OpAccessChain %_ptr_Function_uint %globalSize %uint_1\n"
338 "%60 = OpLoad %uint %59\n"
339 "%61 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_2\n"
340 "%62 = OpLoad %uint %61\n"
341 "%63 = OpIMul %uint %60 %62\n"
342 "%64 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_1\n"
343 "%65 = OpLoad %uint %64\n"
344 "%66 = OpIAdd %uint %63 %65\n"
345 "%67 = OpIMul %uint %58 %66\n"
346 "%68 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0\n"
347 "%69 = OpLoad %uint %68\n"
348 "%70 = OpIAdd %uint %67 %69\n"
349 "OpStore %offset %70\n"
350 "%71 = OpLoad %v4uint %gl_SubGroupEqMaskARB\n"
351 "%72 = OpCompositeExtract %uint %71 0\n"
352 "%73 = OpCompositeExtract %uint %71 1\n"
353 "%74 = OpCompositeExtract %uint %71 2\n"
354 "%75 = OpCompositeExtract %uint %71 3\n"
355 "%76 = OpCompositeConstruct %_arr_uint_uint_4 %72 %73 %74 %75\n"
356 "OpStore %bitmask %76\n"
357 "OpStore %temp %true\n"
358 "%77 = OpLoad %uint %gl_SubGroupInvocationARB\n"
359 "%78 = OpUDiv %uint %77 %uint_32\n"
360 "OpStore %elementIndex %78\n"
361 "%79 = OpLoad %uint %gl_SubGroupInvocationARB\n"
362 "%80 = OpUMod %uint %79 %uint_32\n"
363 "OpStore %bitPosition %80\n"
364 "%81 = OpLoad %uint %bitPosition\n"
365 "%82 = OpShiftLeftLogical %uint %uint_1 %81\n"
366 "OpStore %mask %82\n"
367 "%83 = OpLoad %uint %elementIndex\n"
368 "%84 = OpAccessChain %_ptr_Function_uint %bitmask %83\n"
369 "%85 = OpLoad %uint %84\n"
370 "OpStore %element %85\n"
371 "%87 = OpLoad %uint %element\n"
372 "%88 = OpLoad %uint %mask\n"
373 "%89 = OpBitwiseAnd %uint %87 %88\n"
374 "%90 = OpINotEqual %bool %89 %uint_0\n"
375 "OpStore %temp %90\n"
376 "%91 = OpLoad %bool %temp\n"
377 "%92 = OpSelect %int %91 %int_15 %int_2\n"
378 "%93 = OpBitcast %uint %92\n"
379 "OpStore %tempResult %93\n"
380 "%94 = OpLoad %uint %tempResult\n"
381 "OpStore %tempRes %94\n"
382 "%95 = OpLoad %uint %offset\n"
383 "%96 = OpLoad %uint %tempRes\n"
384 "%97 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %95\n"
385 "OpStore %97 %96\n"
386 "OpReturn\n"
387 "OpFunctionEnd\n";
388 break;
389 case MASKTYPE_GE:
390 compute += "; SPIR-V\n"
391 "; Version: 1.6\n"
392 "; Generator: Khronos SPIR-V Tools Assembler; 0\n"
393 "; Bound: 128\n"
394 "; Schema: 0\n"
395 "OpCapability Shader\n"
396 "OpCapability Int64\n"
397 "OpCapability SubgroupBallotKHR\n"
398 "OpExtension \"SPV_KHR_shader_ballot\"\n"
399 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
400 "OpMemoryModel Logical GLSL450\n"
401 "OpEntryPoint GLCompute %main \"main\" %gl_NumWorkGroups %gl_GlobalInvocationID "
402 "%gl_SubGroupGeMaskARB %gl_SubGroupSizeARB %gl_SubGroupInvocationARB\n"
403 "OpExecutionMode %main LocalSize 1 1 1\n"
404 "OpSource GLSL 450\n"
405 "OpSourceExtension \"GL_ARB_gpu_shader_int64\"\n"
406 "OpSourceExtension \"GL_ARB_shader_ballot\"\n"
407 "OpName %main \"main\"\n"
408 "OpName %globalSize \"globalSize\"\n"
409 "OpName %gl_NumWorkGroups \"gl_NumWorkGroups\"\n"
410 "OpName %offset \"offset\"\n"
411 "OpName %gl_GlobalInvocationID \"gl_GlobalInvocationID\"\n"
412 "OpName %bitmask \"bitmask\"\n"
413 "OpName %gl_SubGroupGeMaskARB \"gl_SubGroupGeMaskARB\"\n"
414 "OpName %temp \"temp\"\n"
415 "OpName %i \"i\"\n"
416 "OpName %gl_SubGroupSizeARB \"gl_SubGroupSizeARB\"\n"
417 "OpName %elementIndex \"elementIndex\"\n"
418 "OpName %bitPosition \"bitPosition\"\n"
419 "OpName %mask \"mask\"\n"
420 "OpName %element \"element\"\n"
421 "OpName %gl_SubGroupInvocationARB \"gl_SubGroupInvocationARB\"\n"
422 "OpName %tempResult \"tempResult\"\n"
423 "OpName %tempRes \"tempRes\"\n"
424 "OpName %Buffer1 \"Buffer1\"\n"
425 "OpMemberName %Buffer1 0 \"result\"\n"
426 "OpName %_ \"\"\n"
427 "OpDecorate %gl_NumWorkGroups BuiltIn NumWorkgroups\n"
428 "OpDecorate %21 SpecId 0\n"
429 "OpDecorate %22 SpecId 1\n"
430 "OpDecorate %23 SpecId 2\n"
431 "OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize\n"
432 "OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId\n"
433 "OpDecorate %gl_SubGroupGeMaskARB BuiltIn SubgroupGeMask\n"
434 "OpDecorate %gl_SubGroupSizeARB BuiltIn SubgroupSize\n"
435 "OpDecorate %gl_SubGroupInvocationARB BuiltIn SubgroupLocalInvocationId\n"
436 "OpDecorate %_runtimearr_uint ArrayStride 4\n"
437 "OpMemberDecorate %Buffer1 0 Offset 0\n"
438 "OpDecorate %Buffer1 BufferBlock\n"
439 "OpDecorate %_ DescriptorSet 0\n"
440 "OpDecorate %_ Binding 0\n"
441 "%void = OpTypeVoid\n"
442 "%27 = OpTypeFunction %void\n"
443 "%uint = OpTypeInt 32 0\n"
444 "%v3uint = OpTypeVector %uint 3\n"
445 "%_ptr_Function_v3uint = OpTypePointer Function %v3uint\n"
446 "%_ptr_Input_v3uint = OpTypePointer Input %v3uint\n"
447 "%gl_NumWorkGroups = OpVariable %_ptr_Input_v3uint Input\n"
448 "%21 = OpSpecConstant %uint 1\n"
449 "%22 = OpSpecConstant %uint 1\n"
450 "%23 = OpSpecConstant %uint 1\n"
451 "%gl_WorkGroupSize = OpSpecConstantComposite %v3uint %21 %22 %23\n"
452 "%_ptr_Function_uint = OpTypePointer Function %uint\n"
453 "%uint_0 = OpConstant %uint 0\n"
454 "%uint_1 = OpConstant %uint 1\n"
455 "%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input\n"
456 "%uint_2 = OpConstant %uint 2\n"
457 "%_ptr_Input_uint = OpTypePointer Input %uint\n"
458 "%v4uint = OpTypeVector %uint 4\n"
459 "%uint_4 = OpConstant %uint 4\n"
460 "%_arr_uint_uint_4 = OpTypeArray %uint %uint_4\n"
461 "%_ptr_Function_v4uint = OpTypePointer Function %v4uint\n"
462 "%_ptr_Function__arr_uint_uint_4 = OpTypePointer Function %_arr_uint_uint_4\n"
463 "%_ptr_Input_v4uint = OpTypePointer Input %v4uint\n"
464 "%gl_SubGroupGeMaskARB = OpVariable %_ptr_Input_v4uint Input\n"
465 "%bool = OpTypeBool\n"
466 "%_ptr_Function_bool = OpTypePointer Function %bool\n"
467 "%true = OpConstantTrue %bool\n"
468 "%gl_SubGroupSizeARB = OpVariable %_ptr_Input_uint Input\n"
469 "%uint_32 = OpConstant %uint 32\n"
470 "%gl_SubGroupInvocationARB = OpVariable %_ptr_Input_uint Input\n"
471 "%false = OpConstantFalse %bool\n"
472 "%int = OpTypeInt 32 1\n"
473 "%int_1 = OpConstant %int 1\n"
474 "%int_15 = OpConstant %int 15\n"
475 "%int_2 = OpConstant %int 2\n"
476 "%_runtimearr_uint = OpTypeRuntimeArray %uint\n"
477 "%Buffer1 = OpTypeStruct %_runtimearr_uint\n"
478 "%_ptr_Uniform_Buffer1 = OpTypePointer Uniform %Buffer1\n"
479 "%_ = OpVariable %_ptr_Uniform_Buffer1 Uniform\n"
480 "%int_0 = OpConstant %int 0\n"
481 "%_ptr_Uniform_uint = OpTypePointer Uniform %uint\n"
482 "%main = OpFunction %void None %27\n"
483 "%55 = OpLabel\n"
484 "%globalSize = OpVariable %_ptr_Function_v3uint Function\n"
485 "%offset = OpVariable %_ptr_Function_uint Function\n"
486 "%bitmask = OpVariable %_ptr_Function__arr_uint_uint_4 Function\n"
487 "%temp = OpVariable %_ptr_Function_bool Function\n"
488 "%i = OpVariable %_ptr_Function_uint Function\n"
489 "%elementIndex = OpVariable %_ptr_Function_uint Function\n"
490 "%bitPosition = OpVariable %_ptr_Function_uint Function\n"
491 "%mask = OpVariable %_ptr_Function_uint Function\n"
492 "%element = OpVariable %_ptr_Function_uint Function\n"
493 "%tempResult = OpVariable %_ptr_Function_uint Function\n"
494 "%tempRes = OpVariable %_ptr_Function_uint Function\n"
495 "%56 = OpLoad %v3uint %gl_NumWorkGroups\n"
496 "%57 = OpIMul %v3uint %56 %gl_WorkGroupSize\n"
497 "OpStore %globalSize %57\n"
498 "%58 = OpAccessChain %_ptr_Function_uint %globalSize %uint_0\n"
499 "%59 = OpLoad %uint %58\n"
500 "%60 = OpAccessChain %_ptr_Function_uint %globalSize %uint_1\n"
501 "%61 = OpLoad %uint %60\n"
502 "%62 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_2\n"
503 "%63 = OpLoad %uint %62\n"
504 "%64 = OpIMul %uint %61 %63\n"
505 "%65 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_1\n"
506 "%66 = OpLoad %uint %65\n"
507 "%67 = OpIAdd %uint %64 %66\n"
508 "%68 = OpIMul %uint %59 %67\n"
509 "%69 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0\n"
510 "%70 = OpLoad %uint %69\n"
511 "%71 = OpIAdd %uint %68 %70\n"
512 "OpStore %offset %71\n"
513 "%72 = OpLoad %v4uint %gl_SubGroupGeMaskARB\n"
514 "%73 = OpCompositeExtract %uint %72 0\n"
515 "%74 = OpCompositeExtract %uint %72 1\n"
516 "%75 = OpCompositeExtract %uint %72 2\n"
517 "%76 = OpCompositeExtract %uint %72 3\n"
518 "%77 = OpCompositeConstruct %_arr_uint_uint_4 %73 %74 %75 %76\n"
519 "OpStore %bitmask %77\n"
520 "OpStore %temp %true\n"
521 "OpStore %i %uint_0\n"
522 "OpBranch %78\n"
523 "%78 = OpLabel\n"
524 "OpLoopMerge %79 %80 None\n"
525 "OpBranch %81\n"
526 "%81 = OpLabel\n"
527 "%82 = OpLoad %uint %i\n"
528 "%83 = OpLoad %uint %gl_SubGroupSizeARB\n"
529 "%84 = OpULessThan %bool %82 %83\n"
530 "OpBranchConditional %84 %85 %79\n"
531 "%85 = OpLabel\n"
532 "%86 = OpLoad %uint %i\n"
533 "%87 = OpUDiv %uint %86 %uint_32\n"
534 "OpStore %elementIndex %87\n"
535 "%88 = OpLoad %uint %i\n"
536 "%89 = OpUMod %uint %88 %uint_32\n"
537 "OpStore %bitPosition %89\n"
538 "%90 = OpLoad %uint %bitPosition\n"
539 "%91 = OpShiftLeftLogical %uint %uint_1 %90\n"
540 "OpStore %mask %91\n"
541 "%92 = OpLoad %uint %elementIndex\n"
542 "%93 = OpAccessChain %_ptr_Function_uint %bitmask %92\n"
543 "%94 = OpLoad %uint %93\n"
544 "OpStore %element %94\n"
545 "%95 = OpLoad %uint %i\n"
546 "%96 = OpLoad %uint %gl_SubGroupInvocationARB\n"
547 "%97 = OpUGreaterThanEqual %bool %95 %96\n"
548 "OpSelectionMerge %98 None\n"
549 "OpBranchConditional %97 %99 %98\n"
550 "%99 = OpLabel\n"
551 "%100 = OpLoad %uint %element\n"
552 "%101 = OpLoad %uint %mask\n"
553 "%102 = OpBitwiseAnd %uint %100 %101\n"
554 "%103 = OpIEqual %bool %102 %uint_0\n"
555 "OpBranch %98\n"
556 "%98 = OpLabel\n"
557 "%104 = OpPhi %bool %97 %85 %103 %99\n"
558 "OpSelectionMerge %105 None\n"
559 "OpBranchConditional %104 %106 %105\n"
560 "%106 = OpLabel\n"
561 "OpStore %temp %false\n"
562 "OpBranch %105\n"
563 "%105 = OpLabel\n"
564 "%107 = OpLoad %uint %i\n"
565 "%108 = OpLoad %uint %gl_SubGroupInvocationARB\n"
566 "%109 = OpULessThan %bool %107 %108\n"
567 "OpSelectionMerge %110 None\n"
568 "OpBranchConditional %109 %111 %110\n"
569 "%111 = OpLabel\n"
570 "%112 = OpLoad %uint %element\n"
571 "%113 = OpLoad %uint %mask\n"
572 "%114 = OpBitwiseAnd %uint %112 %113\n"
573 "%115 = OpINotEqual %bool %114 %uint_0\n"
574 "OpBranch %110\n"
575 "%110 = OpLabel\n"
576 "%116 = OpPhi %bool %109 %105 %115 %111\n"
577 "OpSelectionMerge %117 None\n"
578 "OpBranchConditional %116 %118 %117\n"
579 "%118 = OpLabel\n"
580 "OpStore %temp %false\n"
581 "OpBranch %117\n"
582 "%117 = OpLabel\n"
583 "OpBranch %80\n"
584 "%80 = OpLabel\n"
585 "%119 = OpLoad %uint %i\n"
586 "%120 = OpIAdd %uint %119 %int_1\n"
587 "OpStore %i %120\n"
588 "OpBranch %78\n"
589 "%79 = OpLabel\n"
590 "%121 = OpLoad %bool %temp\n"
591 "%122 = OpSelect %int %121 %int_15 %int_2\n"
592 "%123 = OpBitcast %uint %122\n"
593 "OpStore %tempResult %123\n"
594 "%124 = OpLoad %uint %tempResult\n"
595 "OpStore %tempRes %124\n"
596 "%125 = OpLoad %uint %offset\n"
597 "%126 = OpLoad %uint %tempRes\n"
598 "%127 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %125\n"
599 "OpStore %127 %126\n"
600 "OpReturn\n"
601 "OpFunctionEnd\n";
602 break;
603 case MASKTYPE_GT:
604 compute += "; SPIR-V\n"
605 "; Version: 1.6\n"
606 "; Generator: Khronos SPIR-V Tools Assembler; 0\n"
607 "; Bound: 130\n"
608 "; Schema: 0\n"
609 "OpCapability Shader\n"
610 "OpCapability Int64\n"
611 "OpCapability SubgroupBallotKHR\n"
612 "OpExtension \"SPV_KHR_shader_ballot\"\n"
613 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
614 "OpMemoryModel Logical GLSL450\n"
615 "OpEntryPoint GLCompute %main \"main\" %gl_NumWorkGroups %gl_GlobalInvocationID "
616 "%gl_SubGroupGtMaskARB %gl_SubGroupSizeARB %gl_SubGroupInvocationARB\n"
617 "OpExecutionMode %main LocalSize 1 1 1\n"
618 "OpSource GLSL 450\n"
619 "OpSourceExtension \"GL_ARB_gpu_shader_int64\"\n"
620 "OpSourceExtension \"GL_ARB_shader_ballot\"\n"
621 "OpName %main \"main\"\n"
622 "OpName %globalSize \"globalSize\"\n"
623 "OpName %gl_NumWorkGroups \"gl_NumWorkGroups\"\n"
624 "OpName %offset \"offset\"\n"
625 "OpName %gl_GlobalInvocationID \"gl_GlobalInvocationID\"\n"
626 "OpName %bitmask \"bitmask\"\n"
627 "OpName %gl_SubGroupGtMaskARB \"gl_SubGroupGtMaskARB\"\n"
628 "OpName %temp \"temp\"\n"
629 "OpName %i \"i\"\n"
630 "OpName %gl_SubGroupSizeARB \"gl_SubGroupSizeARB\"\n"
631 "OpName %elementIndex \"elementIndex\"\n"
632 "OpName %bitPosition \"bitPosition\"\n"
633 "OpName %mask \"mask\"\n"
634 "OpName %element \"element\"\n"
635 "OpName %gl_SubGroupInvocationARB \"gl_SubGroupInvocationARB\"\n"
636 "OpName %tempResult \"tempResult\"\n"
637 "OpName %tempRes \"tempRes\"\n"
638 "OpName %Buffer1 \"Buffer1\"\n"
639 "OpMemberName %Buffer1 0 \"result\"\n"
640 "OpName %_ \"\"\n"
641 "OpDecorate %gl_NumWorkGroups BuiltIn NumWorkgroups\n"
642 "OpDecorate %21 SpecId 0\n"
643 "OpDecorate %22 SpecId 1\n"
644 "OpDecorate %23 SpecId 2\n"
645 "OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize\n"
646 "OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId\n"
647 "OpDecorate %gl_SubGroupGtMaskARB BuiltIn SubgroupGtMask\n"
648 "OpDecorate %gl_SubGroupSizeARB BuiltIn SubgroupSize\n"
649 "OpDecorate %gl_SubGroupInvocationARB BuiltIn SubgroupLocalInvocationId\n"
650 "OpDecorate %_runtimearr_uint ArrayStride 4\n"
651 "OpMemberDecorate %Buffer1 0 Offset 0\n"
652 "OpDecorate %Buffer1 BufferBlock\n"
653 "OpDecorate %_ DescriptorSet 0\n"
654 "OpDecorate %_ Binding 0\n"
655 "%void = OpTypeVoid\n"
656 "%27 = OpTypeFunction %void\n"
657 "%uint = OpTypeInt 32 0\n"
658 "%v3uint = OpTypeVector %uint 3\n"
659 "%_ptr_Function_v3uint = OpTypePointer Function %v3uint\n"
660 "%_ptr_Input_v3uint = OpTypePointer Input %v3uint\n"
661 "%gl_NumWorkGroups = OpVariable %_ptr_Input_v3uint Input\n"
662 "%21 = OpSpecConstant %uint 1\n"
663 "%22 = OpSpecConstant %uint 1\n"
664 "%23 = OpSpecConstant %uint 1\n"
665 "%gl_WorkGroupSize = OpSpecConstantComposite %v3uint %21 %22 %23\n"
666 "%_ptr_Function_uint = OpTypePointer Function %uint\n"
667 "%uint_0 = OpConstant %uint 0\n"
668 "%uint_1 = OpConstant %uint 1\n"
669 "%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input\n"
670 "%uint_2 = OpConstant %uint 2\n"
671 "%_ptr_Input_uint = OpTypePointer Input %uint\n"
672 "%v4uint = OpTypeVector %uint 4\n"
673 "%uint_4 = OpConstant %uint 4\n"
674 "%_arr_uint_uint_4 = OpTypeArray %uint %uint_4\n"
675 "%_ptr_Function_v4uint = OpTypePointer Function %v4uint\n"
676 "%_ptr_Function__arr_uint_uint_4 = OpTypePointer Function %_arr_uint_uint_4\n"
677 "%ulong = OpTypeInt 64 0\n"
678 "%_ptr_Input_ulong = OpTypePointer Input %ulong\n"
679 "%_ptr_Input_v4uint = OpTypePointer Input %v4uint\n"
680 "%gl_SubGroupGtMaskARB = OpVariable %_ptr_Input_v4uint Input\n"
681 "%bool = OpTypeBool\n"
682 "%_ptr_Function_bool = OpTypePointer Function %bool\n"
683 "%true = OpConstantTrue %bool\n"
684 "%gl_SubGroupSizeARB = OpVariable %_ptr_Input_uint Input\n"
685 "%uint_32 = OpConstant %uint 32\n"
686 "%gl_SubGroupInvocationARB = OpVariable %_ptr_Input_uint Input\n"
687 "%false = OpConstantFalse %bool\n"
688 "%int = OpTypeInt 32 1\n"
689 "%int_1 = OpConstant %int 1\n"
690 "%int_15 = OpConstant %int 15\n"
691 "%int_2 = OpConstant %int 2\n"
692 "%_runtimearr_uint = OpTypeRuntimeArray %uint\n"
693 "%Buffer1 = OpTypeStruct %_runtimearr_uint\n"
694 "%_ptr_Uniform_Buffer1 = OpTypePointer Uniform %Buffer1\n"
695 "%_ = OpVariable %_ptr_Uniform_Buffer1 Uniform\n"
696 "%int_0 = OpConstant %int 0\n"
697 "%_ptr_Uniform_uint = OpTypePointer Uniform %uint\n"
698 "%main = OpFunction %void None %27\n"
699 "%57 = OpLabel\n"
700 "%globalSize = OpVariable %_ptr_Function_v3uint Function\n"
701 "%offset = OpVariable %_ptr_Function_uint Function\n"
702 "%bitmask = OpVariable %_ptr_Function__arr_uint_uint_4 Function\n"
703 "%temp = OpVariable %_ptr_Function_bool Function\n"
704 "%i = OpVariable %_ptr_Function_uint Function\n"
705 "%elementIndex = OpVariable %_ptr_Function_uint Function\n"
706 "%bitPosition = OpVariable %_ptr_Function_uint Function\n"
707 "%mask = OpVariable %_ptr_Function_uint Function\n"
708 "%element = OpVariable %_ptr_Function_uint Function\n"
709 "%tempResult = OpVariable %_ptr_Function_uint Function\n"
710 "%tempRes = OpVariable %_ptr_Function_uint Function\n"
711 "%58 = OpLoad %v3uint %gl_NumWorkGroups\n"
712 "%59 = OpIMul %v3uint %58 %gl_WorkGroupSize\n"
713 "OpStore %globalSize %59\n"
714 "%60 = OpAccessChain %_ptr_Function_uint %globalSize %uint_0\n"
715 "%61 = OpLoad %uint %60\n"
716 "%62 = OpAccessChain %_ptr_Function_uint %globalSize %uint_1\n"
717 "%63 = OpLoad %uint %62\n"
718 "%64 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_2\n"
719 "%65 = OpLoad %uint %64\n"
720 "%66 = OpIMul %uint %63 %65\n"
721 "%67 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_1\n"
722 "%68 = OpLoad %uint %67\n"
723 "%69 = OpIAdd %uint %66 %68\n"
724 "%70 = OpIMul %uint %61 %69\n"
725 "%71 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0\n"
726 "%72 = OpLoad %uint %71\n"
727 "%73 = OpIAdd %uint %70 %72\n"
728 "OpStore %offset %73\n"
729 "%74 = OpLoad %v4uint %gl_SubGroupGtMaskARB\n"
730 "%75 = OpCompositeExtract %uint %74 0\n"
731 "%76 = OpCompositeExtract %uint %74 1\n"
732 "%77 = OpCompositeExtract %uint %74 2\n"
733 "%78 = OpCompositeExtract %uint %74 3\n"
734 "%79 = OpCompositeConstruct %_arr_uint_uint_4 %75 %76 %77 %78\n"
735 "OpStore %bitmask %79\n"
736 "OpStore %temp %true\n"
737 "OpStore %i %uint_0\n"
738 "OpBranch %80\n"
739 "%80 = OpLabel\n"
740 "OpLoopMerge %81 %82 None\n"
741 "OpBranch %83\n"
742 "%83 = OpLabel\n"
743 "%84 = OpLoad %uint %i\n"
744 "%85 = OpLoad %uint %gl_SubGroupSizeARB\n"
745 "%86 = OpULessThan %bool %84 %85\n"
746 "OpBranchConditional %86 %87 %81\n"
747 "%87 = OpLabel\n"
748 "%88 = OpLoad %uint %i\n"
749 "%89 = OpUDiv %uint %88 %uint_32\n"
750 "OpStore %elementIndex %89\n"
751 "%90 = OpLoad %uint %i\n"
752 "%91 = OpUMod %uint %90 %uint_32\n"
753 "OpStore %bitPosition %91\n"
754 "%92 = OpLoad %uint %bitPosition\n"
755 "%93 = OpShiftLeftLogical %uint %uint_1 %92\n"
756 "OpStore %mask %93\n"
757 "%94 = OpLoad %uint %elementIndex\n"
758 "%95 = OpAccessChain %_ptr_Function_uint %bitmask %94\n"
759 "%96 = OpLoad %uint %95\n"
760 "OpStore %element %96\n"
761 "%97 = OpLoad %uint %i\n"
762 "%98 = OpLoad %uint %gl_SubGroupInvocationARB\n"
763 "%99 = OpUGreaterThan %bool %97 %98\n"
764 "OpSelectionMerge %100 None\n"
765 "OpBranchConditional %99 %101 %100\n"
766 "%101 = OpLabel\n"
767 "%102 = OpLoad %uint %element\n"
768 "%103 = OpLoad %uint %mask\n"
769 "%104 = OpBitwiseAnd %uint %102 %103\n"
770 "%105 = OpIEqual %bool %104 %uint_0\n"
771 "OpBranch %100\n"
772 "%100 = OpLabel\n"
773 "%106 = OpPhi %bool %99 %87 %105 %101\n"
774 "OpSelectionMerge %107 None\n"
775 "OpBranchConditional %106 %108 %107\n"
776 "%108 = OpLabel\n"
777 "OpStore %temp %false\n"
778 "OpBranch %107\n"
779 "%107 = OpLabel\n"
780 "%109 = OpLoad %uint %i\n"
781 "%110 = OpLoad %uint %gl_SubGroupInvocationARB\n"
782 "%111 = OpULessThanEqual %bool %109 %110\n"
783 "OpSelectionMerge %112 None\n"
784 "OpBranchConditional %111 %113 %112\n"
785 "%113 = OpLabel\n"
786 "%114 = OpLoad %uint %element\n"
787 "%115 = OpLoad %uint %mask\n"
788 "%116 = OpBitwiseAnd %uint %114 %115\n"
789 "%117 = OpINotEqual %bool %116 %uint_0\n"
790 "OpBranch %112\n"
791 "%112 = OpLabel\n"
792 "%118 = OpPhi %bool %111 %107 %117 %113\n"
793 "OpSelectionMerge %119 None\n"
794 "OpBranchConditional %118 %120 %119\n"
795 "%120 = OpLabel\n"
796 "OpStore %temp %false\n"
797 "OpBranch %119\n"
798 "%119 = OpLabel\n"
799 "OpBranch %82\n"
800 "%82 = OpLabel\n"
801 "%121 = OpLoad %uint %i\n"
802 "%122 = OpIAdd %uint %121 %int_1\n"
803 "OpStore %i %122\n"
804 "OpBranch %80\n"
805 "%81 = OpLabel\n"
806 "%123 = OpLoad %bool %temp\n"
807 "%124 = OpSelect %int %123 %int_15 %int_2\n"
808 "%125 = OpBitcast %uint %124\n"
809 "OpStore %tempResult %125\n"
810 "%126 = OpLoad %uint %tempResult\n"
811 "OpStore %tempRes %126\n"
812 "%127 = OpLoad %uint %offset\n"
813 "%128 = OpLoad %uint %tempRes\n"
814 "%129 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %127\n"
815 "OpStore %129 %128\n"
816 "OpReturn\n"
817 "OpFunctionEnd\n";
818 break;
819 case MASKTYPE_LE:
820 compute += "; SPIR-V\n"
821 "; Version: 1.6\n"
822 "; Generator: Khronos SPIR-V Tools Assembler; 0\n"
823 "; Bound: 130\n"
824 "; Schema: 0\n"
825 "OpCapability Shader\n"
826 "OpCapability Int64\n"
827 "OpCapability SubgroupBallotKHR\n"
828 "OpExtension \"SPV_KHR_shader_ballot\"\n"
829 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
830 "OpMemoryModel Logical GLSL450\n"
831 "OpEntryPoint GLCompute %main \"main\" %gl_NumWorkGroups %gl_GlobalInvocationID "
832 "%gl_SubGroupLeMaskARB %gl_SubGroupSizeARB %gl_SubGroupInvocationARB\n"
833 "OpExecutionMode %main LocalSize 1 1 1\n"
834 "OpSource GLSL 450\n"
835 "OpSourceExtension \"GL_ARB_gpu_shader_int64\"\n"
836 "OpSourceExtension \"GL_ARB_shader_ballot\"\n"
837 "OpName %main \"main\"\n"
838 "OpName %globalSize \"globalSize\"\n"
839 "OpName %gl_NumWorkGroups \"gl_NumWorkGroups\"\n"
840 "OpName %offset \"offset\"\n"
841 "OpName %gl_GlobalInvocationID \"gl_GlobalInvocationID\"\n"
842 "OpName %bitmask \"bitmask\"\n"
843 "OpName %gl_SubGroupLeMaskARB \"gl_SubGroupLeMaskARB\"\n"
844 "OpName %temp \"temp\"\n"
845 "OpName %i \"i\"\n"
846 "OpName %gl_SubGroupSizeARB \"gl_SubGroupSizeARB\"\n"
847 "OpName %elementIndex \"elementIndex\"\n"
848 "OpName %bitPosition \"bitPosition\"\n"
849 "OpName %mask \"mask\"\n"
850 "OpName %element \"element\"\n"
851 "OpName %gl_SubGroupInvocationARB \"gl_SubGroupInvocationARB\"\n"
852 "OpName %tempResult \"tempResult\"\n"
853 "OpName %tempRes \"tempRes\"\n"
854 "OpName %Buffer1 \"Buffer1\"\n"
855 "OpMemberName %Buffer1 0 \"result\"\n"
856 "OpName %_ \"\"\n"
857 "OpDecorate %gl_NumWorkGroups BuiltIn NumWorkgroups\n"
858 "OpDecorate %21 SpecId 0\n"
859 "OpDecorate %22 SpecId 1\n"
860 "OpDecorate %23 SpecId 2\n"
861 "OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize\n"
862 "OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId\n"
863 "OpDecorate %gl_SubGroupLeMaskARB BuiltIn SubgroupLeMask\n"
864 "OpDecorate %gl_SubGroupSizeARB BuiltIn SubgroupSize\n"
865 "OpDecorate %gl_SubGroupInvocationARB BuiltIn SubgroupLocalInvocationId\n"
866 "OpDecorate %_runtimearr_uint ArrayStride 4\n"
867 "OpMemberDecorate %Buffer1 0 Offset 0\n"
868 "OpDecorate %Buffer1 BufferBlock\n"
869 "OpDecorate %_ DescriptorSet 0\n"
870 "OpDecorate %_ Binding 0\n"
871 "%void = OpTypeVoid\n"
872 "%27 = OpTypeFunction %void\n"
873 "%uint = OpTypeInt 32 0\n"
874 "%v3uint = OpTypeVector %uint 3\n"
875 "%_ptr_Function_v3uint = OpTypePointer Function %v3uint\n"
876 "%_ptr_Input_v3uint = OpTypePointer Input %v3uint\n"
877 "%gl_NumWorkGroups = OpVariable %_ptr_Input_v3uint Input\n"
878 "%21 = OpSpecConstant %uint 1\n"
879 "%22 = OpSpecConstant %uint 1\n"
880 "%23 = OpSpecConstant %uint 1\n"
881 "%gl_WorkGroupSize = OpSpecConstantComposite %v3uint %21 %22 %23\n"
882 "%_ptr_Function_uint = OpTypePointer Function %uint\n"
883 "%uint_0 = OpConstant %uint 0\n"
884 "%uint_1 = OpConstant %uint 1\n"
885 "%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input\n"
886 "%uint_2 = OpConstant %uint 2\n"
887 "%_ptr_Input_uint = OpTypePointer Input %uint\n"
888 "%v4uint = OpTypeVector %uint 4\n"
889 "%uint_4 = OpConstant %uint 4\n"
890 "%_arr_uint_uint_4 = OpTypeArray %uint %uint_4\n"
891 "%_ptr_Function_v4uint = OpTypePointer Function %v4uint\n"
892 "%_ptr_Function__arr_uint_uint_4 = OpTypePointer Function %_arr_uint_uint_4\n"
893 "%ulong = OpTypeInt 64 0\n"
894 "%_ptr_Input_ulong = OpTypePointer Input %ulong\n"
895 "%_ptr_Input_v4uint = OpTypePointer Input %v4uint\n"
896 "%gl_SubGroupLeMaskARB = OpVariable %_ptr_Input_v4uint Input\n"
897 "%bool = OpTypeBool\n"
898 "%_ptr_Function_bool = OpTypePointer Function %bool\n"
899 "%true = OpConstantTrue %bool\n"
900 "%gl_SubGroupSizeARB = OpVariable %_ptr_Input_uint Input\n"
901 "%uint_32 = OpConstant %uint 32\n"
902 "%gl_SubGroupInvocationARB = OpVariable %_ptr_Input_uint Input\n"
903 "%false = OpConstantFalse %bool\n"
904 "%int = OpTypeInt 32 1\n"
905 "%int_1 = OpConstant %int 1\n"
906 "%int_15 = OpConstant %int 15\n"
907 "%int_2 = OpConstant %int 2\n"
908 "%_runtimearr_uint = OpTypeRuntimeArray %uint\n"
909 "%Buffer1 = OpTypeStruct %_runtimearr_uint\n"
910 "%_ptr_Uniform_Buffer1 = OpTypePointer Uniform %Buffer1\n"
911 "%_ = OpVariable %_ptr_Uniform_Buffer1 Uniform\n"
912 "%int_0 = OpConstant %int 0\n"
913 "%_ptr_Uniform_uint = OpTypePointer Uniform %uint\n"
914 "%main = OpFunction %void None %27\n"
915 "%57 = OpLabel\n"
916 "%globalSize = OpVariable %_ptr_Function_v3uint Function\n"
917 "%offset = OpVariable %_ptr_Function_uint Function\n"
918 "%bitmask = OpVariable %_ptr_Function__arr_uint_uint_4 Function\n"
919 "%temp = OpVariable %_ptr_Function_bool Function\n"
920 "%i = OpVariable %_ptr_Function_uint Function\n"
921 "%elementIndex = OpVariable %_ptr_Function_uint Function\n"
922 "%bitPosition = OpVariable %_ptr_Function_uint Function\n"
923 "%mask = OpVariable %_ptr_Function_uint Function\n"
924 "%element = OpVariable %_ptr_Function_uint Function\n"
925 "%tempResult = OpVariable %_ptr_Function_uint Function\n"
926 "%tempRes = OpVariable %_ptr_Function_uint Function\n"
927 "%58 = OpLoad %v3uint %gl_NumWorkGroups\n"
928 "%59 = OpIMul %v3uint %58 %gl_WorkGroupSize\n"
929 "OpStore %globalSize %59\n"
930 "%60 = OpAccessChain %_ptr_Function_uint %globalSize %uint_0\n"
931 "%61 = OpLoad %uint %60\n"
932 "%62 = OpAccessChain %_ptr_Function_uint %globalSize %uint_1\n"
933 "%63 = OpLoad %uint %62\n"
934 "%64 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_2\n"
935 "%65 = OpLoad %uint %64\n"
936 "%66 = OpIMul %uint %63 %65\n"
937 "%67 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_1\n"
938 "%68 = OpLoad %uint %67\n"
939 "%69 = OpIAdd %uint %66 %68\n"
940 "%70 = OpIMul %uint %61 %69\n"
941 "%71 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0\n"
942 "%72 = OpLoad %uint %71\n"
943 "%73 = OpIAdd %uint %70 %72\n"
944 "OpStore %offset %73\n"
945 "%74 = OpLoad %v4uint %gl_SubGroupLeMaskARB\n"
946 "%75 = OpCompositeExtract %uint %74 0\n"
947 "%76 = OpCompositeExtract %uint %74 1\n"
948 "%77 = OpCompositeExtract %uint %74 2\n"
949 "%78 = OpCompositeExtract %uint %74 3\n"
950 "%79 = OpCompositeConstruct %_arr_uint_uint_4 %75 %76 %77 %78\n"
951 "OpStore %bitmask %79\n"
952 "OpStore %temp %true\n"
953 "OpStore %i %uint_0\n"
954 "OpBranch %80\n"
955 "%80 = OpLabel\n"
956 "OpLoopMerge %81 %82 None\n"
957 "OpBranch %83\n"
958 "%83 = OpLabel\n"
959 "%84 = OpLoad %uint %i\n"
960 "%85 = OpLoad %uint %gl_SubGroupSizeARB\n"
961 "%86 = OpULessThan %bool %84 %85\n"
962 "OpBranchConditional %86 %87 %81\n"
963 "%87 = OpLabel\n"
964 "%88 = OpLoad %uint %i\n"
965 "%89 = OpUDiv %uint %88 %uint_32\n"
966 "OpStore %elementIndex %89\n"
967 "%90 = OpLoad %uint %i\n"
968 "%91 = OpUMod %uint %90 %uint_32\n"
969 "OpStore %bitPosition %91\n"
970 "%92 = OpLoad %uint %bitPosition\n"
971 "%93 = OpShiftLeftLogical %uint %uint_1 %92\n"
972 "OpStore %mask %93\n"
973 "%94 = OpLoad %uint %elementIndex\n"
974 "%95 = OpAccessChain %_ptr_Function_uint %bitmask %94\n"
975 "%96 = OpLoad %uint %95\n"
976 "OpStore %element %96\n"
977 "%97 = OpLoad %uint %i\n"
978 "%98 = OpLoad %uint %gl_SubGroupInvocationARB\n"
979 "%99 = OpULessThanEqual %bool %97 %98\n"
980 "OpSelectionMerge %100 None\n"
981 "OpBranchConditional %99 %101 %100\n"
982 "%101 = OpLabel\n"
983 "%102 = OpLoad %uint %element\n"
984 "%103 = OpLoad %uint %mask\n"
985 "%104 = OpBitwiseAnd %uint %102 %103\n"
986 "%105 = OpIEqual %bool %104 %uint_0\n"
987 "OpBranch %100\n"
988 "%100 = OpLabel\n"
989 "%106 = OpPhi %bool %99 %87 %105 %101\n"
990 "OpSelectionMerge %107 None\n"
991 "OpBranchConditional %106 %108 %107\n"
992 "%108 = OpLabel\n"
993 "OpStore %temp %false\n"
994 "OpBranch %107\n"
995 "%107 = OpLabel\n"
996 "%109 = OpLoad %uint %i\n"
997 "%110 = OpLoad %uint %gl_SubGroupInvocationARB\n"
998 "%111 = OpUGreaterThan %bool %109 %110\n"
999 "OpSelectionMerge %112 None\n"
1000 "OpBranchConditional %111 %113 %112\n"
1001 "%113 = OpLabel\n"
1002 "%114 = OpLoad %uint %element\n"
1003 "%115 = OpLoad %uint %mask\n"
1004 "%116 = OpBitwiseAnd %uint %114 %115\n"
1005 "%117 = OpINotEqual %bool %116 %uint_0\n"
1006 "OpBranch %112\n"
1007 "%112 = OpLabel\n"
1008 "%118 = OpPhi %bool %111 %107 %117 %113\n"
1009 "OpSelectionMerge %119 None\n"
1010 "OpBranchConditional %118 %120 %119\n"
1011 "%120 = OpLabel\n"
1012 "OpStore %temp %false\n"
1013 "OpBranch %119\n"
1014 "%119 = OpLabel\n"
1015 "OpBranch %82\n"
1016 "%82 = OpLabel\n"
1017 "%121 = OpLoad %uint %i\n"
1018 "%122 = OpIAdd %uint %121 %int_1\n"
1019 "OpStore %i %122\n"
1020 "OpBranch %80\n"
1021 "%81 = OpLabel\n"
1022 "%123 = OpLoad %bool %temp\n"
1023 "%124 = OpSelect %int %123 %int_15 %int_2\n"
1024 "%125 = OpBitcast %uint %124\n"
1025 "OpStore %tempResult %125\n"
1026 "%126 = OpLoad %uint %tempResult\n"
1027 "OpStore %tempRes %126\n"
1028 "%127 = OpLoad %uint %offset\n"
1029 "%128 = OpLoad %uint %tempRes\n"
1030 "%129 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %127\n"
1031 "OpStore %129 %128\n"
1032 "OpReturn\n"
1033 "OpFunctionEnd\n";
1034 break;
1035 case MASKTYPE_LT:
1036 compute += "; SPIR-V\n"
1037 "; Version: 1.6\n"
1038 "; Generator: Khronos SPIR-V Tools Assembler; 0\n"
1039 "; Bound: 130\n"
1040 "; Schema: 0\n"
1041 "OpCapability Shader\n"
1042 "OpCapability Int64\n"
1043 "OpCapability SubgroupBallotKHR\n"
1044 "OpExtension \"SPV_KHR_shader_ballot\"\n"
1045 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1046 "OpMemoryModel Logical GLSL450\n"
1047 "OpEntryPoint GLCompute %main \"main\" %gl_NumWorkGroups %gl_GlobalInvocationID "
1048 "%gl_SubGroupLtMaskARB %gl_SubGroupSizeARB %gl_SubGroupInvocationARB\n"
1049 "OpExecutionMode %main LocalSize 1 1 1\n"
1050 "OpSource GLSL 450\n"
1051 "OpSourceExtension \"GL_ARB_gpu_shader_int64\"\n"
1052 "OpSourceExtension \"GL_ARB_shader_ballot\"\n"
1053 "OpName %main \"main\"\n"
1054 "OpName %globalSize \"globalSize\"\n"
1055 "OpName %gl_NumWorkGroups \"gl_NumWorkGroups\"\n"
1056 "OpName %offset \"offset\"\n"
1057 "OpName %gl_GlobalInvocationID \"gl_GlobalInvocationID\"\n"
1058 "OpName %bitmask \"bitmask\"\n"
1059 "OpName %gl_SubGroupLtMaskARB \"gl_SubGroupLtMaskARB\"\n"
1060 "OpName %temp \"temp\"\n"
1061 "OpName %i \"i\"\n"
1062 "OpName %gl_SubGroupSizeARB \"gl_SubGroupSizeARB\"\n"
1063 "OpName %elementIndex \"elementIndex\"\n"
1064 "OpName %bitPosition \"bitPosition\"\n"
1065 "OpName %mask \"mask\"\n"
1066 "OpName %element \"element\"\n"
1067 "OpName %gl_SubGroupInvocationARB \"gl_SubGroupInvocationARB\"\n"
1068 "OpName %tempResult \"tempResult\"\n"
1069 "OpName %tempRes \"tempRes\"\n"
1070 "OpName %Buffer1 \"Buffer1\"\n"
1071 "OpMemberName %Buffer1 0 \"result\"\n"
1072 "OpName %_ \"\"\n"
1073 "OpDecorate %gl_NumWorkGroups BuiltIn NumWorkgroups\n"
1074 "OpDecorate %21 SpecId 0\n"
1075 "OpDecorate %22 SpecId 1\n"
1076 "OpDecorate %23 SpecId 2\n"
1077 "OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize\n"
1078 "OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId\n"
1079 "OpDecorate %gl_SubGroupLtMaskARB BuiltIn SubgroupLtMask\n"
1080 "OpDecorate %gl_SubGroupSizeARB BuiltIn SubgroupSize\n"
1081 "OpDecorate %gl_SubGroupInvocationARB BuiltIn SubgroupLocalInvocationId\n"
1082 "OpDecorate %_runtimearr_uint ArrayStride 4\n"
1083 "OpMemberDecorate %Buffer1 0 Offset 0\n"
1084 "OpDecorate %Buffer1 BufferBlock\n"
1085 "OpDecorate %_ DescriptorSet 0\n"
1086 "OpDecorate %_ Binding 0\n"
1087 "%void = OpTypeVoid\n"
1088 "%27 = OpTypeFunction %void\n"
1089 "%uint = OpTypeInt 32 0\n"
1090 "%v3uint = OpTypeVector %uint 3\n"
1091 "%_ptr_Function_v3uint = OpTypePointer Function %v3uint\n"
1092 "%_ptr_Input_v3uint = OpTypePointer Input %v3uint\n"
1093 "%gl_NumWorkGroups = OpVariable %_ptr_Input_v3uint Input\n"
1094 "%21 = OpSpecConstant %uint 1\n"
1095 "%22 = OpSpecConstant %uint 1\n"
1096 "%23 = OpSpecConstant %uint 1\n"
1097 "%gl_WorkGroupSize = OpSpecConstantComposite %v3uint %21 %22 %23\n"
1098 "%_ptr_Function_uint = OpTypePointer Function %uint\n"
1099 "%uint_0 = OpConstant %uint 0\n"
1100 "%uint_1 = OpConstant %uint 1\n"
1101 "%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input\n"
1102 "%uint_2 = OpConstant %uint 2\n"
1103 "%_ptr_Input_uint = OpTypePointer Input %uint\n"
1104 "%v4uint = OpTypeVector %uint 4\n"
1105 "%uint_4 = OpConstant %uint 4\n"
1106 "%_arr_uint_uint_4 = OpTypeArray %uint %uint_4\n"
1107 "%_ptr_Function_v4uint = OpTypePointer Function %v4uint\n"
1108 "%_ptr_Function__arr_uint_uint_4 = OpTypePointer Function %_arr_uint_uint_4\n"
1109 "%ulong = OpTypeInt 64 0\n"
1110 "%_ptr_Input_ulong = OpTypePointer Input %ulong\n"
1111 "%_ptr_Input_v4uint = OpTypePointer Input %v4uint\n"
1112 "%gl_SubGroupLtMaskARB = OpVariable %_ptr_Input_v4uint Input\n"
1113 "%bool = OpTypeBool\n"
1114 "%_ptr_Function_bool = OpTypePointer Function %bool\n"
1115 "%true = OpConstantTrue %bool\n"
1116 "%gl_SubGroupSizeARB = OpVariable %_ptr_Input_uint Input\n"
1117 "%uint_32 = OpConstant %uint 32\n"
1118 "%gl_SubGroupInvocationARB = OpVariable %_ptr_Input_uint Input\n"
1119 "%false = OpConstantFalse %bool\n"
1120 "%int = OpTypeInt 32 1\n"
1121 "%int_1 = OpConstant %int 1\n"
1122 "%int_15 = OpConstant %int 15\n"
1123 "%int_2 = OpConstant %int 2\n"
1124 "%_runtimearr_uint = OpTypeRuntimeArray %uint\n"
1125 "%Buffer1 = OpTypeStruct %_runtimearr_uint\n"
1126 "%_ptr_Uniform_Buffer1 = OpTypePointer Uniform %Buffer1\n"
1127 "%_ = OpVariable %_ptr_Uniform_Buffer1 Uniform\n"
1128 "%int_0 = OpConstant %int 0\n"
1129 "%_ptr_Uniform_uint = OpTypePointer Uniform %uint\n"
1130 "%main = OpFunction %void None %27\n"
1131 "%57 = OpLabel\n"
1132 "%globalSize = OpVariable %_ptr_Function_v3uint Function\n"
1133 "%offset = OpVariable %_ptr_Function_uint Function\n"
1134 "%bitmask = OpVariable %_ptr_Function__arr_uint_uint_4 Function\n"
1135 "%temp = OpVariable %_ptr_Function_bool Function\n"
1136 "%i = OpVariable %_ptr_Function_uint Function\n"
1137 "%elementIndex = OpVariable %_ptr_Function_uint Function\n"
1138 "%bitPosition = OpVariable %_ptr_Function_uint Function\n"
1139 "%mask = OpVariable %_ptr_Function_uint Function\n"
1140 "%element = OpVariable %_ptr_Function_uint Function\n"
1141 "%tempResult = OpVariable %_ptr_Function_uint Function\n"
1142 "%tempRes = OpVariable %_ptr_Function_uint Function\n"
1143 "%58 = OpLoad %v3uint %gl_NumWorkGroups\n"
1144 "%59 = OpIMul %v3uint %58 %gl_WorkGroupSize\n"
1145 "OpStore %globalSize %59\n"
1146 "%60 = OpAccessChain %_ptr_Function_uint %globalSize %uint_0\n"
1147 "%61 = OpLoad %uint %60\n"
1148 "%62 = OpAccessChain %_ptr_Function_uint %globalSize %uint_1\n"
1149 "%63 = OpLoad %uint %62\n"
1150 "%64 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_2\n"
1151 "%65 = OpLoad %uint %64\n"
1152 "%66 = OpIMul %uint %63 %65\n"
1153 "%67 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_1\n"
1154 "%68 = OpLoad %uint %67\n"
1155 "%69 = OpIAdd %uint %66 %68\n"
1156 "%70 = OpIMul %uint %61 %69\n"
1157 "%71 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0\n"
1158 "%72 = OpLoad %uint %71\n"
1159 "%73 = OpIAdd %uint %70 %72\n"
1160 "OpStore %offset %73\n"
1161 "%74 = OpLoad %v4uint %gl_SubGroupLtMaskARB\n"
1162 "%75 = OpCompositeExtract %uint %74 0\n"
1163 "%76 = OpCompositeExtract %uint %74 1\n"
1164 "%77 = OpCompositeExtract %uint %74 2\n"
1165 "%78 = OpCompositeExtract %uint %74 3\n"
1166 "%79 = OpCompositeConstruct %_arr_uint_uint_4 %75 %76 %77 %78\n"
1167 "OpStore %bitmask %79\n"
1168 "OpStore %temp %true\n"
1169 "OpStore %i %uint_0\n"
1170 "OpBranch %80\n"
1171 "%80 = OpLabel\n"
1172 "OpLoopMerge %81 %82 None\n"
1173 "OpBranch %83\n"
1174 "%83 = OpLabel\n"
1175 "%84 = OpLoad %uint %i\n"
1176 "%85 = OpLoad %uint %gl_SubGroupSizeARB\n"
1177 "%86 = OpULessThan %bool %84 %85\n"
1178 "OpBranchConditional %86 %87 %81\n"
1179 "%87 = OpLabel\n"
1180 "%88 = OpLoad %uint %i\n"
1181 "%89 = OpUDiv %uint %88 %uint_32\n"
1182 "OpStore %elementIndex %89\n"
1183 "%90 = OpLoad %uint %i\n"
1184 "%91 = OpUMod %uint %90 %uint_32\n"
1185 "OpStore %bitPosition %91\n"
1186 "%92 = OpLoad %uint %bitPosition\n"
1187 "%93 = OpShiftLeftLogical %uint %uint_1 %92\n"
1188 "OpStore %mask %93\n"
1189 "%94 = OpLoad %uint %elementIndex\n"
1190 "%95 = OpAccessChain %_ptr_Function_uint %bitmask %94\n"
1191 "%96 = OpLoad %uint %95\n"
1192 "OpStore %element %96\n"
1193 "%97 = OpLoad %uint %i\n"
1194 "%98 = OpLoad %uint %gl_SubGroupInvocationARB\n"
1195 "%99 = OpULessThan %bool %97 %98\n"
1196 "OpSelectionMerge %100 None\n"
1197 "OpBranchConditional %99 %101 %100\n"
1198 "%101 = OpLabel\n"
1199 "%102 = OpLoad %uint %element\n"
1200 "%103 = OpLoad %uint %mask\n"
1201 "%104 = OpBitwiseAnd %uint %102 %103\n"
1202 "%105 = OpIEqual %bool %104 %uint_0\n"
1203 "OpBranch %100\n"
1204 "%100 = OpLabel\n"
1205 "%106 = OpPhi %bool %99 %87 %105 %101\n"
1206 "OpSelectionMerge %107 None\n"
1207 "OpBranchConditional %106 %108 %107\n"
1208 "%108 = OpLabel\n"
1209 "OpStore %temp %false\n"
1210 "OpBranch %107\n"
1211 "%107 = OpLabel\n"
1212 "%109 = OpLoad %uint %i\n"
1213 "%110 = OpLoad %uint %gl_SubGroupInvocationARB\n"
1214 "%111 = OpUGreaterThanEqual %bool %109 %110\n"
1215 "OpSelectionMerge %112 None\n"
1216 "OpBranchConditional %111 %113 %112\n"
1217 "%113 = OpLabel\n"
1218 "%114 = OpLoad %uint %element\n"
1219 "%115 = OpLoad %uint %mask\n"
1220 "%116 = OpBitwiseAnd %uint %114 %115\n"
1221 "%117 = OpINotEqual %bool %116 %uint_0\n"
1222 "OpBranch %112\n"
1223 "%112 = OpLabel\n"
1224 "%118 = OpPhi %bool %111 %107 %117 %113\n"
1225 "OpSelectionMerge %119 None\n"
1226 "OpBranchConditional %118 %120 %119\n"
1227 "%120 = OpLabel\n"
1228 "OpStore %temp %false\n"
1229 "OpBranch %119\n"
1230 "%119 = OpLabel\n"
1231 "OpBranch %82\n"
1232 "%82 = OpLabel\n"
1233 "%121 = OpLoad %uint %i\n"
1234 "%122 = OpIAdd %uint %121 %int_1\n"
1235 "OpStore %i %122\n"
1236 "OpBranch %80\n"
1237 "%81 = OpLabel\n"
1238 "%123 = OpLoad %bool %temp\n"
1239 "%124 = OpSelect %int %123 %int_15 %int_2\n"
1240 "%125 = OpBitcast %uint %124\n"
1241 "OpStore %tempResult %125\n"
1242 "%126 = OpLoad %uint %tempResult\n"
1243 "OpStore %tempRes %126\n"
1244 "%127 = OpLoad %uint %offset\n"
1245 "%128 = OpLoad %uint %tempRes\n"
1246 "%129 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %127\n"
1247 "OpStore %129 %128\n"
1248 "OpReturn\n"
1249 "OpFunctionEnd\n";
1250 break;
1251 default:
1252 TCU_THROW(InternalError, "Unknown mask type");
1253 }
1254 programCollection.spirvAsmSources.add("comp") << compute << buildOptionsSpr;
1255 }
1256 else
1257 {
1258 subgroups::initStdPrograms(programCollection, buildOptions, caseDef.shaderStage, VK_FORMAT_R32_UINT,
1259 pointSizeSupport, extHeader, testSrc, "", headDeclarations);
1260 }
1261 }
1262
supportedCheck(Context & context,CaseDefinition caseDef)1263 void supportedCheck(Context &context, CaseDefinition caseDef)
1264 {
1265 if (!subgroups::isSubgroupSupported(context))
1266 TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
1267
1268 if (!context.requireDeviceFunctionality("VK_EXT_shader_subgroup_ballot"))
1269 {
1270 TCU_THROW(NotSupportedError, "Device does not support VK_EXT_shader_subgroup_ballot extension");
1271 }
1272
1273 if (!subgroups::isInt64SupportedForDevice(context))
1274 TCU_THROW(NotSupportedError, "Int64 is not supported");
1275
1276 if (caseDef.requiredSubgroupSize)
1277 {
1278 context.requireDeviceFunctionality("VK_EXT_subgroup_size_control");
1279
1280 #ifndef CTS_USES_VULKANSC
1281 const VkPhysicalDeviceSubgroupSizeControlFeatures &subgroupSizeControlFeatures =
1282 context.getSubgroupSizeControlFeatures();
1283 const VkPhysicalDeviceSubgroupSizeControlProperties &subgroupSizeControlProperties =
1284 context.getSubgroupSizeControlProperties();
1285 #else
1286 const VkPhysicalDeviceSubgroupSizeControlFeaturesEXT &subgroupSizeControlFeatures =
1287 context.getSubgroupSizeControlFeaturesEXT();
1288 const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT &subgroupSizeControlProperties =
1289 context.getSubgroupSizeControlPropertiesEXT();
1290 #endif // CTS_USES_VULKANSC
1291
1292 if (subgroupSizeControlFeatures.subgroupSizeControl == false)
1293 TCU_THROW(NotSupportedError, "Device does not support varying subgroup sizes nor required subgroup size");
1294
1295 if (subgroupSizeControlFeatures.computeFullSubgroups == false)
1296 TCU_THROW(NotSupportedError, "Device does not support full subgroups in compute shaders");
1297
1298 if ((subgroupSizeControlProperties.requiredSubgroupSizeStages & caseDef.shaderStage) != caseDef.shaderStage)
1299 TCU_THROW(NotSupportedError, "Required subgroup size is not supported for shader stage");
1300 }
1301
1302 *caseDef.geometryPointSizeSupported = subgroups::isTessellationAndGeometryPointSizeSupported(context);
1303
1304 #ifndef CTS_USES_VULKANSC
1305 if (isAllRayTracingStages(caseDef.shaderStage))
1306 {
1307 context.requireDeviceFunctionality("VK_KHR_ray_tracing_pipeline");
1308 }
1309 else if (isAllMeshShadingStages(caseDef.shaderStage))
1310 {
1311 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_VERTEX_PIPELINE_STORES_AND_ATOMICS);
1312 context.requireDeviceFunctionality("VK_EXT_mesh_shader");
1313
1314 if ((caseDef.shaderStage & VK_SHADER_STAGE_TASK_BIT_EXT) != 0u)
1315 {
1316 const auto &features = context.getMeshShaderFeaturesEXT();
1317 if (!features.taskShader)
1318 TCU_THROW(NotSupportedError, "Task shaders not supported");
1319 }
1320 }
1321 #endif // CTS_USES_VULKANSC
1322
1323 subgroups::supportedCheckShader(context, caseDef.shaderStage);
1324 }
1325
noSSBOtest(Context & context,const CaseDefinition caseDef)1326 TestStatus noSSBOtest(Context &context, const CaseDefinition caseDef)
1327 {
1328 switch (caseDef.shaderStage)
1329 {
1330 case VK_SHADER_STAGE_VERTEX_BIT:
1331 return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, DE_NULL,
1332 checkVertexPipelineStages);
1333 case VK_SHADER_STAGE_GEOMETRY_BIT:
1334 return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, DE_NULL,
1335 checkVertexPipelineStages);
1336 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
1337 return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, DE_NULL,
1338 checkVertexPipelineStages);
1339 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
1340 return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, DE_NULL,
1341 checkVertexPipelineStages);
1342 default:
1343 TCU_THROW(InternalError, "Unhandled shader stage");
1344 }
1345 }
1346
test(Context & context,const CaseDefinition caseDef)1347 TestStatus test(Context &context, const CaseDefinition caseDef)
1348 {
1349 const bool isCompute = isAllComputeStages(caseDef.shaderStage);
1350 #ifndef CTS_USES_VULKANSC
1351 const bool isMesh = isAllMeshShadingStages(caseDef.shaderStage);
1352 #else
1353 const bool isMesh = false;
1354 #endif // CTS_USES_VULKANSC
1355 DE_ASSERT(!(isCompute && isMesh));
1356
1357 if (isCompute || isMesh)
1358 {
1359 #ifndef CTS_USES_VULKANSC
1360 const VkPhysicalDeviceSubgroupSizeControlProperties &subgroupSizeControlProperties =
1361 context.getSubgroupSizeControlProperties();
1362 #else
1363 const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT &subgroupSizeControlProperties =
1364 context.getSubgroupSizeControlPropertiesEXT();
1365 #endif // CTS_USES_VULKANSC
1366 TestLog &log = context.getTestContext().getLog();
1367
1368 if (caseDef.requiredSubgroupSize == false)
1369 {
1370 if (isCompute)
1371 return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, DE_NULL, checkComputeOrMesh);
1372 else
1373 return subgroups::makeMeshTest(context, VK_FORMAT_R32_UINT, nullptr, 0, nullptr, checkComputeOrMesh);
1374 }
1375
1376 // gl_SubGroup*MaskARB are uint64_t, so we limit max subgroup size to 64 for this test
1377 uint32_t maxSubgroupSize = min(subgroupSizeControlProperties.maxSubgroupSize, 64U);
1378
1379 log << TestLog::Message << "Testing required subgroup size range ["
1380 << subgroupSizeControlProperties.minSubgroupSize << ", " << maxSubgroupSize
1381 << "]" << TestLog::EndMessage;
1382
1383 // According to the spec, requiredSubgroupSize must be a power-of-two integer.
1384 for (uint32_t size = subgroupSizeControlProperties.minSubgroupSize;
1385 size <= maxSubgroupSize; size *= 2)
1386 {
1387 TestStatus result(QP_TEST_RESULT_INTERNAL_ERROR, "Internal Error");
1388
1389 if (isCompute)
1390 result = subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0u, DE_NULL,
1391 checkComputeOrMesh, size);
1392 else
1393 result = subgroups::makeMeshTest(context, VK_FORMAT_R32_UINT, nullptr, 0u, nullptr, checkComputeOrMesh,
1394 size);
1395
1396 if (result.getCode() != QP_TEST_RESULT_PASS)
1397 {
1398 log << TestLog::Message << "subgroupSize " << size << " failed" << TestLog::EndMessage;
1399 return result;
1400 }
1401 }
1402
1403 return TestStatus::pass("OK");
1404 }
1405 else if (isAllGraphicsStages(caseDef.shaderStage))
1406 {
1407 const VkShaderStageFlags stages = subgroups::getPossibleGraphicsSubgroupStages(context, caseDef.shaderStage);
1408
1409 return subgroups::allStages(context, VK_FORMAT_R32_UINT, DE_NULL, 0, DE_NULL, checkVertexPipelineStages,
1410 stages);
1411 }
1412 #ifndef CTS_USES_VULKANSC
1413 else if (isAllRayTracingStages(caseDef.shaderStage))
1414 {
1415 const VkShaderStageFlags stages = subgroups::getPossibleRayTracingSubgroupStages(context, caseDef.shaderStage);
1416
1417 return subgroups::allRayTracingStages(context, VK_FORMAT_R32_UINT, DE_NULL, 0, DE_NULL,
1418 checkVertexPipelineStages, stages);
1419 }
1420 #endif // CTS_USES_VULKANSC
1421 else
1422 TCU_THROW(InternalError, "Unknown stage or invalid stage set");
1423 }
1424 } // namespace
1425
1426 namespace vkt
1427 {
1428 namespace subgroups
1429 {
createSubgroupsBallotMasksTests(TestContext & testCtx)1430 TestCaseGroup *createSubgroupsBallotMasksTests(TestContext &testCtx)
1431 {
1432 de::MovePtr<TestCaseGroup> group(new TestCaseGroup(testCtx, "ballot_mask"));
1433 de::MovePtr<TestCaseGroup> groupARB(new TestCaseGroup(testCtx, "ext_shader_subgroup_ballot"));
1434 de::MovePtr<TestCaseGroup> graphicGroup(new TestCaseGroup(testCtx, "graphics"));
1435 de::MovePtr<TestCaseGroup> computeGroup(new TestCaseGroup(testCtx, "compute"));
1436 de::MovePtr<TestCaseGroup> framebufferGroup(new TestCaseGroup(testCtx, "framebuffer"));
1437 #ifndef CTS_USES_VULKANSC
1438 de::MovePtr<TestCaseGroup> raytracingGroup(new TestCaseGroup(testCtx, "ray_tracing"));
1439 de::MovePtr<TestCaseGroup> meshGroup(new TestCaseGroup(testCtx, "mesh"));
1440 #endif // CTS_USES_VULKANSC
1441 const VkShaderStageFlags fbStages[] = {
1442 VK_SHADER_STAGE_VERTEX_BIT,
1443 VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
1444 VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
1445 VK_SHADER_STAGE_GEOMETRY_BIT,
1446 };
1447 #ifndef CTS_USES_VULKANSC
1448 const VkShaderStageFlags meshStages[] = {
1449 VK_SHADER_STAGE_MESH_BIT_EXT,
1450 VK_SHADER_STAGE_TASK_BIT_EXT,
1451 };
1452 #endif // CTS_USES_VULKANSC
1453 const bool boolValues[] = {false, true};
1454
1455 for (int maskTypeIndex = 0; maskTypeIndex < MASKTYPE_LAST; ++maskTypeIndex)
1456 {
1457 const MaskType maskType = static_cast<MaskType>(maskTypeIndex);
1458 const string mask = de::toLower(getMaskTypeName(maskType));
1459
1460 for (size_t groupSizeNdx = 0; groupSizeNdx < DE_LENGTH_OF_ARRAY(boolValues); ++groupSizeNdx)
1461 {
1462 const bool requiredSubgroupSize = boolValues[groupSizeNdx];
1463 const string testName = mask + (requiredSubgroupSize ? "_requiredsubgroupsize" : "");
1464 const CaseDefinition caseDef = {
1465 maskType, // MaskType maskType;
1466 VK_SHADER_STAGE_COMPUTE_BIT, // VkShaderStageFlags shaderStage;
1467 de::SharedPtr<bool>(new bool), // de::SharedPtr<bool> geometryPointSizeSupported;
1468 requiredSubgroupSize, // bool requiredSubgroupSize;
1469 };
1470
1471 addFunctionCaseWithPrograms(computeGroup.get(), testName, supportedCheck, initPrograms, test, caseDef);
1472 }
1473
1474 #ifndef CTS_USES_VULKANSC
1475 for (size_t groupSizeNdx = 0; groupSizeNdx < DE_LENGTH_OF_ARRAY(boolValues); ++groupSizeNdx)
1476 {
1477 for (const auto &stage : meshStages)
1478 {
1479 const bool requiredSubgroupSize = boolValues[groupSizeNdx];
1480 const string testName = mask + (requiredSubgroupSize ? "_requiredsubgroupsize" : "");
1481 const CaseDefinition caseDef = {
1482 maskType, // MaskType maskType;
1483 stage, // VkShaderStageFlags shaderStage;
1484 de::SharedPtr<bool>(new bool), // de::SharedPtr<bool> geometryPointSizeSupported;
1485 requiredSubgroupSize, // bool requiredSubgroupSize;
1486 };
1487
1488 addFunctionCaseWithPrograms(meshGroup.get(), testName + "_" + getShaderStageName(stage), supportedCheck,
1489 initPrograms, test, caseDef);
1490 }
1491 }
1492 #endif // CTS_USES_VULKANSC
1493
1494 {
1495 const CaseDefinition caseDef = {
1496 maskType, // MaskType maskType;
1497 VK_SHADER_STAGE_ALL_GRAPHICS, // VkShaderStageFlags shaderStage;
1498 de::SharedPtr<bool>(new bool), // de::SharedPtr<bool> geometryPointSizeSupported;
1499 false // bool requiredSubgroupSize;
1500 };
1501
1502 addFunctionCaseWithPrograms(graphicGroup.get(), mask, supportedCheck, initPrograms, test, caseDef);
1503 }
1504
1505 #ifndef CTS_USES_VULKANSC
1506 {
1507 const CaseDefinition caseDef = {
1508 maskType, // MaskType maskType;
1509 SHADER_STAGE_ALL_RAY_TRACING, // VkShaderStageFlags shaderStage;
1510 de::SharedPtr<bool>(new bool), // de::SharedPtr<bool> geometryPointSizeSupported;
1511 false // bool requiredSubgroupSize;
1512 };
1513
1514 addFunctionCaseWithPrograms(raytracingGroup.get(), mask, supportedCheck, initPrograms, test, caseDef);
1515 }
1516 #endif // CTS_USES_VULKANSC
1517
1518 for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(fbStages); ++stageIndex)
1519 {
1520 const CaseDefinition caseDef = {
1521 maskType, // MaskType maskType;
1522 fbStages[stageIndex], // VkShaderStageFlags shaderStage;
1523 de::SharedPtr<bool>(new bool), // de::SharedPtr<bool> geometryPointSizeSupported;
1524 false // bool requiredSubgroupSize;
1525 };
1526 const string testName = mask + "_" + getShaderStageName(caseDef.shaderStage);
1527
1528 addFunctionCaseWithPrograms(framebufferGroup.get(), testName, supportedCheck, initFrameBufferPrograms,
1529 noSSBOtest, caseDef);
1530 }
1531 }
1532
1533 groupARB->addChild(graphicGroup.release());
1534 groupARB->addChild(computeGroup.release());
1535 groupARB->addChild(framebufferGroup.release());
1536 #ifndef CTS_USES_VULKANSC
1537 groupARB->addChild(raytracingGroup.release());
1538 groupARB->addChild(meshGroup.release());
1539 #endif // CTS_USES_VULKANSC
1540 group->addChild(groupARB.release());
1541
1542 return group.release();
1543 }
1544
1545 } // namespace subgroups
1546 } // namespace vkt
1547