1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2020 The Khronos Group Inc.
6  * Copyright (c) 2020 Google LLC.
7  * Copyright (c) 2023 LunarG, Inc.
8  * Copyright (c) 2023 Nintendo
9  *
10  * Licensed under the Apache License, Version 2.0 (the "License");
11  * you may not use this file except in compliance with the License.
12  * You may obtain a copy of the License at
13  *
14  *      http://www.apache.org/licenses/LICENSE-2.0
15  *
16  * Unless required by applicable law or agreed to in writing, software
17  * distributed under the License is distributed on an "AS IS" BASIS,
18  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19  * See the License for the specific language governing permissions and
20  * limitations under the License.
21  *
22  *//*!
23  * \file
24  * \brief VK_KHR_zero_initialize_workgroup_memory tests
25  *//*--------------------------------------------------------------------*/
26 
27 #include "vktComputeZeroInitializeWorkgroupMemoryTests.hpp"
28 #include "vktTestCase.hpp"
29 #include "vktTestCaseUtil.hpp"
30 #include "vktTestGroupUtil.hpp"
31 #include "vktAmberTestCase.hpp"
32 
33 #include "vkBufferWithMemory.hpp"
34 #include "vkImageWithMemory.hpp"
35 #include "vkQueryUtil.hpp"
36 #include "vkBuilderUtil.hpp"
37 #include "vkCmdUtil.hpp"
38 #include "vkTypeUtil.hpp"
39 #include "vkObjUtil.hpp"
40 #include "vkDefs.hpp"
41 #include "vkRef.hpp"
42 
43 #include "tcuCommandLine.hpp"
44 #include "tcuTestLog.hpp"
45 
46 #include "deRandom.hpp"
47 #include "deStringUtil.hpp"
48 #include "deUniquePtr.hpp"
49 
50 #include <algorithm>
51 #include <vector>
52 
53 using namespace vk;
54 
55 namespace vkt
56 {
57 namespace compute
58 {
59 namespace
60 {
61 
// Dispatches the "comp" shader from the context's binary collection against a single
// host-visible storage buffer (set 0, binding 0) and verifies the result on the host.
//
// The buffer holds bufferSize bytes of uint32 values. When increment == 0 it is
// pre-filled with 0xff bytes and every uint32 is expected to read back as 0 (the
// shader must have overwritten the garbage with zeros). When increment != 0 the
// buffer starts zeroed and every uint32 is expected to equal numWGX * numWGY * numWGZ
// (each workgroup is expected to have atomically added 1 per slot).
//
// specValues, when non-empty, are bound as consecutive 32-bit specialization
// constants with constant IDs 0..specValues.size()-1.
tcu::TestStatus runCompute(Context &context, uint32_t bufferSize, uint32_t numWGX, uint32_t numWGY, uint32_t numWGZ,
                           vk::ComputePipelineConstructionType m_computePipelineConstructionType,
                           const std::vector<uint32_t> specValues = {}, uint32_t increment = 0)
{
    const DeviceInterface &vk = context.getDeviceInterface();
    const VkDevice device     = context.getDevice();
    Allocator &allocator      = context.getDefaultAllocator();
    tcu::TestLog &log         = context.getTestContext().getLog();

    de::MovePtr<BufferWithMemory> buffer;
    VkDescriptorBufferInfo bufferDescriptor;

    // Host-visible storage buffer: written by the shader, read back directly on the host.
    VkDeviceSize size = bufferSize;
    buffer            = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
        vk, device, allocator,
        makeBufferCreateInfo(size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
                                                  VK_BUFFER_USAGE_TRANSFER_SRC_BIT),
        MemoryRequirement::HostVisible));
    bufferDescriptor  = makeDescriptorBufferInfo(**buffer, 0, size);

    // Seed the buffer: zeros for the atomic-increment variant, an all-ones pattern
    // otherwise so stale data cannot masquerade as a correct zero result.
    uint32_t *ptr = (uint32_t *)buffer->getAllocation().getHostPtr();
    deMemset(ptr, increment ? 0 : 0xff, (size_t)size);

    DescriptorSetLayoutBuilder layoutBuilder;
    layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);

    Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
    Unique<VkDescriptorPool> descriptorPool(
        DescriptorPoolBuilder()
            .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1u)
            .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
    Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));

    // Map specValues[i] to specialization constant ID i; each entry is one tightly
    // packed 32-bit value.
    std::vector<VkSpecializationMapEntry> entries(specValues.size());
    if (!specValues.empty())
    {
        for (uint32_t i = 0; i < specValues.size(); ++i)
        {
            entries[i] = {i, (uint32_t)(sizeof(uint32_t) * i), sizeof(uint32_t)};
        }
    }
    // { mapEntryCount, pMapEntries, dataSize, pData }
    const VkSpecializationInfo specInfo = {
        (uint32_t)specValues.size(),
        entries.data(),
        specValues.size() * sizeof(uint32_t),
        specValues.data(),
    };
    VkPipelineBindPoint bindPoint = VK_PIPELINE_BIND_POINT_COMPUTE;
    // Make the host-written seed values visible to the device before submission.
    flushAlloc(vk, device, buffer->getAllocation());

    ComputePipelineWrapper pipeline(vk, device, m_computePipelineConstructionType,
                                    context.getBinaryCollection().get("comp"));
    pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
    pipeline.setSpecializationInfo(specInfo);
    pipeline.buildPipeline();

    const VkQueue queue             = context.getUniversalQueue();
    Move<VkCommandPool> cmdPool     = createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
                                                        context.getUniversalQueueFamilyIndex());
    Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);

    DescriptorSetUpdateBuilder setUpdateBuilder;
    setUpdateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0),
                                 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptor);
    setUpdateBuilder.update(vk, device);

    beginCommandBuffer(vk, *cmdBuffer, 0);

    vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, pipeline.getPipelineLayout(), 0u, 1, &*descriptorSet, 0u, DE_NULL);
    pipeline.bind(*cmdBuffer);

    vk.cmdDispatch(*cmdBuffer, numWGX, numWGY, numWGZ);

    // Make shader writes available to host reads before the host inspects the buffer.
    const VkMemoryBarrier barrier = {
        VK_STRUCTURE_TYPE_MEMORY_BARRIER, // sType
        nullptr,                          // pNext
        VK_ACCESS_SHADER_WRITE_BIT,       // srcAccessMask
        VK_ACCESS_HOST_READ_BIT,          // dstAccessMask
    };
    vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
                          (VkDependencyFlags)0, 1, &barrier, 0, nullptr, 0, nullptr);

    endCommandBuffer(vk, *cmdBuffer);

    submitCommandsAndWait(vk, device, queue, cmdBuffer.get());

    invalidateAlloc(vk, device, buffer->getAllocation());

    // Verify every uint32 slot; log and fail on the first mismatch.
    for (uint32_t i = 0; i < (uint32_t)size / sizeof(uint32_t); ++i)
    {
        uint32_t expected = increment ? numWGX * numWGY * numWGZ : 0u;
        if (ptr[i] != expected)
        {
            log << tcu::TestLog::Message << "failure at index " << i << ": expected " << expected << ", got: " << ptr[i]
                << tcu::TestLog::EndMessage;
            return tcu::TestStatus::fail("compute failed");
        }
    }

    return tcu::TestStatus::pass("compute succeeded");
}
163 
// Instance side of the max-workgroup-memory test: iterate() sizes a workgroup from
// the device limits and dispatches m_numWorkgroups workgroups via runCompute().
class MaxWorkgroupMemoryInstance : public vkt::TestInstance
{
public:
    // numWorkgroups: X-dimension workgroup count used for the dispatch in iterate().
    MaxWorkgroupMemoryInstance(Context &context, uint32_t numWorkgroups,
                               const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestInstance(context)
        , m_numWorkgroups(numWorkgroups)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }
    tcu::TestStatus iterate(void);

private:
    // Number of workgroups to dispatch.
    uint32_t m_numWorkgroups;
    // Selects the monolithic-pipeline vs. shader-object construction path.
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};
180 
// Test case: zero-initialize a shared-memory array covering the device's full
// maxComputeSharedMemorySize and verify it reads back as zero across several
// concurrently dispatched workgroups.
class MaxWorkgroupMemoryTest : public vkt::TestCase
{
public:
    // numWorkgroups: workgroup count forwarded to the instance's dispatch.
    MaxWorkgroupMemoryTest(tcu::TestContext &testCtx, const std::string &name, uint32_t numWorkgroups,
                           const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestCase(testCtx, name)
        , m_numWorkgroups(numWorkgroups)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }

    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const
    {
        return new MaxWorkgroupMemoryInstance(context, m_numWorkgroups, m_computePipelineConstructionType);
    }
    virtual void checkSupport(Context &context) const;

private:
    // Number of workgroups to dispatch.
    uint32_t m_numWorkgroups;
    // Selects the monolithic-pipeline vs. shader-object construction path.
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};
203 
// Requires the zero-initialize-workgroup-memory extension plus whatever the chosen
// pipeline construction path (e.g. shader objects) additionally needs.
void MaxWorkgroupMemoryTest::checkSupport(Context &context) const
{
    context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
                                  m_computePipelineConstructionType);
}
210 
initPrograms(SourceCollections & sourceCollections) const211 void MaxWorkgroupMemoryTest::initPrograms(SourceCollections &sourceCollections) const
212 {
213     std::ostringstream src;
214     src << "#version 450\n";
215     src << "#extension GL_EXT_null_initializer : enable\n";
216     src << "layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n";
217     src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
218     src << "layout(constant_id = 3) const uint num_elems = " << 16384 / 16 << ";\n";
219     src << "layout(constant_id = 4) const uint num_wgs = 0;\n";
220     src << "shared uvec4 wg_mem[num_elems] = {};\n";
221     src << "void main() {\n";
222     src << "  uint idx_z = gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y;\n";
223     src << "  uint idx_y = gl_LocalInvocationID.y * gl_WorkGroupSize.x;\n";
224     src << "  uint idx_x = gl_LocalInvocationID.x;\n";
225     src << "  uint idx = idx_x + idx_y + idx_z;\n";
226     src << "  uint wg_size = gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z;\n";
227     src << "  for (uint i = 0; i < num_elems; ++i) {\n";
228     src << "    for (uint j = 0; j < 4; ++j) {\n";
229     src << "      uint shared_idx = 4*i + j;\n";
230     src << "      uint wg_val = wg_mem[i][j];\n";
231     src << "      if (idx == shared_idx) {\n";
232     src << "        atomicAdd(a.a[idx], wg_val == 0 ? 1 : 0);\n";
233     src << "      } else if (idx == 0 && shared_idx >= wg_size) {\n";
234     src << "        atomicAdd(a.a[shared_idx], wg_val == 0 ? 1 : 0);\n";
235     src << "      }\n";
236     src << "    }\n";
237     src << "  }\n";
238     src << "}\n";
239 
240     sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
241 }
242 
iterate(void)243 tcu::TestStatus MaxWorkgroupMemoryInstance::iterate(void)
244 {
245     VkPhysicalDeviceProperties properties;
246     m_context.getInstanceInterface().getPhysicalDeviceProperties(m_context.getPhysicalDevice(), &properties);
247     const uint32_t maxMemSize = properties.limits.maxComputeSharedMemorySize;
248 
249     const uint32_t maxWG = std::min(247u, (properties.limits.maxComputeWorkGroupInvocations / 13) * 13);
250     uint32_t wgx         = (properties.limits.maxComputeWorkGroupSize[0] / 13) * 13;
251     uint32_t wgy         = 1;
252     uint32_t wgz         = 1;
253     if (wgx < maxWG)
254     {
255         wgy = std::min(maxWG / wgx, (properties.limits.maxComputeWorkGroupSize[1] / 13) * 13);
256     }
257     if ((wgx * wgy) < maxWG)
258     {
259         wgz = std::min(maxWG / wgx / wgy, (properties.limits.maxComputeWorkGroupSize[2] / 13) * 13);
260     }
261     const uint32_t size     = maxMemSize;
262     const uint32_t numElems = maxMemSize / 16;
263 
264     return runCompute(m_context, size, m_numWorkgroups, 1, 1, m_computePipelineConstructionType,
265                       {wgx, wgy, wgz, numElems}, /*increment*/ 1);
266 }
267 
AddMaxWorkgroupMemoryTests(tcu::TestCaseGroup * group,vk::ComputePipelineConstructionType computePipelineConstructionType)268 void AddMaxWorkgroupMemoryTests(tcu::TestCaseGroup *group,
269                                 vk::ComputePipelineConstructionType computePipelineConstructionType)
270 {
271     std::vector<uint32_t> workgroups = {1, 2, 4, 16, 64, 128};
272     for (uint32_t i = 0; i < workgroups.size(); ++i)
273     {
274         uint32_t numWG = workgroups[i];
275         group->addChild(new MaxWorkgroupMemoryTest(group->getTestContext(), de::toString(numWG), numWG,
276                                                    computePipelineConstructionType));
277     }
278 }
279 
// Parameters for one zero-initialized-type test; consumed by TypeTest/TypeTestInstance.
struct TypeCaseDef
{
    std::string typeName; // GLSL type under test, e.g. "uvec4" or "f16mat3x2"
    uint32_t typeSize;    // bytes per scalar component (>4 switches comparisons to int64_t)
    uint32_t numElements; // components per vector / per matrix column-index step; 1 for scalars
    uint32_t numRows;     // second matrix dimension used for row/col decomposition; 1 for scalars and vectors
    uint32_t numVariables; // how many shared variables to declare; assigned pseudo-randomly by AddTypeTests
};
288 
// Instance side of TypeTest: computes the result-buffer size for the case and runs
// the generated shader once via runCompute().
class TypeTestInstance : public vkt::TestInstance
{
public:
    TypeTestInstance(Context &context, const TypeCaseDef &caseDef,
                     const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestInstance(context)
        , m_caseDef(caseDef)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }
    tcu::TestStatus iterate(void);

private:
    // Copy of the case parameters (type name, dimensions, variable count).
    TypeCaseDef m_caseDef;
    // Selects the monolithic-pipeline vs. shader-object construction path.
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};
305 
// Test case: declare N shared variables of a given GLSL type with a null initializer
// ("= {}") and verify every component reads back as zero.
class TypeTest : public vkt::TestCase
{
public:
    TypeTest(tcu::TestContext &testCtx, const std::string &name, const TypeCaseDef &caseDef,
             const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestCase(testCtx, name)
        , m_caseDef(caseDef)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }

    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const
    {
        return new TypeTestInstance(context, m_caseDef, m_computePipelineConstructionType);
    }
    virtual void checkSupport(Context &context) const;

private:
    // Case parameters (type name, dimensions, variable count).
    TypeCaseDef m_caseDef;
    // Selects the monolithic-pipeline vs. shader-object construction path.
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};
328 
checkSupport(Context & context) const329 void TypeTest::checkSupport(Context &context) const
330 {
331     context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
332     checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
333                                   m_computePipelineConstructionType);
334 
335     VkPhysicalDeviceShaderFloat16Int8Features f16_i8_features;
336     deMemset(&f16_i8_features, 0, sizeof(f16_i8_features));
337     f16_i8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
338     f16_i8_features.pNext = DE_NULL;
339 
340     VkPhysicalDeviceFeatures2 features2;
341     deMemset(&features2, 0, sizeof(features2));
342     features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
343     features2.pNext = &f16_i8_features;
344     context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2);
345 
346     if (m_caseDef.typeName == "float16_t" || m_caseDef.typeName == "f16vec2" || m_caseDef.typeName == "f16vec3" ||
347         m_caseDef.typeName == "f16vec4" || m_caseDef.typeName == "f16mat2x2" || m_caseDef.typeName == "f16mat2x3" ||
348         m_caseDef.typeName == "f16mat2x4" || m_caseDef.typeName == "f16mat3x2" || m_caseDef.typeName == "f16mat3x3" ||
349         m_caseDef.typeName == "f16mat3x4" || m_caseDef.typeName == "f16mat4x2" || m_caseDef.typeName == "f16mat4x3" ||
350         m_caseDef.typeName == "f16mat4x4")
351     {
352         if (f16_i8_features.shaderFloat16 != VK_TRUE)
353             TCU_THROW(NotSupportedError, "shaderFloat16 not supported");
354     }
355 
356     if (m_caseDef.typeName == "float64_t" || m_caseDef.typeName == "f64vec2" || m_caseDef.typeName == "f64vec3" ||
357         m_caseDef.typeName == "f64vec4" || m_caseDef.typeName == "f64mat2x2" || m_caseDef.typeName == "f64mat2x3" ||
358         m_caseDef.typeName == "f64mat2x4" || m_caseDef.typeName == "f64mat3x2" || m_caseDef.typeName == "f64mat3x3" ||
359         m_caseDef.typeName == "f64mat3x4" || m_caseDef.typeName == "f64mat4x2" || m_caseDef.typeName == "f64mat4x3" ||
360         m_caseDef.typeName == "f64mat4x4")
361     {
362         if (features2.features.shaderFloat64 != VK_TRUE)
363             TCU_THROW(NotSupportedError, "shaderFloat64 not supported");
364     }
365 
366     if (m_caseDef.typeName == "int8_t" || m_caseDef.typeName == "i8vec2" || m_caseDef.typeName == "i8vec3" ||
367         m_caseDef.typeName == "i8vec4" || m_caseDef.typeName == "uint8_t" || m_caseDef.typeName == "u8vec2" ||
368         m_caseDef.typeName == "u8vec3" || m_caseDef.typeName == "u8vec4")
369     {
370         if (f16_i8_features.shaderInt8 != VK_TRUE)
371             TCU_THROW(NotSupportedError, "shaderInt8 not supported");
372     }
373 
374     if (m_caseDef.typeName == "int16_t" || m_caseDef.typeName == "i16vec2" || m_caseDef.typeName == "i16vec3" ||
375         m_caseDef.typeName == "i16vec4" || m_caseDef.typeName == "uint16_t" || m_caseDef.typeName == "u16vec2" ||
376         m_caseDef.typeName == "u16vec3" || m_caseDef.typeName == "u16vec4")
377     {
378         if (features2.features.shaderInt16 != VK_TRUE)
379             TCU_THROW(NotSupportedError, "shaderInt16 not supported");
380     }
381 
382     if (m_caseDef.typeName == "int64_t" || m_caseDef.typeName == "i64vec2" || m_caseDef.typeName == "i64vec3" ||
383         m_caseDef.typeName == "i64vec4" || m_caseDef.typeName == "uint64_t" || m_caseDef.typeName == "u64vec2" ||
384         m_caseDef.typeName == "u64vec3" || m_caseDef.typeName == "u64vec4")
385     {
386         if (features2.features.shaderInt64 != VK_TRUE)
387             TCU_THROW(NotSupportedError, "shaderInt64 not supported");
388     }
389 }
390 
// Generates a shader declaring numVariables zero-initialized shared variables of the
// case's type. One invocation per component reads its component and writes 0 to its
// output slot when the component is zero, 1 otherwise; runCompute() then expects an
// all-zero buffer.
void TypeTest::initPrograms(SourceCollections &sourceCollections) const
{
    std::ostringstream src;
    src << "#version 450\n";
    src << "#extension GL_EXT_null_initializer : enable\n";
    src << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n";
    // One invocation per component of the tested type.
    src << "layout(local_size_x = " << m_caseDef.numElements * m_caseDef.numRows
        << ", local_size_y = 1, local_size_z = 1) in;\n";
    src << "layout(set = 0, binding = 0) buffer A  { uint a[]; } a;\n";
    for (uint32_t i = 0; i < m_caseDef.numVariables; ++i)
    {
        // GL_EXT_null_initializer: "= {}" requests zero initialization of shared memory.
        src << "shared " << m_caseDef.typeName << " wg_mem" << i << " = {};\n";
    }
    src << "void main() {\n";
    if (m_caseDef.numRows > 1)
    {
        // Matrix cases: decompose the flat invocation index into a row/column pair.
        src << "  uint row = gl_LocalInvocationID.x % " << m_caseDef.numRows << ";\n";
        src << "  uint col = gl_LocalInvocationID.x / " << m_caseDef.numRows << ";\n";
    }
    // 64-bit component types need a 64-bit intermediate so the comparison doesn't truncate.
    std::string conv = m_caseDef.typeSize > 4 ? "int64_t" : "int";
    for (uint32_t v = 0; v < m_caseDef.numVariables; ++v)
    {
        if (m_caseDef.numElements == 1)
        {
            // Scalars.
            src << "  a.a[" << v << "] = (" << conv << "(wg_mem" << v << ") ==  0) ? 0 : 1;\n";
        }
        else if (m_caseDef.numRows == 1)
        {
            // Vectors.
            src << "  a.a[" << v * m_caseDef.numRows * m_caseDef.numElements << " + gl_LocalInvocationID.x] = (" << conv
                << "(wg_mem" << v << "[gl_LocalInvocationID.x]) ==  0) ? 0 : 1;\n";
        }
        else
        {
            // Matrices.
            src << "  a.a[" << v * m_caseDef.numRows * m_caseDef.numElements << " + gl_LocalInvocationID.x] = (" << conv
                << "(wg_mem" << v << "[row][col]) ==  0) ? 0 : 1;\n";
        }
    }
    src << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}
435 
iterate(void)436 tcu::TestStatus TypeTestInstance::iterate(void)
437 {
438     const uint32_t varBytes = m_caseDef.numElements * m_caseDef.numRows * (uint32_t)sizeof(uint32_t);
439     return runCompute(m_context, varBytes * m_caseDef.numVariables, 1, 1, 1, m_computePipelineConstructionType);
440 }
441 
// Registers one TypeTest per scalar/vector/matrix type. Table fields are
// {typeName, typeSize, numElements, numRows, numVariables}; the numVariables column
// is a placeholder overwritten below with a pseudo-random count.
void AddTypeTests(tcu::TestCaseGroup *group, vk::ComputePipelineConstructionType computePipelineConstructionType)
{
    // Fixed seed: the per-case variable counts are deterministic across runs.
    deRandom rnd;
    deRandom_init(&rnd, 0);
    std::vector<TypeCaseDef> cases = {
        {"bool", 1, 1, 1, 0},      {"bvec2", 1, 2, 1, 0},     {"bvec3", 1, 3, 1, 0},     {"bvec4", 1, 4, 1, 0},
        {"uint32_t", 4, 1, 1, 0},  {"uvec2", 4, 2, 1, 0},     {"uvec3", 4, 3, 1, 0},     {"uvec4", 4, 4, 1, 0},
        {"int32_t", 4, 1, 1, 0},   {"ivec2", 4, 2, 1, 0},     {"ivec3", 4, 3, 1, 0},     {"ivec4", 4, 4, 1, 0},
        {"uint8_t", 1, 1, 1, 0},   {"u8vec2", 1, 2, 1, 0},    {"u8vec3", 1, 3, 1, 0},    {"u8vec4", 1, 4, 1, 0},
        {"int8_t", 1, 1, 1, 0},    {"i8vec2", 1, 2, 1, 0},    {"i8vec3", 1, 3, 1, 0},    {"i8vec4", 1, 4, 1, 0},
        {"uint16_t", 2, 1, 1, 0},  {"u16vec2", 2, 2, 1, 0},   {"u16vec3", 2, 3, 1, 0},   {"u16vec4", 2, 4, 1, 0},
        {"int16_t", 2, 1, 1, 0},   {"i16vec2", 2, 2, 1, 0},   {"i16vec3", 2, 3, 1, 0},   {"i16vec4", 2, 4, 1, 0},
        {"uint64_t", 8, 1, 1, 0},  {"u64vec2", 8, 2, 1, 0},   {"u64vec3", 8, 3, 1, 0},   {"u64vec4", 8, 4, 1, 0},
        {"int64_t", 8, 1, 1, 0},   {"i64vec2", 8, 2, 1, 0},   {"i64vec3", 8, 3, 1, 0},   {"i64vec4", 8, 4, 1, 0},
        {"float32_t", 4, 1, 1, 0}, {"f32vec2", 4, 2, 1, 0},   {"f32vec3", 4, 3, 1, 0},   {"f32vec4", 4, 4, 1, 0},
        {"f32mat2x2", 4, 2, 2, 0}, {"f32mat2x3", 4, 3, 2, 0}, {"f32mat2x4", 4, 4, 2, 0}, {"f32mat3x2", 4, 2, 3, 0},
        {"f32mat3x3", 4, 3, 3, 0}, {"f32mat3x4", 4, 4, 3, 0}, {"f32mat4x2", 4, 2, 4, 0}, {"f32mat4x3", 4, 3, 4, 0},
        {"f32mat4x4", 4, 4, 4, 0}, {"float16_t", 2, 1, 1, 0}, {"f16vec2", 2, 2, 1, 0},   {"f16vec3", 2, 3, 1, 0},
        {"f16vec4", 2, 4, 1, 0},   {"f16mat2x2", 2, 2, 2, 0}, {"f16mat2x3", 2, 3, 2, 0}, {"f16mat2x4", 2, 4, 2, 0},
        {"f16mat3x2", 2, 2, 3, 0}, {"f16mat3x3", 2, 3, 3, 0}, {"f16mat3x4", 2, 4, 3, 0}, {"f16mat4x2", 2, 2, 4, 0},
        {"f16mat4x3", 2, 3, 4, 0}, {"f16mat4x4", 2, 4, 4, 0}, {"float64_t", 8, 1, 1, 0}, {"f64vec2", 8, 2, 1, 0},
        {"f64vec3", 8, 3, 1, 0},   {"f64vec4", 8, 4, 1, 0},   {"f64mat2x2", 8, 2, 2, 0}, {"f64mat2x3", 8, 3, 2, 0},
        {"f64mat2x4", 8, 4, 2, 0}, {"f64mat3x2", 8, 2, 3, 0}, {"f64mat3x3", 8, 3, 3, 0}, {"f64mat3x4", 8, 4, 3, 0},
        {"f64mat4x2", 8, 2, 4, 0}, {"f64mat4x3", 8, 3, 4, 0}, {"f64mat4x4", 8, 4, 4, 0},
    };

    for (uint32_t i = 0; i < cases.size(); ++i)
    {
        // Declare between 1 and 16 shared variables for this case.
        cases[i].numVariables = (deRandom_getUint32(&rnd) % 16) + 1;
        group->addChild(new TypeTest(group->getTestContext(), cases[i].typeName.c_str(), cases[i],
                                     computePipelineConstructionType));
    }
}
475 
// One composite-type zero-initialization case: the GLSL declarations plus the main()
// body that copies/compares the shared memory into the output buffer.
struct CompositeCaseDef
{
    uint32_t index;             // bitmask of extended-type features the shader needs (decoded in CompositeTest::checkSupport)
    std::string typeDefinition; // GLSL declarations of the zero-initialized shared variable(s)
    std::string assignment;     // statements for main() that write results into a.a[]
    uint32_t elements;          // number of uint32 results the assignment writes
    std::vector<uint32_t> specValues; // values bound to specialization constants specId0..N-1

    CompositeCaseDef(uint32_t index_, const std::string &typeDefinition_, const std::string &assignment_,
                     uint32_t elements_, const std::vector<uint32_t> &specValues_)
        : index(index_)
        , typeDefinition(typeDefinition_)
        , assignment(assignment_)
        , elements(elements_)
        , specValues(specValues_)
    {
    }
};
494 
// Instance side of CompositeTest: sizes the result buffer from the case definition
// and runs the generated shader once via runCompute().
class CompositeTestInstance : public vkt::TestInstance
{
public:
    CompositeTestInstance(Context &context, const CompositeCaseDef &caseDef,
                          const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestInstance(context)
        , m_caseDef(caseDef)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }
    tcu::TestStatus iterate(void);

private:
    // Copy of the case definition (GLSL snippets, result count, spec constants).
    CompositeCaseDef m_caseDef;
    // Selects the monolithic-pipeline vs. shader-object construction path.
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};
511 
// Test case: zero-initialize composite shared-memory objects (arrays, structs,
// nested aggregates) described by a CompositeCaseDef and verify they read back zero.
class CompositeTest : public vkt::TestCase
{
public:
    CompositeTest(tcu::TestContext &testCtx, const std::string &name, const CompositeCaseDef &caseDef,
                  const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestCase(testCtx, name)
        , m_caseDef(caseDef)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }

    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const
    {
        return new CompositeTestInstance(context, m_caseDef, m_computePipelineConstructionType);
    }
    virtual void checkSupport(Context &context) const;

private:
    // Case definition (GLSL snippets, result count, spec constants, feature mask).
    CompositeCaseDef m_caseDef;
    // Selects the monolithic-pipeline vs. shader-object construction path.
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};
534 
checkSupport(Context & context) const535 void CompositeTest::checkSupport(Context &context) const
536 {
537     context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
538     checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
539                                   m_computePipelineConstructionType);
540 
541     VkPhysicalDeviceShaderFloat16Int8Features f16_i8_features;
542     deMemset(&f16_i8_features, 0, sizeof(f16_i8_features));
543     f16_i8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
544     f16_i8_features.pNext = DE_NULL;
545 
546     VkPhysicalDeviceFeatures2 features2;
547     deMemset(&features2, 0, sizeof(features2));
548     features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
549     features2.pNext = &f16_i8_features;
550     context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2);
551 
552     bool needsFloat16 = (m_caseDef.index & 0x1) != 0;
553     bool needsFloat64 = (m_caseDef.index & 0x2) != 0;
554     bool needsInt8    = (m_caseDef.index & 0x4) != 0;
555     bool needsInt16   = (m_caseDef.index & 0x8) != 0;
556     bool needsInt64   = (m_caseDef.index & 0x10) != 0;
557 
558     if (needsFloat16 && f16_i8_features.shaderFloat16 != VK_TRUE)
559         TCU_THROW(NotSupportedError, "shaderFloat16 not supported");
560     if (needsFloat64 && features2.features.shaderFloat64 != VK_TRUE)
561         TCU_THROW(NotSupportedError, "shaderFloat64 not supported");
562     if (needsInt8 && f16_i8_features.shaderInt8 != VK_TRUE)
563         TCU_THROW(NotSupportedError, "shaderInt8 not supported");
564     if (needsInt16 && features2.features.shaderInt16 != VK_TRUE)
565         TCU_THROW(NotSupportedError, "shaderInt16 not supported");
566     if (needsInt64 && features2.features.shaderInt64 != VK_TRUE)
567         TCU_THROW(NotSupportedError, "shaderInt64 not supported");
568 }
569 
initPrograms(SourceCollections & sourceCollections) const570 void CompositeTest::initPrograms(SourceCollections &sourceCollections) const
571 {
572     std::ostringstream src;
573     src << "#version 450\n";
574     src << "#extension GL_EXT_null_initializer : enable\n";
575     src << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n";
576     src << "\n";
577     for (uint32_t i = 0; i < m_caseDef.specValues.size(); ++i)
578     {
579         src << "layout(constant_id = " << i << ") const uint specId" << i << " = 1;\n";
580     }
581     src << "\n";
582     src << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n";
583     src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
584     src << "\n";
585     src << m_caseDef.typeDefinition;
586     src << "\n";
587     src << "void main() {\n";
588     src << m_caseDef.assignment;
589     src << "}\n";
590 
591     sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
592 }
593 
iterate(void)594 tcu::TestStatus CompositeTestInstance::iterate(void)
595 {
596     const uint32_t bufferSize = (uint32_t)sizeof(uint32_t) * m_caseDef.elements;
597     return runCompute(m_context, bufferSize, 1, 1, 1, m_computePipelineConstructionType, m_caseDef.specValues);
598 }
599 
void AddCompositeTests(tcu::TestCaseGroup *group, vk::ComputePipelineConstructionType computePipelineConstructionType)
{
    // Each case declares a zero-initialized composite (array, struct, or a
    // nesting of both) in workgroup-shared memory via the GL_EXT_null_initializer
    // "= {}" syntax, then copies every scalar element into the output buffer,
    // which is expected to contain only zeros.
    //
    // CompositeCaseDef fields, in order:
    //   - flags: bitmask selecting the extended scalar types a case uses
    //     (1 = float16, 2 = float64, 4 = int8, 8 = int16, 16 = int64 --
    //     presumably consumed by checkSupport/initPrograms; confirm against
    //     the CompositeCaseDef declaration earlier in this file)
    //   - GLSL snippet declaring the shared-memory object
    //   - GLSL snippet copying each scalar element out to a.a[]
    //   - number of uints the copy-out snippet writes to the output buffer
    //   - specialization-constant values bound to specId0..specIdN
    const std::vector<CompositeCaseDef> cases{
        // 1D array sized by a spec constant: 16 elements.
        {
            0,
            "shared uint wg_mem[specId0] = {};\n",

            "for (uint i = 0; i < specId0; ++i) {\n"
            "  a.a[i] = wg_mem[i];\n"
            "}\n",
            16,
            {16},
        },

        // 2D float array: 4 x 8 = 32 elements.
        {
            0,
            "shared float wg_mem[specId0][specId1] = {};\n",

            "for (uint i = 0; i < specId0; ++i) {\n"
            "  for (uint j = 0; j < specId1; ++j) {\n"
            "    uint idx = i * specId1 + j;\n"
            "    a.a[idx] = wg_mem[i][j] == 0.0f ? 0 : 1;\n"
            "  }\n"
            "}\n",
            32,
            {4, 8},
        },

        // Struct mixing all core scalar/vector types: 30 scalars total.
        {
            0,
            "struct Sa {\n"
            "  uint a;\n"
            "  uvec2 b;\n"
            "  uvec3 c;\n"
            "  uvec4 d;\n"
            "  float e;\n"
            "  vec2 f;\n"
            "  vec3 g;\n"
            "  vec4 h;\n"
            "  bool i;\n"
            "  bvec2 j;\n"
            "  bvec3 k;\n"
            "  bvec4 l;\n"
            "};\n"
            "shared Sa wg_mem = {};\n",

            "uint i = 0;\n"
            "a.a[i++] = wg_mem.a;\n"
            "a.a[i++] = wg_mem.b.x;\n"
            "a.a[i++] = wg_mem.b.y;\n"
            "a.a[i++] = wg_mem.c.x;\n"
            "a.a[i++] = wg_mem.c.y;\n"
            "a.a[i++] = wg_mem.c.z;\n"
            "a.a[i++] = wg_mem.d.x;\n"
            "a.a[i++] = wg_mem.d.y;\n"
            "a.a[i++] = wg_mem.d.z;\n"
            "a.a[i++] = wg_mem.d.w;\n"
            "a.a[i++] = wg_mem.e == 0.0f ? 0 : 1;\n"
            "a.a[i++] = wg_mem.f.x == 0.0f ? 0 : 1;\n"
            "a.a[i++] = wg_mem.f.y == 0.0f ? 0 : 1;\n"
            "a.a[i++] = wg_mem.g.x == 0.0f ? 0 : 1;\n"
            "a.a[i++] = wg_mem.g.y == 0.0f ? 0 : 1;\n"
            "a.a[i++] = wg_mem.g.z == 0.0f ? 0 : 1;\n"
            "a.a[i++] = wg_mem.h.x == 0.0f ? 0 : 1;\n"
            "a.a[i++] = wg_mem.h.y == 0.0f ? 0 : 1;\n"
            "a.a[i++] = wg_mem.h.z == 0.0f ? 0 : 1;\n"
            "a.a[i++] = wg_mem.h.w == 0.0f ? 0 : 1;\n"
            "a.a[i++] = wg_mem.i ? 1 : 0;\n"
            "a.a[i++] = wg_mem.j.x ? 1 : 0;\n"
            "a.a[i++] = wg_mem.j.y ? 1 : 0;\n"
            "a.a[i++] = wg_mem.k.x ? 1 : 0;\n"
            "a.a[i++] = wg_mem.k.y ? 1 : 0;\n"
            "a.a[i++] = wg_mem.k.z ? 1 : 0;\n"
            "a.a[i++] = wg_mem.l.x ? 1 : 0;\n"
            "a.a[i++] = wg_mem.l.y ? 1 : 0;\n"
            "a.a[i++] = wg_mem.l.z ? 1 : 0;\n"
            "a.a[i++] = wg_mem.l.w ? 1 : 0;\n",
            30,
            {},
        },

        // Array of structs that themselves contain arrays of structs:
        // 4 * (2 + 3*2) = 32 scalars.
        {
            0,
            "struct Sa {\n"
            "  uint a;\n"
            "};\n"
            "struct Sb {\n"
            "  uvec2 a;\n"
            "};\n"
            "struct Sc {\n"
            "  Sa a[specId0];\n"
            "  Sb b[specId1];\n"
            "};\n"
            "shared Sc wg_mem[specId2] = {};\n",

            "uint idx = 0;\n"
            "for (uint i = 0; i < specId2; ++i) {\n"
            "  for (uint j = 0; j < specId0; ++j) {\n"
            "    a.a[idx++] = wg_mem[i].a[j].a;\n"
            "  }\n"
            "  for (uint j = 0; j < specId1; ++j) {\n"
            "    a.a[idx++] = wg_mem[i].b[j].a.x;\n"
            "    a.a[idx++] = wg_mem[i].b[j].a.y;\n"
            "  }\n"
            "}\n",
            32,
            {2, 3, 4},
        },

        // 16-bit float members (flag 1): 2 + 16 = 18 scalars.
        {
            1,
            "struct Sa {\n"
            "  f16vec2 a;\n"
            "  float16_t b[specId0];\n"
            "};\n"
            "shared Sa wg_mem = {};\n",

            "uint idx = 0;\n"
            "a.a[idx++] = floatBitsToUint(wg_mem.a.x) == 0 ? 0 : 1;\n"
            "a.a[idx++] = floatBitsToUint(wg_mem.a.y) == 0 ? 0 : 1;\n"
            "for (uint i = 0; i < specId0; ++i) {\n"
            "  a.a[idx++] = floatBitsToUint(wg_mem.b[i]) == 0 ? 0 : 1;\n"
            "}\n",
            18,
            {16},
        },

        // 64-bit float members (flag 2): 2 + 5 = 7 scalars.
        {
            2,
            "struct Sa {\n"
            "  f64vec2 a;\n"
            "  float64_t b[specId0];\n"
            "};\n"
            "shared Sa wg_mem = {};\n",

            "uint idx = 0;\n"
            "a.a[idx++] = wg_mem.a.x == 0.0 ? 0 : 1;\n"
            "a.a[idx++] = wg_mem.a.y == 0.0 ? 0 : 1;\n"
            "for (uint i = 0; i < specId0; ++i) {\n"
            "  a.a[idx++] = wg_mem.b[i] == 0.0 ? 0 : 1;\n"
            "}\n",
            7,
            {5},
        },

        // 8-bit int members (flag 4): 2 + 32 = 34 scalars.
        {
            4,
            "struct Sa {\n"
            "  i8vec2 a;\n"
            "  int8_t b[specId0];\n"
            "};\n"
            "shared Sa wg_mem = {};\n",

            "uint idx = 0;\n"
            "a.a[idx++] = wg_mem.a.x == 0 ? 0 : 1;\n"
            "a.a[idx++] = wg_mem.a.y == 0 ? 0 : 1;\n"
            "for (uint i = 0; i < specId0; ++i) {\n"
            "  a.a[idx++] = wg_mem.b[i] == 0 ? 0 : 1;\n"
            "}\n",
            34,
            {32},
        },

        // 16-bit int members (flag 8): 2 + 120 = 122 scalars.
        {
            8,
            "struct Sa {\n"
            "  i16vec2 a;\n"
            "  int16_t b[specId0];\n"
            "};\n"
            "shared Sa wg_mem = {};\n",

            "uint idx = 0;\n"
            "a.a[idx++] = wg_mem.a.x == 0 ? 0 : 1;\n"
            "a.a[idx++] = wg_mem.a.y == 0 ? 0 : 1;\n"
            "for (uint i = 0; i < specId0; ++i) {\n"
            "  a.a[idx++] = wg_mem.b[i] == 0 ? 0 : 1;\n"
            "}\n",
            122,
            {120},
        },

        // 64-bit int members (flag 16): 2 + 61 = 63 scalars.
        {
            16,
            "struct Sa {\n"
            "  i64vec2 a;\n"
            "  int64_t b[specId0];\n"
            "};\n"
            "shared Sa wg_mem = {};\n",

            "uint idx = 0;\n"
            "a.a[idx++] = wg_mem.a.x == 0 ? 0 : 1;\n"
            "a.a[idx++] = wg_mem.a.y == 0 ? 0 : 1;\n"
            "for (uint i = 0; i < specId0; ++i) {\n"
            "  a.a[idx++] = wg_mem.b[i] == 0 ? 0 : 1;\n"
            "}\n",
            63,
            {61},
        },

        // All extended types at once (flags 0x1f): 7 scalars.
        {
            0x1f,
            "struct Sa {\n"
            "  float16_t a;\n"
            "  float b;\n"
            "  int8_t c;\n"
            "  int16_t d;\n"
            "  int e;\n"
            "  int64_t f;\n"
            "  float64_t g;\n"
            "};\n"
            "shared Sa wg_mem = {};\n",

            "uint idx = 0;\n"
            "a.a[idx++] = floatBitsToUint(wg_mem.a) == 0 ? 0 : 1;\n"
            "a.a[idx++] = floatBitsToUint(wg_mem.b) == 0 ? 0 : 1;\n"
            "a.a[idx++] = uint(wg_mem.c);\n"
            "a.a[idx++] = uint(wg_mem.d);\n"
            "a.a[idx++] = uint(wg_mem.e);\n"
            "a.a[idx++] = uint(wg_mem.f);\n"
            "a.a[idx++] = wg_mem.g == 0.0 ? 0 : 1;\n",
            7,
            {},
        },

        // Five levels of struct/array nesting; total scalars:
        // 2 * (1 + 3*(1 + 4*(1 + 5*(1 + 6)))) = 872.
        {
            0,
            "struct Sa {\n"
            "  uint a;\n"
            "};\n"
            "struct Sb {\n"
            "  Sa a[specId0];\n"
            "  uint b;\n"
            "};\n"
            "struct Sc {\n"
            "  Sb b[specId1];\n"
            "  uint c;\n"
            "};\n"
            "struct Sd {\n"
            "  Sc c[specId2];\n"
            "  uint d;\n"
            "};\n"
            "struct Se {\n"
            "  Sd d[specId3];\n"
            "  uint e;\n"
            "};\n"
            "shared Se wg_mem[specId4] = {};\n",

            "uint idx = 0;\n"
            "for (uint i1 = 0; i1 < specId4; ++i1) {\n"
            "  a.a[idx++] = wg_mem[i1].e;\n"
            "  for (uint i2 = 0; i2 < specId3; ++i2) {\n"
            "    a.a[idx++] = wg_mem[i1].d[i2].d;\n"
            "    for (uint i3 = 0; i3 < specId2; ++i3) {\n"
            "      a.a[idx++] = wg_mem[i1].d[i2].c[i3].c;\n"
            "      for (uint i4 = 0; i4 < specId1; ++i4) {\n"
            "        a.a[idx++] = wg_mem[i1].d[i2].c[i3].b[i4].b;\n"
            "        for (uint i5 = 0; i5 < specId0; ++i5) {\n"
            "          a.a[idx++] = wg_mem[i1].d[i2].c[i3].b[i4].a[i5].a;\n"
            "        }\n"
            "      }\n"
            "    }\n"
            "  }\n"
            "}\n",
            872,
            {6, 5, 4, 3, 2},
        },
    };

    // Tests are named by their index in the table above.
    for (uint32_t i = 0; i < cases.size(); ++i)
    {
        group->addChild(
            new CompositeTest(group->getTestContext(), de::toString(i), cases[i], computePipelineConstructionType));
    }
}
874 
// Dispatch dimension that a MaxWorkgroups test maxes out (65535 workgroups
// along the selected axis, 1 along the others -- see MaxWorkgroupsInstance::iterate).
enum Dim
{
    DimX,
    DimY,
    DimZ,
};
881 
// Instance for MaxWorkgroupsTest: dispatches 65535 workgroups along one
// dimension with a maximal workgroup size and checks (via the shader built in
// MaxWorkgroupsTest::initPrograms) that zero-initialized shared memory reads
// back as zero in every workgroup.
class MaxWorkgroupsInstance : public vkt::TestInstance
{
public:
    MaxWorkgroupsInstance(Context &context, Dim dim,
                          const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestInstance(context)
        , m_dim(dim)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }
    tcu::TestStatus iterate(void);

private:
    Dim m_dim; // Which dispatch dimension is maxed out.
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};
898 
// Test case wrapper: builds the shader (initPrograms), checks extension
// support, and creates a MaxWorkgroupsInstance for the chosen dimension.
class MaxWorkgroupsTest : public vkt::TestCase
{
public:
    MaxWorkgroupsTest(tcu::TestContext &testCtx, const std::string &name, Dim dim,
                      const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestCase(testCtx, name)
        , m_dim(dim)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }

    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const
    {
        return new MaxWorkgroupsInstance(context, m_dim, m_computePipelineConstructionType);
    }
    virtual void checkSupport(Context &context) const;

private:
    Dim m_dim; // Which dispatch dimension the instance maxes out.
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};
921 
checkSupport(Context & context) const922 void MaxWorkgroupsTest::checkSupport(Context &context) const
923 {
924     context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
925     checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
926                                   m_computePipelineConstructionType);
927 }
928 
initPrograms(SourceCollections & sourceCollections) const929 void MaxWorkgroupsTest::initPrograms(SourceCollections &sourceCollections) const
930 {
931     std::ostringstream src;
932     src << "#version 450\n";
933     src << "#extension GL_EXT_null_initializer : enable\n";
934     src << "\n";
935     src << "layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n";
936     src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
937     src << "shared uint wg_mem[2] = {};\n";
938     std::string dim;
939     switch (m_dim)
940     {
941     case DimX:
942         dim = "x";
943         break;
944     case DimY:
945         dim = "y";
946         break;
947     case DimZ:
948         dim = "z";
949         break;
950     }
951     src << "\n";
952     src << "void main() {\n";
953     src << "  uint idx_z = gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y;\n";
954     src << "  uint idx_y = gl_LocalInvocationID.y * gl_WorkGroupSize.x;\n";
955     src << "  uint idx_x = gl_LocalInvocationID.x;\n";
956     src << "  uint idx = idx_x + idx_y + idx_z;\n";
957     src << "  if (gl_LocalInvocationID.x == 0) {\n";
958     src << "    wg_mem[0] = atomicExchange(wg_mem[1], wg_mem[0]);\n";
959     src << "  }\n";
960     src << "  barrier();\n";
961     src << "  atomicAdd(a.a[idx], wg_mem[idx_x % 2] == 0 ? 1 : 0);\n";
962     src << "}\n";
963 
964     sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
965 }
966 
iterate(void)967 tcu::TestStatus MaxWorkgroupsInstance::iterate(void)
968 {
969     VkPhysicalDeviceProperties properties;
970     deMemset(&properties, 0, sizeof(properties));
971     m_context.getInstanceInterface().getPhysicalDeviceProperties(m_context.getPhysicalDevice(), &properties);
972 
973     const uint32_t maxWG = std::min(2048u, properties.limits.maxComputeWorkGroupInvocations);
974     uint32_t wgx         = properties.limits.maxComputeWorkGroupSize[0];
975     uint32_t wgy         = 1;
976     uint32_t wgz         = 1;
977     if (wgx < maxWG)
978     {
979         wgy = std::min(maxWG / wgx, properties.limits.maxComputeWorkGroupSize[1]);
980     }
981     if ((wgx * wgy) < maxWG)
982     {
983         wgz = std::min(maxWG / wgx / wgy, properties.limits.maxComputeWorkGroupSize[2]);
984     }
985     uint32_t size = (uint32_t)sizeof(uint32_t) * wgx * wgy * wgz;
986 
987     uint32_t num_wgx = m_dim == DimX ? 65535 : 1;
988     uint32_t num_wgy = m_dim == DimY ? 65535 : 1;
989     uint32_t num_wgz = m_dim == DimZ ? 65535 : 1;
990 
991     return runCompute(m_context, size, num_wgx, num_wgy, num_wgz, m_computePipelineConstructionType, {wgx, wgy, wgz},
992                       /*increment*/ 1);
993 }
994 
AddMaxWorkgroupsTests(tcu::TestCaseGroup * group,vk::ComputePipelineConstructionType computePipelineConstructionType)995 void AddMaxWorkgroupsTests(tcu::TestCaseGroup *group,
996                            vk::ComputePipelineConstructionType computePipelineConstructionType)
997 {
998     group->addChild(new MaxWorkgroupsTest(group->getTestContext(), "x", DimX, computePipelineConstructionType));
999     group->addChild(new MaxWorkgroupsTest(group->getTestContext(), "y", DimY, computePipelineConstructionType));
1000     group->addChild(new MaxWorkgroupsTest(group->getTestContext(), "z", DimZ, computePipelineConstructionType));
1001 }
1002 
// Instance for SpecializeWorkgroupTest: runs a single workgroup of size
// (m_x, m_y, m_z), where the size is supplied entirely through
// specialization constants, and has each invocation copy "its" element of a
// zero-initialized shared array to the output buffer.
class SpecializeWorkgroupInstance : public vkt::TestInstance
{
public:
    SpecializeWorkgroupInstance(Context &context, uint32_t x, uint32_t y, uint32_t z,
                                const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestInstance(context)
        , m_x(x)
        , m_y(y)
        , m_z(z)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }
    tcu::TestStatus iterate(void);

private:
    // Specialized workgroup dimensions (constant IDs 0..2).
    uint32_t m_x;
    uint32_t m_y;
    uint32_t m_z;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};
1023 
// Test case wrapper for a spec-constant-sized workgroup: builds the shader,
// verifies the requested size fits device limits, and creates the instance.
class SpecializeWorkgroupTest : public vkt::TestCase
{
public:
    SpecializeWorkgroupTest(tcu::TestContext &testCtx, const std::string &name, uint32_t x, uint32_t y, uint32_t z,
                            const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestCase(testCtx, name)
        , m_x(x)
        , m_y(y)
        , m_z(z)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }

    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const
    {
        return new SpecializeWorkgroupInstance(context, m_x, m_y, m_z, m_computePipelineConstructionType);
    }
    virtual void checkSupport(Context &context) const;

private:
    // Workgroup dimensions passed to the shader as specialization constants.
    uint32_t m_x;
    uint32_t m_y;
    uint32_t m_z;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};
1050 
checkSupport(Context & context) const1051 void SpecializeWorkgroupTest::checkSupport(Context &context) const
1052 {
1053     context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
1054     checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
1055                                   m_computePipelineConstructionType);
1056 
1057     VkPhysicalDeviceProperties properties;
1058     deMemset(&properties, 0, sizeof(properties));
1059     context.getInstanceInterface().getPhysicalDeviceProperties(context.getPhysicalDevice(), &properties);
1060     if (m_x * m_y * m_z > properties.limits.maxComputeWorkGroupInvocations)
1061         TCU_THROW(NotSupportedError, "Workgroup size exceeds limits");
1062 }
1063 
initPrograms(SourceCollections & sourceCollections) const1064 void SpecializeWorkgroupTest::initPrograms(SourceCollections &sourceCollections) const
1065 {
1066     std::ostringstream src;
1067     src << "#version 450\n";
1068     src << "#extension GL_EXT_null_initializer : enable\n";
1069     src << "\n";
1070     src << "layout(constant_id = 0) const uint WGX = 1;\n";
1071     src << "layout(constant_id = 1) const uint WGY = 1;\n";
1072     src << "layout(constant_id = 2) const uint WGZ = 1;\n";
1073     src << "layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n";
1074     src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
1075     src << "shared uint wg_mem[WGX][WGY][WGZ] = {};\n";
1076     src << "\n";
1077     src << "void main() {\n";
1078     src << "  a.a[gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + gl_LocalInvocationID.y * "
1079            "gl_WorkGroupSize.x + gl_LocalInvocationID.x] = "
1080            "wg_mem[gl_LocalInvocationID.x][gl_LocalInvocationID.y][gl_LocalInvocationID.z];\n";
1081     src << "}\n";
1082 
1083     sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1084 }
1085 
iterate(void)1086 tcu::TestStatus SpecializeWorkgroupInstance::iterate(void)
1087 {
1088     const uint32_t size = (uint32_t)sizeof(uint32_t) * m_x * m_y * m_z;
1089     return runCompute(m_context, size, 1, 1, 1, m_computePipelineConstructionType, {m_x, m_y, m_z});
1090 }
1091 
AddSpecializeWorkgroupTests(tcu::TestCaseGroup * group,vk::ComputePipelineConstructionType computePipelineConstructionType)1092 void AddSpecializeWorkgroupTests(tcu::TestCaseGroup *group,
1093                                  vk::ComputePipelineConstructionType computePipelineConstructionType)
1094 {
1095     for (uint32_t z = 1; z <= 8; ++z)
1096     {
1097         for (uint32_t y = 1; y <= 8; ++y)
1098         {
1099             for (uint32_t x = 1; x <= 8; ++x)
1100             {
1101                 group->addChild(new SpecializeWorkgroupTest(
1102                     group->getTestContext(), de::toString(x) + "_" + de::toString(y) + "_" + de::toString(z), x, y, z,
1103                     computePipelineConstructionType));
1104             }
1105         }
1106     }
1107 }
1108 
// Instance for RepeatedPipelineTest: records one command buffer and submits
// it m_repeat times (see iterate()), resetting the output buffer between
// submissions, so that shared-memory zero initialization is verified across
// repeated executions of the same pipeline.
class RepeatedPipelineInstance : public vkt::TestInstance
{
public:
    // NOTE(review): the pipeline construction type is not forwarded to this
    // instance; iterate() always builds a monolithic compute pipeline.
    RepeatedPipelineInstance(Context &context, uint32_t xSize, uint32_t repeat, uint32_t odd)
        : TestInstance(context)
        , m_xSize(xSize)
        , m_repeat(repeat)
        , m_odd(odd)
    {
    }
    tcu::TestStatus iterate(void);

private:
    uint32_t m_xSize;  // Workgroup width (specialization constant WGX).
    uint32_t m_repeat; // Number of times the recorded command buffer is resubmitted.
    uint32_t m_odd;    // Selects which row (y == m_odd) is overwritten from the input buffer.
};
1126 
// Test case wrapper for the repeated-submission test.
//
// NOTE(review): mind the constructor parameter order -- it is
// (xSize, repeat, odd), with "repeat" BEFORE "odd"; callers building cases
// from (x, odd, repeat) loops must not pass the loop variables positionally.
class RepeatedPipelineTest : public vkt::TestCase
{
public:
    RepeatedPipelineTest(tcu::TestContext &testCtx, const std::string &name, uint32_t xSize, uint32_t repeat,
                         uint32_t odd, const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestCase(testCtx, name)
        , m_xSize(xSize)
        , m_repeat(repeat)
        , m_odd(odd)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }

    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const
    {
        return new RepeatedPipelineInstance(context, m_xSize, m_repeat, m_odd);
    }
    virtual void checkSupport(Context &context) const;

private:
    uint32_t m_xSize;  // Workgroup width (specialization constant WGX).
    uint32_t m_repeat; // Resubmission count forwarded to the instance.
    uint32_t m_odd;    // Row selector baked into the shader source.
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};
1153 
checkSupport(Context & context) const1154 void RepeatedPipelineTest::checkSupport(Context &context) const
1155 {
1156     context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
1157     checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
1158                                   m_computePipelineConstructionType);
1159 }
1160 
initPrograms(SourceCollections & sourceCollections) const1161 void RepeatedPipelineTest::initPrograms(SourceCollections &sourceCollections) const
1162 {
1163     std::ostringstream src;
1164     src << "#version 450\n";
1165     src << "#extension GL_EXT_null_initializer : enable\n";
1166     src << "\n";
1167     src << "layout(constant_id = 0) const uint WGX = 1;\n";
1168     src << "layout(local_size_x_id = 0, local_size_y = 2, local_size_z = 1) in;\n";
1169     src << "\n";
1170     src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
1171     src << "layout(set = 0, binding = 1) buffer B { uint b[]; } b;\n";
1172     src << "\n";
1173     src << "shared uint wg_mem[WGX][2] = {};\n";
1174     src << "void main() {\n";
1175     src << "  if (gl_LocalInvocationID.y == " << m_odd << ") {\n";
1176     src << "    wg_mem[gl_LocalInvocationID.x][gl_LocalInvocationID.y] = b.b[gl_LocalInvocationID.y * WGX + "
1177            "gl_LocalInvocationID.x];\n";
1178     src << "  }\n";
1179     src << "  barrier();\n";
1180     src << "  a.a[gl_LocalInvocationID.y * WGX + gl_LocalInvocationID.x] = "
1181            "wg_mem[gl_LocalInvocationID.x][gl_LocalInvocationID.y];\n";
1182     src << "}\n";
1183 
1184     sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1185 }
1186 
tcu::TestStatus RepeatedPipelineInstance::iterate(void)
{
    // Records a single command buffer dispatching one WGX x 2 workgroup and
    // submits it m_repeat times. After each submission, row m_odd of the
    // output must equal the input data and the other row must be all zeros
    // (the shared-memory null initialization).
    Context &context          = m_context;
    const uint32_t bufferSize = m_xSize * 2 * (uint32_t)sizeof(uint32_t);
    const uint32_t numBuffers = 2;

    const DeviceInterface &vk = context.getDeviceInterface();
    const VkDevice device     = context.getDevice();
    Allocator &allocator      = context.getDefaultAllocator();
    tcu::TestLog &log         = context.getTestContext().getLog();

    // Buffer 0 receives the shader output; buffer 1 provides input data.
    de::MovePtr<BufferWithMemory> buffers[numBuffers];
    VkDescriptorBufferInfo bufferDescriptors[numBuffers];

    VkDeviceSize size = bufferSize;
    for (uint32_t i = 0; i < numBuffers; ++i)
    {
        buffers[i]           = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
            vk, device, allocator,
            makeBufferCreateInfo(size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
                                                     VK_BUFFER_USAGE_TRANSFER_SRC_BIT),
            MemoryRequirement::HostVisible | MemoryRequirement::Cached));
        bufferDescriptors[i] = makeDescriptorBufferInfo(**buffers[i], 0, size);
    }

    // Seed the input buffer with increasing values and poison the output
    // buffer with 0xff so stale contents cannot pass the later check.
    uint32_t *ptrs[numBuffers];
    for (uint32_t i = 0; i < numBuffers; ++i)
    {
        ptrs[i] = (uint32_t *)buffers[i]->getAllocation().getHostPtr();
    }
    for (uint32_t i = 0; i < bufferSize / sizeof(uint32_t); ++i)
    {
        ptrs[1][i] = i;
    }
    deMemset(ptrs[0], 0xff, (size_t)size);

    // One storage-buffer binding per buffer, both visible to the compute stage.
    DescriptorSetLayoutBuilder layoutBuilder;
    for (uint32_t i = 0; i < numBuffers; ++i)
    {
        layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
    }

    Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
    // FREE_DESCRIPTOR_SET_BIT allows the descriptor updates done inside the
    // repeat loop below.
    Unique<VkDescriptorPool> descriptorPool(
        DescriptorPoolBuilder()
            .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, numBuffers)
            .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
    Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));

    // Specialization constant 0 = WGX (workgroup width).
    const uint32_t specData[1] = {
        m_xSize,
    };
    const vk::VkSpecializationMapEntry entries[1] = {
        {0, (uint32_t)(sizeof(uint32_t) * 0), sizeof(uint32_t)},
    };
    const vk::VkSpecializationInfo specInfo = {1, entries, sizeof(specData), specData};

    const VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = {
        VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
        DE_NULL,
        (VkPipelineLayoutCreateFlags)0,
        1,
        &descriptorSetLayout.get(),
        0u,
        DE_NULL,
    };
    Move<VkPipelineLayout> pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutCreateInfo, NULL);
    VkPipelineBindPoint bindPoint         = VK_PIPELINE_BIND_POINT_COMPUTE;

    // Make the host-written buffer contents visible to the device.
    for (uint32_t i = 0; i < numBuffers; ++i)
    {
        flushAlloc(vk, device, buffers[i]->getAllocation());
    }

    // Always a monolithic pipeline here (construction type is not used).
    const Unique<VkShaderModule> shader(createShaderModule(vk, device, context.getBinaryCollection().get("comp"), 0));
    const VkPipelineShaderStageCreateInfo shaderInfo = {
        VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
        DE_NULL,
        0,
        VK_SHADER_STAGE_COMPUTE_BIT,
        *shader,
        "main",
        &specInfo,
    };

    const VkComputePipelineCreateInfo pipelineInfo = {
        VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, DE_NULL, 0u, shaderInfo, *pipelineLayout, (VkPipeline)0, 0u,
    };
    Move<VkPipeline> pipeline = createComputePipeline(vk, device, DE_NULL, &pipelineInfo, NULL);

    const VkQueue queue             = context.getUniversalQueue();
    Move<VkCommandPool> cmdPool     = createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
                                                        context.getUniversalQueueFamilyIndex());
    Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);

    DescriptorSetUpdateBuilder setUpdateBuilder;
    for (uint32_t i = 0; i < numBuffers; ++i)
    {
        setUpdateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i),
                                     VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[i]);
    }
    setUpdateBuilder.update(vk, device);

    // Record once; the same command buffer is resubmitted in the loop below.
    beginCommandBuffer(vk, *cmdBuffer, 0);

    vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, *pipelineLayout, 0u, 1, &*descriptorSet, 0u, DE_NULL);
    vk.cmdBindPipeline(*cmdBuffer, bindPoint, *pipeline);

    vk.cmdDispatch(*cmdBuffer, 1, 1, 1);

    // Make shader writes visible to the host readback after each submit.
    const VkMemoryBarrier barrier = {
        VK_STRUCTURE_TYPE_MEMORY_BARRIER, // sType
        nullptr,                          // pNext
        VK_ACCESS_SHADER_WRITE_BIT,       // srcAccessMask
        VK_ACCESS_HOST_READ_BIT,          // dstAccessMask
    };
    vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
                          (VkDependencyFlags)0, 1, &barrier, 0, nullptr, 0, nullptr);

    endCommandBuffer(vk, *cmdBuffer);

    for (uint32_t r = 0; r < m_repeat; ++r)
    {
        submitCommandsAndWait(vk, device, queue, cmdBuffer.get());

        invalidateAlloc(vk, device, buffers[0]->getAllocation());

        // Row index is i / m_xSize (0 or 1): the row matching m_odd carries
        // the input data (ptrs[1][i] == i); the other row must be zero.
        for (uint32_t i = 0; i < (uint32_t)size / sizeof(uint32_t); ++i)
        {
            uint32_t expected = (m_odd == (i / m_xSize)) ? i : 0u;
            if (ptrs[0][i] != expected)
            {
                log << tcu::TestLog::Message << "failure at index " << i << ": expected " << expected
                    << ", got: " << ptrs[0][i] << tcu::TestLog::EndMessage;
                return tcu::TestStatus::fail("compute failed");
            }
        }

        // Re-poison the output buffer and refresh its descriptor before the
        // next submission of the same command buffer.
        deMemset(ptrs[0], 0xff, (size_t)size);
        flushAlloc(vk, device, buffers[0]->getAllocation());
        setUpdateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0),
                                     VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[0]);
        setUpdateBuilder.update(vk, device);
    }

    return tcu::TestStatus::pass("compute succeeded");
}
1334 
AddRepeatedPipelineTests(tcu::TestCaseGroup * group,vk::ComputePipelineConstructionType computePipelineConstructionType)1335 void AddRepeatedPipelineTests(tcu::TestCaseGroup *group,
1336                               vk::ComputePipelineConstructionType computePipelineConstructionType)
1337 {
1338     std::vector<uint32_t> xSizes  = {4, 16, 32, 64};
1339     std::vector<uint32_t> odds    = {0, 1};
1340     std::vector<uint32_t> repeats = {2, 4, 8, 16};
1341     for (uint32_t i = 0; i < xSizes.size(); ++i)
1342     {
1343         uint32_t x = xSizes[i];
1344         for (uint32_t j = 0; j < odds.size(); ++j)
1345         {
1346             uint32_t odd = odds[j];
1347             for (uint32_t k = 0; k < repeats.size(); ++k)
1348             {
1349                 uint32_t repeat = repeats[k];
1350                 group->addChild(new RepeatedPipelineTest(group->getTestContext(),
1351                                                          std::string("x_") + de::toString(x) +
1352                                                              (odd == 1 ? "_odd" : "_even") + "_repeat_" +
1353                                                              de::toString(repeat),
1354                                                          x, odd, repeat, computePipelineConstructionType));
1355             }
1356         }
1357     }
1358 }
1359 #ifndef CTS_USES_VULKANSC
AddSharedMemoryTests(tcu::TestCaseGroup * group)1360 void AddSharedMemoryTests(tcu::TestCaseGroup *group)
1361 {
1362     tcu::TestContext &testCtx = group->getTestContext();
1363     std::string filePath      = "compute/zero_initialize_workgroup_memory";
1364     std::vector<std::string> requirements;
1365 
1366     std::string testNames[] = {"workgroup_size_128",   "workgroup_size_8x8x2", "workgroup_size_8x2x8",
1367                                "workgroup_size_2x8x8", "workgroup_size_8x4x4", "workgroup_size_4x8x4",
1368                                "workgroup_size_4x4x8"};
1369 
1370     requirements.push_back("VK_KHR_zero_initialize_workgroup_memory");
1371 
1372     for (const auto &testName : testNames)
1373     {
1374         group->addChild(cts_amber::createAmberTestCase(testCtx, testName.c_str(), "", filePath.c_str(),
1375                                                        testName + ".amber", requirements));
1376     }
1377 }
1378 #endif // CTS_USES_VULKANSC
1379 
1380 } // namespace
1381 
createZeroInitializeWorkgroupMemoryTests(tcu::TestContext & testCtx,vk::ComputePipelineConstructionType computePipelineConstructionType)1382 tcu::TestCaseGroup *createZeroInitializeWorkgroupMemoryTests(
1383     tcu::TestContext &testCtx, vk::ComputePipelineConstructionType computePipelineConstructionType)
1384 {
1385     de::MovePtr<tcu::TestCaseGroup> tests(new tcu::TestCaseGroup(testCtx, "zero_initialize_workgroup_memory"));
1386 
1387     tcu::TestCaseGroup *maxWorkgroupMemoryGroup =
1388         // Read initialization of max workgroup memory
1389         new tcu::TestCaseGroup(testCtx, "max_workgroup_memory");
1390     AddMaxWorkgroupMemoryTests(maxWorkgroupMemoryGroup, computePipelineConstructionType);
1391     tests->addChild(maxWorkgroupMemoryGroup);
1392 
1393     tcu::TestCaseGroup *typeGroup = new tcu::TestCaseGroup(testCtx, "types");
1394     AddTypeTests(typeGroup, computePipelineConstructionType);
1395     tests->addChild(typeGroup);
1396 
1397     tcu::TestCaseGroup *compositeGroup = new tcu::TestCaseGroup(testCtx, "composites");
1398     AddCompositeTests(compositeGroup, computePipelineConstructionType);
1399     tests->addChild(compositeGroup);
1400 
1401     tcu::TestCaseGroup *maxWorkgroupsGroup = new tcu::TestCaseGroup(testCtx, "max_workgroups");
1402     AddMaxWorkgroupsTests(maxWorkgroupsGroup, computePipelineConstructionType);
1403     tests->addChild(maxWorkgroupsGroup);
1404 
1405     tcu::TestCaseGroup *specializeWorkgroupGroup = new tcu::TestCaseGroup(testCtx, "specialize_workgroup");
1406     AddSpecializeWorkgroupTests(specializeWorkgroupGroup, computePipelineConstructionType);
1407     tests->addChild(specializeWorkgroupGroup);
1408 
1409     tcu::TestCaseGroup *repeatPipelineGroup = new tcu::TestCaseGroup(testCtx, "repeat_pipeline");
1410     AddRepeatedPipelineTests(repeatPipelineGroup, computePipelineConstructionType);
1411     tests->addChild(repeatPipelineGroup);
1412 
1413 #ifndef CTS_USES_VULKANSC
1414     // These are Amber tests and Amber cannot use shader objects.
1415     if (!isComputePipelineConstructionTypeShaderObject(computePipelineConstructionType))
1416     {
1417         tcu::TestCaseGroup *subgroupInvocationGroup = new tcu::TestCaseGroup(testCtx, "shared_memory_blocks");
1418         AddSharedMemoryTests(subgroupInvocationGroup);
1419         tests->addChild(subgroupInvocationGroup);
1420     }
1421 #endif // CTS_USES_VULKANSC
1422 
1423     return tests.release();
1424 }
1425 
1426 } // namespace compute
1427 } // namespace vkt
1428