/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2020 The Khronos Group Inc.
 * Copyright (c) 2020 Google LLC.
 * Copyright (c) 2023 LunarG, Inc.
 * Copyright (c) 2023 Nintendo
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief VK_KHR_zero_initialize_workgroup_memory tests
 *//*--------------------------------------------------------------------*/

#include "vktComputeZeroInitializeWorkgroupMemoryTests.hpp"
#include "vktTestCase.hpp"
#include "vktTestCaseUtil.hpp"
#include "vktTestGroupUtil.hpp"
#include "vktAmberTestCase.hpp"

#include "vkBufferWithMemory.hpp"
#include "vkImageWithMemory.hpp"
#include "vkQueryUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkObjUtil.hpp"
#include "vkDefs.hpp"
#include "vkRef.hpp"

#include "tcuCommandLine.hpp"
#include "tcuTestLog.hpp"

#include "deRandom.hpp"
#include "deStringUtil.hpp"
#include "deUniquePtr.hpp"

#include <algorithm>
#include <vector>

using namespace vk;

namespace vkt
{
namespace compute
{
namespace
{

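// Runs a single compute dispatch against a host-visible storage buffer and
// checks the result on the host. In the default mode every 32-bit element must
// come back 0; when 'increment' is non-zero, every element must instead equal
// the number of dispatched workgroups (one atomic increment per workgroup).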
tcu::TestStatus runCompute(Context &context, uint32_t bufferSize, uint32_t numWGX, uint32_t numWGY, uint32_t numWGZ,
                           vk::ComputePipelineConstructionType m_computePipelineConstructionType,
                           const std::vector<uint32_t> specValues = {}, uint32_t increment = 0)
{
    const DeviceInterface &vk = context.getDeviceInterface();
    const VkDevice device = context.getDevice();
    Allocator &allocator = context.getDefaultAllocator();
    tcu::TestLog &log = context.getTestContext().getLog();

    de::MovePtr<BufferWithMemory> buffer;
    VkDescriptorBufferInfo bufferDescriptor;

    VkDeviceSize size = bufferSize;
    buffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
        vk, device, allocator,
        makeBufferCreateInfo(size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
                                       VK_BUFFER_USAGE_TRANSFER_SRC_BIT),
        MemoryRequirement::HostVisible));
    bufferDescriptor = makeDescriptorBufferInfo(**buffer, 0, size);

    uint32_t *ptr = (uint32_t *)buffer->getAllocation().getHostPtr();
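    // Pre-fill with 0xff so a zero read back later can only have come from the
    // shader; increment mode starts its counters at 0 instead.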
    deMemset(ptr, increment ? 0 : 0xff, (size_t)size);

    DescriptorSetLayoutBuilder layoutBuilder;
    layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);

    Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
    Unique<VkDescriptorPool> descriptorPool(
        DescriptorPoolBuilder()
            .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1u)
            .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
    Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));

    std::vector<VkSpecializationMapEntry> entries(specValues.size());
    if (!specValues.empty())
    {
        for (uint32_t i = 0; i < specValues.size(); ++i)
        {
            entries[i] = {i, (uint32_t)(sizeof(uint32_t) * i), sizeof(uint32_t)};
        }
    }
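    // Each 32-bit spec value i is bound to constant_id i at byte offset 4*i; the
    // packed array is handed to the pipeline through VkSpecializationInfo.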
    const VkSpecializationInfo specInfo = {
        (uint32_t)specValues.size(),
        entries.data(),
        specValues.size() * sizeof(uint32_t),
        specValues.data(),
    };
    VkPipelineBindPoint bindPoint = VK_PIPELINE_BIND_POINT_COMPUTE;
    flushAlloc(vk, device, buffer->getAllocation());

    ComputePipelineWrapper pipeline(vk, device, m_computePipelineConstructionType,
                                    context.getBinaryCollection().get("comp"));
    pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
    pipeline.setSpecializationInfo(specInfo);
    pipeline.buildPipeline();

    const VkQueue queue = context.getUniversalQueue();
    Move<VkCommandPool> cmdPool = createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
                                                    context.getUniversalQueueFamilyIndex());
    Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);

    DescriptorSetUpdateBuilder setUpdateBuilder;
    setUpdateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0),
                                 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptor);
    setUpdateBuilder.update(vk, device);

    beginCommandBuffer(vk, *cmdBuffer, 0);

    vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, pipeline.getPipelineLayout(), 0u, 1, &*descriptorSet, 0u, DE_NULL);
    pipeline.bind(*cmdBuffer);

    vk.cmdDispatch(*cmdBuffer, numWGX, numWGY, numWGZ);

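    // Make the shader's storage-buffer writes visible to the host before mapping.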
    const VkMemoryBarrier barrier = {
        VK_STRUCTURE_TYPE_MEMORY_BARRIER, // sType
        nullptr,                          // pNext
        VK_ACCESS_SHADER_WRITE_BIT,       // srcAccessMask
        VK_ACCESS_HOST_READ_BIT,          // dstAccessMask
    };
    vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
                          (VkDependencyFlags)0, 1, &barrier, 0, nullptr, 0, nullptr);

    endCommandBuffer(vk, *cmdBuffer);

    submitCommandsAndWait(vk, device, queue, cmdBuffer.get());

    invalidateAlloc(vk, device, buffer->getAllocation());

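    // Every element must be 0 (plain zero-init tests) or exactly one atomic
    // increment per dispatched workgroup (increment mode).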
    for (uint32_t i = 0; i < (uint32_t)size / sizeof(uint32_t); ++i)
    {
        uint32_t expected = increment ? numWGX * numWGY * numWGZ : 0u;
        if (ptr[i] != expected)
        {
            log << tcu::TestLog::Message << "failure at index " << i << ": expected " << expected << ", got: " << ptr[i]
                << tcu::TestLog::EndMessage;
            return tcu::TestStatus::fail("compute failed");
        }
    }

    return tcu::TestStatus::pass("compute succeeded");
}

class MaxWorkgroupMemoryInstance : public vkt::TestInstance
{
public:
    MaxWorkgroupMemoryInstance(Context &context, uint32_t numWorkgroups,
                               const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestInstance(context)
        , m_numWorkgroups(numWorkgroups)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }
    tcu::TestStatus iterate(void);

private:
    uint32_t m_numWorkgroups;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class MaxWorkgroupMemoryTest : public vkt::TestCase
{
public:
    MaxWorkgroupMemoryTest(tcu::TestContext &testCtx, const std::string &name, uint32_t numWorkgroups,
                           const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestCase(testCtx, name)
        , m_numWorkgroups(numWorkgroups)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }

    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const
    {
        return new MaxWorkgroupMemoryInstance(context, m_numWorkgroups, m_computePipelineConstructionType);
    }
    virtual void checkSupport(Context &context) const;

private:
    uint32_t m_numWorkgroups;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

void MaxWorkgroupMemoryTest::checkSupport(Context &context) const
{
    context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
                                  m_computePipelineConstructionType);
}

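// The shader declares a maximum-size shared array zero-initialized with
// GL_EXT_null_initializer's '= {}' syntax. Every invocation checks the shared
// element matching its index; invocation 0 also covers elements beyond the
// workgroup size. Each zero observed bumps the corresponding output counter.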
void MaxWorkgroupMemoryTest::initPrograms(SourceCollections &sourceCollections) const
{
    std::ostringstream src;
    src << "#version 450\n";
    src << "#extension GL_EXT_null_initializer : enable\n";
    src << "layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n";
    src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
    src << "layout(constant_id = 3) const uint num_elems = " << 16384 / 16 << ";\n";
    src << "layout(constant_id = 4) const uint num_wgs = 0;\n";
    src << "shared uvec4 wg_mem[num_elems] = {};\n";
    src << "void main() {\n";
    src << "  uint idx_z = gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y;\n";
    src << "  uint idx_y = gl_LocalInvocationID.y * gl_WorkGroupSize.x;\n";
    src << "  uint idx_x = gl_LocalInvocationID.x;\n";
    src << "  uint idx = idx_x + idx_y + idx_z;\n";
    src << "  uint wg_size = gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z;\n";
    src << "  for (uint i = 0; i < num_elems; ++i) {\n";
    src << "    for (uint j = 0; j < 4; ++j) {\n";
    src << "      uint shared_idx = 4*i + j;\n";
    src << "      uint wg_val = wg_mem[i][j];\n";
    src << "      if (idx == shared_idx) {\n";
    src << "        atomicAdd(a.a[idx], wg_val == 0 ? 1 : 0);\n";
    src << "      } else if (idx == 0 && shared_idx >= wg_size) {\n";
    src << "        atomicAdd(a.a[shared_idx], wg_val == 0 ? 1 : 0);\n";
    src << "      }\n";
    src << "    }\n";
    src << "  }\n";
    src << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

tcu::TestStatus MaxWorkgroupMemoryInstance::iterate(void)
{
    VkPhysicalDeviceProperties properties;
    m_context.getInstanceInterface().getPhysicalDeviceProperties(m_context.getPhysicalDevice(), &properties);
    const uint32_t maxMemSize = properties.limits.maxComputeSharedMemorySize;

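    // Round each workgroup dimension down to a multiple of 13; a deliberately
    // non-power-of-two size presumably keeps implementations from passing by
    // zeroing shared memory only in power-of-two-sized blocks.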
    const uint32_t maxWG = std::min(247u, (properties.limits.maxComputeWorkGroupInvocations / 13) * 13);
    uint32_t wgx = (properties.limits.maxComputeWorkGroupSize[0] / 13) * 13;
    uint32_t wgy = 1;
    uint32_t wgz = 1;
    if (wgx < maxWG)
    {
        wgy = std::min(maxWG / wgx, (properties.limits.maxComputeWorkGroupSize[1] / 13) * 13);
    }
    if ((wgx * wgy) < maxWG)
    {
        wgz = std::min(maxWG / wgx / wgy, (properties.limits.maxComputeWorkGroupSize[2] / 13) * 13);
    }
    const uint32_t size = maxMemSize;
    const uint32_t numElems = maxMemSize / 16;

    return runCompute(m_context, size, m_numWorkgroups, 1, 1, m_computePipelineConstructionType,
                      {wgx, wgy, wgz, numElems}, /*increment*/ 1);
}

void AddMaxWorkgroupMemoryTests(tcu::TestCaseGroup *group,
                                vk::ComputePipelineConstructionType computePipelineConstructionType)
{
    std::vector<uint32_t> workgroups = {1, 2, 4, 16, 64, 128};
    for (uint32_t i = 0; i < workgroups.size(); ++i)
    {
        uint32_t numWG = workgroups[i];
        group->addChild(new MaxWorkgroupMemoryTest(group->getTestContext(), de::toString(numWG), numWG,
                                                   computePipelineConstructionType));
    }
}

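// Describes one shared variable type: typeSize is bytes per component,
// numElements and numRows multiply out to the total component count (numRows is
// 1 for scalars and vectors, >1 for matrices), and numVariables (picked at
// random in AddTypeTests) is how many shared variables the shader declares.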
struct TypeCaseDef
{
    std::string typeName;
    uint32_t typeSize;
    uint32_t numElements;
    uint32_t numRows;
    uint32_t numVariables;
};

class TypeTestInstance : public vkt::TestInstance
{
public:
    TypeTestInstance(Context &context, const TypeCaseDef &caseDef,
                     const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestInstance(context)
        , m_caseDef(caseDef)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }
    tcu::TestStatus iterate(void);

private:
    TypeCaseDef m_caseDef;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class TypeTest : public vkt::TestCase
{
public:
    TypeTest(tcu::TestContext &testCtx, const std::string &name, const TypeCaseDef &caseDef,
             const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestCase(testCtx, name)
        , m_caseDef(caseDef)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }

    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const
    {
        return new TypeTestInstance(context, m_caseDef, m_computePipelineConstructionType);
    }
    virtual void checkSupport(Context &context) const;

private:
    TypeCaseDef m_caseDef;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

void TypeTest::checkSupport(Context &context) const
{
    context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
                                  m_computePipelineConstructionType);

    VkPhysicalDeviceShaderFloat16Int8Features f16_i8_features;
    deMemset(&f16_i8_features, 0, sizeof(f16_i8_features));
    f16_i8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
    f16_i8_features.pNext = DE_NULL;

    VkPhysicalDeviceFeatures2 features2;
    deMemset(&features2, 0, sizeof(features2));
    features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
    features2.pNext = &f16_i8_features;
    context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2);

    if (m_caseDef.typeName == "float16_t" || m_caseDef.typeName == "f16vec2" || m_caseDef.typeName == "f16vec3" ||
        m_caseDef.typeName == "f16vec4" || m_caseDef.typeName == "f16mat2x2" || m_caseDef.typeName == "f16mat2x3" ||
        m_caseDef.typeName == "f16mat2x4" || m_caseDef.typeName == "f16mat3x2" || m_caseDef.typeName == "f16mat3x3" ||
        m_caseDef.typeName == "f16mat3x4" || m_caseDef.typeName == "f16mat4x2" || m_caseDef.typeName == "f16mat4x3" ||
        m_caseDef.typeName == "f16mat4x4")
    {
        if (f16_i8_features.shaderFloat16 != VK_TRUE)
            TCU_THROW(NotSupportedError, "shaderFloat16 not supported");
    }

    if (m_caseDef.typeName == "float64_t" || m_caseDef.typeName == "f64vec2" || m_caseDef.typeName == "f64vec3" ||
        m_caseDef.typeName == "f64vec4" || m_caseDef.typeName == "f64mat2x2" || m_caseDef.typeName == "f64mat2x3" ||
        m_caseDef.typeName == "f64mat2x4" || m_caseDef.typeName == "f64mat3x2" || m_caseDef.typeName == "f64mat3x3" ||
        m_caseDef.typeName == "f64mat3x4" || m_caseDef.typeName == "f64mat4x2" || m_caseDef.typeName == "f64mat4x3" ||
        m_caseDef.typeName == "f64mat4x4")
    {
        if (features2.features.shaderFloat64 != VK_TRUE)
            TCU_THROW(NotSupportedError, "shaderFloat64 not supported");
    }

    if (m_caseDef.typeName == "int8_t" || m_caseDef.typeName == "i8vec2" || m_caseDef.typeName == "i8vec3" ||
        m_caseDef.typeName == "i8vec4" || m_caseDef.typeName == "uint8_t" || m_caseDef.typeName == "u8vec2" ||
        m_caseDef.typeName == "u8vec3" || m_caseDef.typeName == "u8vec4")
    {
        if (f16_i8_features.shaderInt8 != VK_TRUE)
            TCU_THROW(NotSupportedError, "shaderInt8 not supported");
    }

    if (m_caseDef.typeName == "int16_t" || m_caseDef.typeName == "i16vec2" || m_caseDef.typeName == "i16vec3" ||
        m_caseDef.typeName == "i16vec4" || m_caseDef.typeName == "uint16_t" || m_caseDef.typeName == "u16vec2" ||
        m_caseDef.typeName == "u16vec3" || m_caseDef.typeName == "u16vec4")
    {
        if (features2.features.shaderInt16 != VK_TRUE)
            TCU_THROW(NotSupportedError, "shaderInt16 not supported");
    }

    if (m_caseDef.typeName == "int64_t" || m_caseDef.typeName == "i64vec2" || m_caseDef.typeName == "i64vec3" ||
        m_caseDef.typeName == "i64vec4" || m_caseDef.typeName == "uint64_t" || m_caseDef.typeName == "u64vec2" ||
        m_caseDef.typeName == "u64vec3" || m_caseDef.typeName == "u64vec4")
    {
        if (features2.features.shaderInt64 != VK_TRUE)
            TCU_THROW(NotSupportedError, "shaderInt64 not supported");
    }
}

void TypeTest::initPrograms(SourceCollections &sourceCollections) const
{
    std::ostringstream src;
    src << "#version 450\n";
    src << "#extension GL_EXT_null_initializer : enable\n";
    src << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n";
    src << "layout(local_size_x = " << m_caseDef.numElements * m_caseDef.numRows
        << ", local_size_y = 1, local_size_z = 1) in;\n";
    src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
    for (uint32_t i = 0; i < m_caseDef.numVariables; ++i)
    {
        src << "shared " << m_caseDef.typeName << " wg_mem" << i << " = {};\n";
    }
    src << "void main() {\n";
    if (m_caseDef.numRows > 1)
    {
        src << "  uint row = gl_LocalInvocationID.x % " << m_caseDef.numRows << ";\n";
        src << "  uint col = gl_LocalInvocationID.x / " << m_caseDef.numRows << ";\n";
    }
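    // 8-byte types are compared through an int64_t conversion, presumably so
    // set bits above bit 31 cannot be dropped by a narrower 32-bit cast.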
    std::string conv = m_caseDef.typeSize > 4 ? "int64_t" : "int";
    for (uint32_t v = 0; v < m_caseDef.numVariables; ++v)
    {
        if (m_caseDef.numElements == 1)
        {
            // Scalars.
            src << "  a.a[" << v << "] = (" << conv << "(wg_mem" << v << ") == 0) ? 0 : 1;\n";
        }
        else if (m_caseDef.numRows == 1)
        {
            // Vectors.
            src << "  a.a[" << v * m_caseDef.numRows * m_caseDef.numElements << " + gl_LocalInvocationID.x] = (" << conv
                << "(wg_mem" << v << "[gl_LocalInvocationID.x]) == 0) ? 0 : 1;\n";
        }
        else
        {
            // Matrices.
            src << "  a.a[" << v * m_caseDef.numRows * m_caseDef.numElements << " + gl_LocalInvocationID.x] = (" << conv
                << "(wg_mem" << v << "[row][col]) == 0) ? 0 : 1;\n";
        }
    }
    src << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

tcu::TestStatus TypeTestInstance::iterate(void)
{
    const uint32_t varBytes = m_caseDef.numElements * m_caseDef.numRows * (uint32_t)sizeof(uint32_t);
    return runCompute(m_context, varBytes * m_caseDef.numVariables, 1, 1, 1, m_computePipelineConstructionType);
}

void AddTypeTests(tcu::TestCaseGroup *group, vk::ComputePipelineConstructionType computePipelineConstructionType)
{
    deRandom rnd;
    deRandom_init(&rnd, 0);
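    // Fixed seed: the per-case shared-variable counts chosen below are random
    // but reproducible from run to run.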
    std::vector<TypeCaseDef> cases = {
        {"bool", 1, 1, 1, 0}, {"bvec2", 1, 2, 1, 0}, {"bvec3", 1, 3, 1, 0}, {"bvec4", 1, 4, 1, 0},
        {"uint32_t", 4, 1, 1, 0}, {"uvec2", 4, 2, 1, 0}, {"uvec3", 4, 3, 1, 0}, {"uvec4", 4, 4, 1, 0},
        {"int32_t", 4, 1, 1, 0}, {"ivec2", 4, 2, 1, 0}, {"ivec3", 4, 3, 1, 0}, {"ivec4", 4, 4, 1, 0},
        {"uint8_t", 1, 1, 1, 0}, {"u8vec2", 1, 2, 1, 0}, {"u8vec3", 1, 3, 1, 0}, {"u8vec4", 1, 4, 1, 0},
        {"int8_t", 1, 1, 1, 0}, {"i8vec2", 1, 2, 1, 0}, {"i8vec3", 1, 3, 1, 0}, {"i8vec4", 1, 4, 1, 0},
        {"uint16_t", 2, 1, 1, 0}, {"u16vec2", 2, 2, 1, 0}, {"u16vec3", 2, 3, 1, 0}, {"u16vec4", 2, 4, 1, 0},
        {"int16_t", 2, 1, 1, 0}, {"i16vec2", 2, 2, 1, 0}, {"i16vec3", 2, 3, 1, 0}, {"i16vec4", 2, 4, 1, 0},
        {"uint64_t", 8, 1, 1, 0}, {"u64vec2", 8, 2, 1, 0}, {"u64vec3", 8, 3, 1, 0}, {"u64vec4", 8, 4, 1, 0},
        {"int64_t", 8, 1, 1, 0}, {"i64vec2", 8, 2, 1, 0}, {"i64vec3", 8, 3, 1, 0}, {"i64vec4", 8, 4, 1, 0},
        {"float32_t", 4, 1, 1, 0}, {"f32vec2", 4, 2, 1, 0}, {"f32vec3", 4, 3, 1, 0}, {"f32vec4", 4, 4, 1, 0},
        {"f32mat2x2", 4, 2, 2, 0}, {"f32mat2x3", 4, 3, 2, 0}, {"f32mat2x4", 4, 4, 2, 0}, {"f32mat3x2", 4, 2, 3, 0},
        {"f32mat3x3", 4, 3, 3, 0}, {"f32mat3x4", 4, 4, 3, 0}, {"f32mat4x2", 4, 2, 4, 0}, {"f32mat4x3", 4, 3, 4, 0},
        {"f32mat4x4", 4, 4, 4, 0}, {"float16_t", 2, 1, 1, 0}, {"f16vec2", 2, 2, 1, 0}, {"f16vec3", 2, 3, 1, 0},
        {"f16vec4", 2, 4, 1, 0}, {"f16mat2x2", 2, 2, 2, 0}, {"f16mat2x3", 2, 3, 2, 0}, {"f16mat2x4", 2, 4, 2, 0},
        {"f16mat3x2", 2, 2, 3, 0}, {"f16mat3x3", 2, 3, 3, 0}, {"f16mat3x4", 2, 4, 3, 0}, {"f16mat4x2", 2, 2, 4, 0},
        {"f16mat4x3", 2, 3, 4, 0}, {"f16mat4x4", 2, 4, 4, 0}, {"float64_t", 8, 1, 1, 0}, {"f64vec2", 8, 2, 1, 0},
        {"f64vec3", 8, 3, 1, 0}, {"f64vec4", 8, 4, 1, 0}, {"f64mat2x2", 8, 2, 2, 0}, {"f64mat2x3", 8, 3, 2, 0},
        {"f64mat2x4", 8, 4, 2, 0}, {"f64mat3x2", 8, 2, 3, 0}, {"f64mat3x3", 8, 3, 3, 0}, {"f64mat3x4", 8, 4, 3, 0},
        {"f64mat4x2", 8, 2, 4, 0}, {"f64mat4x3", 8, 3, 4, 0}, {"f64mat4x4", 8, 4, 4, 0},
    };

    for (uint32_t i = 0; i < cases.size(); ++i)
    {
        cases[i].numVariables = (deRandom_getUint32(&rnd) % 16) + 1;
        group->addChild(new TypeTest(group->getTestContext(), cases[i].typeName.c_str(), cases[i],
                                     computePipelineConstructionType));
    }
}

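// 'index' is a feature-requirement bitmask consumed by CompositeTest::checkSupport():
// bit 0 = shaderFloat16, bit 1 = shaderFloat64, bit 2 = shaderInt8,
// bit 3 = shaderInt16, bit 4 = shaderInt64.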
struct CompositeCaseDef
{
    uint32_t index;
    std::string typeDefinition;
    std::string assignment;
    uint32_t elements;
    std::vector<uint32_t> specValues;

    CompositeCaseDef(uint32_t index_, const std::string &typeDefinition_, const std::string &assignment_,
                     uint32_t elements_, const std::vector<uint32_t> &specValues_)
        : index(index_)
        , typeDefinition(typeDefinition_)
        , assignment(assignment_)
        , elements(elements_)
        , specValues(specValues_)
    {
    }
};

class CompositeTestInstance : public vkt::TestInstance
{
public:
    CompositeTestInstance(Context &context, const CompositeCaseDef &caseDef,
                          const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestInstance(context)
        , m_caseDef(caseDef)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }
    tcu::TestStatus iterate(void);

private:
    CompositeCaseDef m_caseDef;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class CompositeTest : public vkt::TestCase
{
public:
    CompositeTest(tcu::TestContext &testCtx, const std::string &name, const CompositeCaseDef &caseDef,
                  const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestCase(testCtx, name)
        , m_caseDef(caseDef)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }

    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const
    {
        return new CompositeTestInstance(context, m_caseDef, m_computePipelineConstructionType);
    }
    virtual void checkSupport(Context &context) const;

private:
    CompositeCaseDef m_caseDef;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

void CompositeTest::checkSupport(Context &context) const
{
    context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
                                  m_computePipelineConstructionType);

    VkPhysicalDeviceShaderFloat16Int8Features f16_i8_features;
    deMemset(&f16_i8_features, 0, sizeof(f16_i8_features));
    f16_i8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
    f16_i8_features.pNext = DE_NULL;

    VkPhysicalDeviceFeatures2 features2;
    deMemset(&features2, 0, sizeof(features2));
    features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
    features2.pNext = &f16_i8_features;
    context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2);

    bool needsFloat16 = (m_caseDef.index & 0x1) != 0;
    bool needsFloat64 = (m_caseDef.index & 0x2) != 0;
    bool needsInt8 = (m_caseDef.index & 0x4) != 0;
    bool needsInt16 = (m_caseDef.index & 0x8) != 0;
    bool needsInt64 = (m_caseDef.index & 0x10) != 0;

    if (needsFloat16 && f16_i8_features.shaderFloat16 != VK_TRUE)
        TCU_THROW(NotSupportedError, "shaderFloat16 not supported");
    if (needsFloat64 && features2.features.shaderFloat64 != VK_TRUE)
        TCU_THROW(NotSupportedError, "shaderFloat64 not supported");
    if (needsInt8 && f16_i8_features.shaderInt8 != VK_TRUE)
        TCU_THROW(NotSupportedError, "shaderInt8 not supported");
    if (needsInt16 && features2.features.shaderInt16 != VK_TRUE)
        TCU_THROW(NotSupportedError, "shaderInt16 not supported");
    if (needsInt64 && features2.features.shaderInt64 != VK_TRUE)
        TCU_THROW(NotSupportedError, "shaderInt64 not supported");
}

void CompositeTest::initPrograms(SourceCollections &sourceCollections) const
{
    std::ostringstream src;
    src << "#version 450\n";
    src << "#extension GL_EXT_null_initializer : enable\n";
    src << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n";
    src << "\n";
    for (uint32_t i = 0; i < m_caseDef.specValues.size(); ++i)
    {
        src << "layout(constant_id = " << i << ") const uint specId" << i << " = 1;\n";
    }
    src << "\n";
    src << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n";
    src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
    src << "\n";
    src << m_caseDef.typeDefinition;
    src << "\n";
    src << "void main() {\n";
    src << m_caseDef.assignment;
    src << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

tcu::TestStatus CompositeTestInstance::iterate(void)
{
    const uint32_t bufferSize = (uint32_t)sizeof(uint32_t) * m_caseDef.elements;
    return runCompute(m_context, bufferSize, 1, 1, 1, m_computePipelineConstructionType, m_caseDef.specValues);
}

void AddCompositeTests(tcu::TestCaseGroup *group, vk::ComputePipelineConstructionType computePipelineConstructionType)
{
    const std::vector<CompositeCaseDef> cases{
        {
            0,
            "shared uint wg_mem[specId0] = {};\n",

            "for (uint i = 0; i < specId0; ++i) {\n"
            "  a.a[i] = wg_mem[i];\n"
            "}\n",
            16,
            {16},
        },

        {
            0,
            "shared float wg_mem[specId0][specId1] = {};\n",

            "for (uint i = 0; i < specId0; ++i) {\n"
            "  for (uint j = 0; j < specId1; ++j) {\n"
            "    uint idx = i * specId1 + j;\n"
            "    a.a[idx] = wg_mem[i][j] == 0.0f ? 0 : 1;\n"
            "  }\n"
            "}\n",
            32,
            {4, 8},
        },

        {
            0,
            "struct Sa {\n"
            "  uint a;\n"
            "  uvec2 b;\n"
            "  uvec3 c;\n"
            "  uvec4 d;\n"
            "  float e;\n"
            "  vec2 f;\n"
            "  vec3 g;\n"
            "  vec4 h;\n"
            "  bool i;\n"
            "  bvec2 j;\n"
            "  bvec3 k;\n"
            "  bvec4 l;\n"
            "};\n"
            "shared Sa wg_mem = {};\n",

            "uint i = 0;\n"
            "a.a[i++] = wg_mem.a;\n"
            "a.a[i++] = wg_mem.b.x;\n"
            "a.a[i++] = wg_mem.b.y;\n"
            "a.a[i++] = wg_mem.c.x;\n"
            "a.a[i++] = wg_mem.c.y;\n"
            "a.a[i++] = wg_mem.c.z;\n"
            "a.a[i++] = wg_mem.d.x;\n"
            "a.a[i++] = wg_mem.d.y;\n"
            "a.a[i++] = wg_mem.d.z;\n"
            "a.a[i++] = wg_mem.d.w;\n"
            "a.a[i++] = wg_mem.e == 0.0f ? 0 : 1;\n"
            "a.a[i++] = wg_mem.f.x == 0.0f ? 0 : 1;\n"
            "a.a[i++] = wg_mem.f.y == 0.0f ? 0 : 1;\n"
            "a.a[i++] = wg_mem.g.x == 0.0f ? 0 : 1;\n"
            "a.a[i++] = wg_mem.g.y == 0.0f ? 0 : 1;\n"
            "a.a[i++] = wg_mem.g.z == 0.0f ? 0 : 1;\n"
            "a.a[i++] = wg_mem.h.x == 0.0f ? 0 : 1;\n"
            "a.a[i++] = wg_mem.h.y == 0.0f ? 0 : 1;\n"
            "a.a[i++] = wg_mem.h.z == 0.0f ? 0 : 1;\n"
            "a.a[i++] = wg_mem.h.w == 0.0f ? 0 : 1;\n"
            "a.a[i++] = wg_mem.i ? 1 : 0;\n"
            "a.a[i++] = wg_mem.j.x ? 1 : 0;\n"
            "a.a[i++] = wg_mem.j.y ? 1 : 0;\n"
            "a.a[i++] = wg_mem.k.x ? 1 : 0;\n"
            "a.a[i++] = wg_mem.k.y ? 1 : 0;\n"
            "a.a[i++] = wg_mem.k.z ? 1 : 0;\n"
            "a.a[i++] = wg_mem.l.x ? 1 : 0;\n"
            "a.a[i++] = wg_mem.l.y ? 1 : 0;\n"
            "a.a[i++] = wg_mem.l.z ? 1 : 0;\n"
            "a.a[i++] = wg_mem.l.w ? 1 : 0;\n",
            30,
            {},
        },

        {
            0,
            "struct Sa {\n"
            "  uint a;\n"
            "};\n"
            "struct Sb {\n"
            "  uvec2 a;\n"
            "};\n"
            "struct Sc {\n"
            "  Sa a[specId0];\n"
            "  Sb b[specId1];\n"
            "};\n"
            "shared Sc wg_mem[specId2] = {};\n",

            "uint idx = 0;\n"
            "for (uint i = 0; i < specId2; ++i) {\n"
            "  for (uint j = 0; j < specId0; ++j) {\n"
            "    a.a[idx++] = wg_mem[i].a[j].a;\n"
            "  }\n"
            "  for (uint j = 0; j < specId1; ++j) {\n"
            "    a.a[idx++] = wg_mem[i].b[j].a.x;\n"
            "    a.a[idx++] = wg_mem[i].b[j].a.y;\n"
            "  }\n"
            "}\n",
            32,
            {2, 3, 4},
        },

        {
            1,
            "struct Sa {\n"
            "  f16vec2 a;\n"
            "  float16_t b[specId0];\n"
            "};\n"
            "shared Sa wg_mem = {};\n",

            "uint idx = 0;\n"
            "a.a[idx++] = floatBitsToUint(wg_mem.a.x) == 0 ? 0 : 1;\n"
            "a.a[idx++] = floatBitsToUint(wg_mem.a.y) == 0 ? 0 : 1;\n"
            "for (uint i = 0; i < specId0; ++i) {\n"
            "  a.a[idx++] = floatBitsToUint(wg_mem.b[i]) == 0 ? 0 : 1;\n"
            "}\n",
            18,
            {16},
        },

        {
            2,
            "struct Sa {\n"
            "  f64vec2 a;\n"
            "  float64_t b[specId0];\n"
            "};\n"
            "shared Sa wg_mem = {};\n",

            "uint idx = 0;\n"
            "a.a[idx++] = wg_mem.a.x == 0.0 ? 0 : 1;\n"
            "a.a[idx++] = wg_mem.a.y == 0.0 ? 0 : 1;\n"
            "for (uint i = 0; i < specId0; ++i) {\n"
            "  a.a[idx++] = wg_mem.b[i] == 0.0 ? 0 : 1;\n"
            "}\n",
            7,
            {5},
        },

        {
            4,
            "struct Sa {\n"
            "  i8vec2 a;\n"
            "  int8_t b[specId0];\n"
            "};\n"
            "shared Sa wg_mem = {};\n",

            "uint idx = 0;\n"
            "a.a[idx++] = wg_mem.a.x == 0 ? 0 : 1;\n"
            "a.a[idx++] = wg_mem.a.y == 0 ? 0 : 1;\n"
            "for (uint i = 0; i < specId0; ++i) {\n"
            "  a.a[idx++] = wg_mem.b[i] == 0 ? 0 : 1;\n"
            "}\n",
            34,
            {32},
        },

        {
            8,
            "struct Sa {\n"
            "  i16vec2 a;\n"
            "  int16_t b[specId0];\n"
            "};\n"
            "shared Sa wg_mem = {};\n",

            "uint idx = 0;\n"
            "a.a[idx++] = wg_mem.a.x == 0 ? 0 : 1;\n"
            "a.a[idx++] = wg_mem.a.y == 0 ? 0 : 1;\n"
            "for (uint i = 0; i < specId0; ++i) {\n"
            "  a.a[idx++] = wg_mem.b[i] == 0 ? 0 : 1;\n"
            "}\n",
            122,
            {120},
        },

        {
            16,
            "struct Sa {\n"
            "  i64vec2 a;\n"
            "  int64_t b[specId0];\n"
            "};\n"
            "shared Sa wg_mem = {};\n",

            "uint idx = 0;\n"
            "a.a[idx++] = wg_mem.a.x == 0 ? 0 : 1;\n"
            "a.a[idx++] = wg_mem.a.y == 0 ? 0 : 1;\n"
            "for (uint i = 0; i < specId0; ++i) {\n"
            "  a.a[idx++] = wg_mem.b[i] == 0 ? 0 : 1;\n"
            "}\n",
            63,
            {61},
        },

        {
            0x1f,
            "struct Sa {\n"
            "  float16_t a;\n"
            "  float b;\n"
            "  int8_t c;\n"
            "  int16_t d;\n"
            "  int e;\n"
            "  int64_t f;\n"
            "  float64_t g;\n"
            "};\n"
            "shared Sa wg_mem = {};\n",

            "uint idx = 0;\n"
            "a.a[idx++] = floatBitsToUint(wg_mem.a) == 0 ? 0 : 1;\n"
            "a.a[idx++] = floatBitsToUint(wg_mem.b) == 0 ? 0 : 1;\n"
            "a.a[idx++] = uint(wg_mem.c);\n"
            "a.a[idx++] = uint(wg_mem.d);\n"
            "a.a[idx++] = uint(wg_mem.e);\n"
            "a.a[idx++] = uint(wg_mem.f);\n"
            "a.a[idx++] = wg_mem.g == 0.0 ? 0 : 1;\n",
            7,
            {},
        },

        {
            0,
            "struct Sa {\n"
            "  uint a;\n"
            "};\n"
            "struct Sb {\n"
            "  Sa a[specId0];\n"
            "  uint b;\n"
            "};\n"
            "struct Sc {\n"
            "  Sb b[specId1];\n"
            "  uint c;\n"
            "};\n"
            "struct Sd {\n"
            "  Sc c[specId2];\n"
            "  uint d;\n"
            "};\n"
            "struct Se {\n"
            "  Sd d[specId3];\n"
            "  uint e;\n"
            "};\n"
            "shared Se wg_mem[specId4] = {};\n",

            "uint idx = 0;\n"
            "for (uint i1 = 0; i1 < specId4; ++i1) {\n"
            "  a.a[idx++] = wg_mem[i1].e;\n"
            "  for (uint i2 = 0; i2 < specId3; ++i2) {\n"
            "    a.a[idx++] = wg_mem[i1].d[i2].d;\n"
            "    for (uint i3 = 0; i3 < specId2; ++i3) {\n"
            "      a.a[idx++] = wg_mem[i1].d[i2].c[i3].c;\n"
            "      for (uint i4 = 0; i4 < specId1; ++i4) {\n"
            "        a.a[idx++] = wg_mem[i1].d[i2].c[i3].b[i4].b;\n"
            "        for (uint i5 = 0; i5 < specId0; ++i5) {\n"
            "          a.a[idx++] = wg_mem[i1].d[i2].c[i3].b[i4].a[i5].a;\n"
            "        }\n"
            "      }\n"
            "    }\n"
            "  }\n"
            "}\n",
            872,
            {6, 5, 4, 3, 2},
        },
    };

    for (uint32_t i = 0; i < cases.size(); ++i)
    {
        group->addChild(
            new CompositeTest(group->getTestContext(), de::toString(i), cases[i], computePipelineConstructionType));
    }
}

enum Dim
{
    DimX,
    DimY,
    DimZ,
};

class MaxWorkgroupsInstance : public vkt::TestInstance
{
public:
    MaxWorkgroupsInstance(Context &context, Dim dim,
                          const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestInstance(context)
        , m_dim(dim)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }
    tcu::TestStatus iterate(void);

private:
    Dim m_dim;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class MaxWorkgroupsTest : public vkt::TestCase
{
public:
    MaxWorkgroupsTest(tcu::TestContext &testCtx, const std::string &name, Dim dim,
                      const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestCase(testCtx, name)
        , m_dim(dim)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }

    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const
    {
        return new MaxWorkgroupsInstance(context, m_dim, m_computePipelineConstructionType);
    }
    virtual void checkSupport(Context &context) const;

private:
    Dim m_dim;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

void MaxWorkgroupsTest::checkSupport(Context &context) const
{
    context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
                                  m_computePipelineConstructionType);
}

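// Dispatches 65535 workgroups along a single axis. Invocation 0 of each
// workgroup shuffles two shared values through an atomicExchange before the
// barrier; every invocation then atomically counts whether the value it reads
// is still zero, and runCompute verifies one count per workgroup.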
void MaxWorkgroupsTest::initPrograms(SourceCollections &sourceCollections) const
{
    std::ostringstream src;
    src << "#version 450\n";
    src << "#extension GL_EXT_null_initializer : enable\n";
    src << "\n";
    src << "layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n";
    src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
    src << "shared uint wg_mem[2] = {};\n";
    std::string dim;
    switch (m_dim)
    {
    case DimX:
        dim = "x";
        break;
    case DimY:
        dim = "y";
        break;
    case DimZ:
        dim = "z";
        break;
    }
    src << "\n";
    src << "void main() {\n";
    src << "  uint idx_z = gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y;\n";
    src << "  uint idx_y = gl_LocalInvocationID.y * gl_WorkGroupSize.x;\n";
    src << "  uint idx_x = gl_LocalInvocationID.x;\n";
    src << "  uint idx = idx_x + idx_y + idx_z;\n";
    src << "  if (gl_LocalInvocationID.x == 0) {\n";
    src << "    wg_mem[0] = atomicExchange(wg_mem[1], wg_mem[0]);\n";
    src << "  }\n";
    src << "  barrier();\n";
    src << "  atomicAdd(a.a[idx], wg_mem[idx_x % 2] == 0 ? 1 : 0);\n";
    src << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

tcu::TestStatus MaxWorkgroupsInstance::iterate(void)
{
    VkPhysicalDeviceProperties properties;
    deMemset(&properties, 0, sizeof(properties));
    m_context.getInstanceInterface().getPhysicalDeviceProperties(m_context.getPhysicalDevice(), &properties);

    const uint32_t maxWG = std::min(2048u, properties.limits.maxComputeWorkGroupInvocations);
    uint32_t wgx = properties.limits.maxComputeWorkGroupSize[0];
    uint32_t wgy = 1;
    uint32_t wgz = 1;
    if (wgx < maxWG)
    {
        wgy = std::min(maxWG / wgx, properties.limits.maxComputeWorkGroupSize[1]);
    }
    if ((wgx * wgy) < maxWG)
    {
        wgz = std::min(maxWG / wgx / wgy, properties.limits.maxComputeWorkGroupSize[2]);
    }
    uint32_t size = (uint32_t)sizeof(uint32_t) * wgx * wgy * wgz;

    uint32_t num_wgx = m_dim == DimX ? 65535 : 1;
    uint32_t num_wgy = m_dim == DimY ? 65535 : 1;
    uint32_t num_wgz = m_dim == DimZ ? 65535 : 1;

    return runCompute(m_context, size, num_wgx, num_wgy, num_wgz, m_computePipelineConstructionType, {wgx, wgy, wgz},
                      /*increment*/ 1);
}

void AddMaxWorkgroupsTests(tcu::TestCaseGroup *group,
                           vk::ComputePipelineConstructionType computePipelineConstructionType)
{
    group->addChild(new MaxWorkgroupsTest(group->getTestContext(), "x", DimX, computePipelineConstructionType));
    group->addChild(new MaxWorkgroupsTest(group->getTestContext(), "y", DimY, computePipelineConstructionType));
    group->addChild(new MaxWorkgroupsTest(group->getTestContext(), "z", DimZ, computePipelineConstructionType));
}

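// Sizes the zero-initialized shared array with spec constants (WGX x WGY x WGZ);
// each invocation copies its own element to the output buffer for the host check.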
class SpecializeWorkgroupInstance : public vkt::TestInstance
{
public:
    SpecializeWorkgroupInstance(Context &context, uint32_t x, uint32_t y, uint32_t z,
                                const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestInstance(context)
        , m_x(x)
        , m_y(y)
        , m_z(z)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }
    tcu::TestStatus iterate(void);

private:
    uint32_t m_x;
    uint32_t m_y;
    uint32_t m_z;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class SpecializeWorkgroupTest : public vkt::TestCase
{
public:
    SpecializeWorkgroupTest(tcu::TestContext &testCtx, const std::string &name, uint32_t x, uint32_t y, uint32_t z,
                            const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestCase(testCtx, name)
        , m_x(x)
        , m_y(y)
        , m_z(z)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }

    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const
    {
        return new SpecializeWorkgroupInstance(context, m_x, m_y, m_z, m_computePipelineConstructionType);
    }
    virtual void checkSupport(Context &context) const;

private:
    uint32_t m_x;
    uint32_t m_y;
    uint32_t m_z;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

void SpecializeWorkgroupTest::checkSupport(Context &context) const
{
    context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
                                  m_computePipelineConstructionType);

    VkPhysicalDeviceProperties properties;
    deMemset(&properties, 0, sizeof(properties));
    context.getInstanceInterface().getPhysicalDeviceProperties(context.getPhysicalDevice(), &properties);
    if (m_x * m_y * m_z > properties.limits.maxComputeWorkGroupInvocations)
        TCU_THROW(NotSupportedError, "Workgroup size exceeds limits");
}

void SpecializeWorkgroupTest::initPrograms(SourceCollections &sourceCollections) const
{
    std::ostringstream src;
    src << "#version 450\n";
    src << "#extension GL_EXT_null_initializer : enable\n";
    src << "\n";
    src << "layout(constant_id = 0) const uint WGX = 1;\n";
    src << "layout(constant_id = 1) const uint WGY = 1;\n";
    src << "layout(constant_id = 2) const uint WGZ = 1;\n";
    src << "layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n";
    src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
    src << "shared uint wg_mem[WGX][WGY][WGZ] = {};\n";
    src << "\n";
    src << "void main() {\n";
    src << "  a.a[gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + gl_LocalInvocationID.y * "
           "gl_WorkGroupSize.x + gl_LocalInvocationID.x] = "
           "wg_mem[gl_LocalInvocationID.x][gl_LocalInvocationID.y][gl_LocalInvocationID.z];\n";
    src << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

tcu::TestStatus SpecializeWorkgroupInstance::iterate(void)
{
    const uint32_t size = (uint32_t)sizeof(uint32_t) * m_x * m_y * m_z;
    return runCompute(m_context, size, 1, 1, 1, m_computePipelineConstructionType, {m_x, m_y, m_z});
}

void AddSpecializeWorkgroupTests(tcu::TestCaseGroup *group,
                                 vk::ComputePipelineConstructionType computePipelineConstructionType)
{
    for (uint32_t z = 1; z <= 8; ++z)
    {
        for (uint32_t y = 1; y <= 8; ++y)
        {
            for (uint32_t x = 1; x <= 8; ++x)
            {
                group->addChild(new SpecializeWorkgroupTest(
                    group->getTestContext(), de::toString(x) + "_" + de::toString(y) + "_" + de::toString(z), x, y, z,
                    computePipelineConstructionType));
            }
        }
    }
}

class RepeatedPipelineInstance : public vkt::TestInstance
{
public:
    RepeatedPipelineInstance(Context &context, uint32_t xSize, uint32_t repeat, uint32_t odd)
        : TestInstance(context)
        , m_xSize(xSize)
        , m_repeat(repeat)
        , m_odd(odd)
    {
    }
    tcu::TestStatus iterate(void);

private:
    uint32_t m_xSize;
    uint32_t m_repeat;
    uint32_t m_odd;
};

class RepeatedPipelineTest : public vkt::TestCase
{
public:
    RepeatedPipelineTest(tcu::TestContext &testCtx, const std::string &name, uint32_t xSize, uint32_t repeat,
                         uint32_t odd, const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestCase(testCtx, name)
        , m_xSize(xSize)
        , m_repeat(repeat)
        , m_odd(odd)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }

    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const
    {
        return new RepeatedPipelineInstance(context, m_xSize, m_repeat, m_odd);
    }
    virtual void checkSupport(Context &context) const;

private:
    uint32_t m_xSize;
    uint32_t m_repeat;
    uint32_t m_odd;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

void RepeatedPipelineTest::checkSupport(Context &context) const
{
    context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
                                  m_computePipelineConstructionType);
}

void RepeatedPipelineTest::initPrograms(SourceCollections &sourceCollections) const
{
    std::ostringstream src;
    src << "#version 450\n";
    src << "#extension GL_EXT_null_initializer : enable\n";
    src << "\n";
    src << "layout(constant_id = 0) const uint WGX = 1;\n";
    src << "layout(local_size_x_id = 0, local_size_y = 2, local_size_z = 1) in;\n";
    src << "\n";
    src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
    src << "layout(set = 0, binding = 1) buffer B { uint b[]; } b;\n";
    src << "\n";
    src << "shared uint wg_mem[WGX][2] = {};\n";
    src << "void main() {\n";
    src << "  if (gl_LocalInvocationID.y == " << m_odd << ") {\n";
    src << "    wg_mem[gl_LocalInvocationID.x][gl_LocalInvocationID.y] = b.b[gl_LocalInvocationID.y * WGX + "
           "gl_LocalInvocationID.x];\n";
    src << "  }\n";
    src << "  barrier();\n";
    src << "  a.a[gl_LocalInvocationID.y * WGX + gl_LocalInvocationID.x] = "
           "wg_mem[gl_LocalInvocationID.x][gl_LocalInvocationID.y];\n";
    src << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

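// Builds the pipeline by hand (two storage-buffer bindings) and resubmits the
// same command buffer m_repeat times, resetting the output to 0xff between
// runs to prove shared memory is re-zeroed on every dispatch: rows selected by
// m_odd carry data copied from buffer B, the other rows must read back 0.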
tcu::TestStatus RepeatedPipelineInstance::iterate(void)
{
    Context &context = m_context;
    const uint32_t bufferSize = m_xSize * 2 * (uint32_t)sizeof(uint32_t);
    const uint32_t numBuffers = 2;

    const DeviceInterface &vk = context.getDeviceInterface();
    const VkDevice device = context.getDevice();
    Allocator &allocator = context.getDefaultAllocator();
    tcu::TestLog &log = context.getTestContext().getLog();

    de::MovePtr<BufferWithMemory> buffers[numBuffers];
    VkDescriptorBufferInfo bufferDescriptors[numBuffers];

    VkDeviceSize size = bufferSize;
    for (uint32_t i = 0; i < numBuffers; ++i)
    {
        buffers[i] = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
            vk, device, allocator,
            makeBufferCreateInfo(size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
                                           VK_BUFFER_USAGE_TRANSFER_SRC_BIT),
            MemoryRequirement::HostVisible | MemoryRequirement::Cached));
        bufferDescriptors[i] = makeDescriptorBufferInfo(**buffers[i], 0, size);
    }

    uint32_t *ptrs[numBuffers];
    for (uint32_t i = 0; i < numBuffers; ++i)
    {
        ptrs[i] = (uint32_t *)buffers[i]->getAllocation().getHostPtr();
    }
    for (uint32_t i = 0; i < bufferSize / sizeof(uint32_t); ++i)
    {
        ptrs[1][i] = i;
    }
    deMemset(ptrs[0], 0xff, (size_t)size);

    DescriptorSetLayoutBuilder layoutBuilder;
    for (uint32_t i = 0; i < numBuffers; ++i)
    {
        layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
    }

    Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
    Unique<VkDescriptorPool> descriptorPool(
        DescriptorPoolBuilder()
            .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, numBuffers)
            .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
    Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));

    const uint32_t specData[1] = {
        m_xSize,
    };
    const vk::VkSpecializationMapEntry entries[1] = {
        {0, (uint32_t)(sizeof(uint32_t) * 0), sizeof(uint32_t)},
    };
    const vk::VkSpecializationInfo specInfo = {1, entries, sizeof(specData), specData};

    const VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = {
        VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
        DE_NULL,
        (VkPipelineLayoutCreateFlags)0,
        1,
        &descriptorSetLayout.get(),
        0u,
        DE_NULL,
    };
    Move<VkPipelineLayout> pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutCreateInfo, NULL);
    VkPipelineBindPoint bindPoint = VK_PIPELINE_BIND_POINT_COMPUTE;

    for (uint32_t i = 0; i < numBuffers; ++i)
    {
        flushAlloc(vk, device, buffers[i]->getAllocation());
    }

    const Unique<VkShaderModule> shader(createShaderModule(vk, device, context.getBinaryCollection().get("comp"), 0));
    const VkPipelineShaderStageCreateInfo shaderInfo = {
        VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
        DE_NULL,
        0,
        VK_SHADER_STAGE_COMPUTE_BIT,
        *shader,
        "main",
        &specInfo,
    };

    const VkComputePipelineCreateInfo pipelineInfo = {
        VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, DE_NULL, 0u, shaderInfo, *pipelineLayout, (VkPipeline)0, 0u,
    };
    Move<VkPipeline> pipeline = createComputePipeline(vk, device, DE_NULL, &pipelineInfo, NULL);

    const VkQueue queue = context.getUniversalQueue();
    Move<VkCommandPool> cmdPool = createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
                                                    context.getUniversalQueueFamilyIndex());
    Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);

    DescriptorSetUpdateBuilder setUpdateBuilder;
    for (uint32_t i = 0; i < numBuffers; ++i)
    {
        setUpdateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i),
                                     VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[i]);
    }
    setUpdateBuilder.update(vk, device);

    beginCommandBuffer(vk, *cmdBuffer, 0);

    vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, *pipelineLayout, 0u, 1, &*descriptorSet, 0u, DE_NULL);
    vk.cmdBindPipeline(*cmdBuffer, bindPoint, *pipeline);

    vk.cmdDispatch(*cmdBuffer, 1, 1, 1);

    const VkMemoryBarrier barrier = {
        VK_STRUCTURE_TYPE_MEMORY_BARRIER, // sType
        nullptr,                          // pNext
        VK_ACCESS_SHADER_WRITE_BIT,       // srcAccessMask
        VK_ACCESS_HOST_READ_BIT,          // dstAccessMask
    };
    vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
                          (VkDependencyFlags)0, 1, &barrier, 0, nullptr, 0, nullptr);

    endCommandBuffer(vk, *cmdBuffer);

    for (uint32_t r = 0; r < m_repeat; ++r)
    {
        submitCommandsAndWait(vk, device, queue, cmdBuffer.get());

        invalidateAlloc(vk, device, buffers[0]->getAllocation());

        for (uint32_t i = 0; i < (uint32_t)size / sizeof(uint32_t); ++i)
        {
            uint32_t expected = (m_odd == (i / m_xSize)) ? i : 0u;
            if (ptrs[0][i] != expected)
            {
                log << tcu::TestLog::Message << "failure at index " << i << ": expected " << expected
                    << ", got: " << ptrs[0][i] << tcu::TestLog::EndMessage;
                return tcu::TestStatus::fail("compute failed");
            }
        }

        deMemset(ptrs[0], 0xff, (size_t)size);
        flushAlloc(vk, device, buffers[0]->getAllocation());
        setUpdateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0),
                                     VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[0]);
        setUpdateBuilder.update(vk, device);
    }

    return tcu::TestStatus::pass("compute succeeded");
}

void AddRepeatedPipelineTests(tcu::TestCaseGroup *group,
                              vk::ComputePipelineConstructionType computePipelineConstructionType)
{
    std::vector<uint32_t> xSizes = {4, 16, 32, 64};
    std::vector<uint32_t> odds = {0, 1};
    std::vector<uint32_t> repeats = {2, 4, 8, 16};
    for (uint32_t i = 0; i < xSizes.size(); ++i)
    {
        uint32_t x = xSizes[i];
        for (uint32_t j = 0; j < odds.size(); ++j)
        {
            uint32_t odd = odds[j];
            for (uint32_t k = 0; k < repeats.size(); ++k)
            {
                uint32_t repeat = repeats[k];
                group->addChild(new RepeatedPipelineTest(group->getTestContext(),
                                                         std::string("x_") + de::toString(x) +
                                                             (odd == 1 ? "_odd" : "_even") + "_repeat_" +
                                                             de::toString(repeat),
                                                         x, repeat, odd, computePipelineConstructionType));
            }
        }
    }
}
#ifndef CTS_USES_VULKANSC
void AddSharedMemoryTests(tcu::TestCaseGroup *group)
{
    tcu::TestContext &testCtx = group->getTestContext();
    std::string filePath = "compute/zero_initialize_workgroup_memory";
    std::vector<std::string> requirements;

    std::string testNames[] = {"workgroup_size_128", "workgroup_size_8x8x2", "workgroup_size_8x2x8",
                               "workgroup_size_2x8x8", "workgroup_size_8x4x4", "workgroup_size_4x8x4",
                               "workgroup_size_4x4x8"};

    requirements.push_back("VK_KHR_zero_initialize_workgroup_memory");

    for (const auto &testName : testNames)
    {
        group->addChild(cts_amber::createAmberTestCase(testCtx, testName.c_str(), "", filePath.c_str(),
                                                       testName + ".amber", requirements));
    }
}
#endif // CTS_USES_VULKANSC

} // namespace

tcu::TestCaseGroup *createZeroInitializeWorkgroupMemoryTests(
    tcu::TestContext &testCtx, vk::ComputePipelineConstructionType computePipelineConstructionType)
{
    de::MovePtr<tcu::TestCaseGroup> tests(new tcu::TestCaseGroup(testCtx, "zero_initialize_workgroup_memory"));

    tcu::TestCaseGroup *maxWorkgroupMemoryGroup =
        // Read initialization of max workgroup memory
        new tcu::TestCaseGroup(testCtx, "max_workgroup_memory");
    AddMaxWorkgroupMemoryTests(maxWorkgroupMemoryGroup, computePipelineConstructionType);
    tests->addChild(maxWorkgroupMemoryGroup);

    tcu::TestCaseGroup *typeGroup = new tcu::TestCaseGroup(testCtx, "types");
    AddTypeTests(typeGroup, computePipelineConstructionType);
    tests->addChild(typeGroup);

    tcu::TestCaseGroup *compositeGroup = new tcu::TestCaseGroup(testCtx, "composites");
    AddCompositeTests(compositeGroup, computePipelineConstructionType);
    tests->addChild(compositeGroup);

    tcu::TestCaseGroup *maxWorkgroupsGroup = new tcu::TestCaseGroup(testCtx, "max_workgroups");
    AddMaxWorkgroupsTests(maxWorkgroupsGroup, computePipelineConstructionType);
    tests->addChild(maxWorkgroupsGroup);

    tcu::TestCaseGroup *specializeWorkgroupGroup = new tcu::TestCaseGroup(testCtx, "specialize_workgroup");
    AddSpecializeWorkgroupTests(specializeWorkgroupGroup, computePipelineConstructionType);
    tests->addChild(specializeWorkgroupGroup);

    tcu::TestCaseGroup *repeatPipelineGroup = new tcu::TestCaseGroup(testCtx, "repeat_pipeline");
    AddRepeatedPipelineTests(repeatPipelineGroup, computePipelineConstructionType);
    tests->addChild(repeatPipelineGroup);

#ifndef CTS_USES_VULKANSC
    // These are Amber tests and Amber cannot use shader objects.
    if (!isComputePipelineConstructionTypeShaderObject(computePipelineConstructionType))
    {
        tcu::TestCaseGroup *subgroupInvocationGroup = new tcu::TestCaseGroup(testCtx, "shared_memory_blocks");
        AddSharedMemoryTests(subgroupInvocationGroup);
        tests->addChild(subgroupInvocationGroup);
    }
#endif // CTS_USES_VULKANSC

    return tests.release();
}

} // namespace compute
} // namespace vkt