1 /*-------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2018 Google Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief SPIR-V assembly tests for workgroup memory.
22  *//*--------------------------------------------------------------------*/
23 
24 #include "vktSpvAsmWorkgroupMemoryTests.hpp"
25 #include "vktSpvAsmComputeShaderCase.hpp"
26 #include "vktSpvAsmComputeShaderTestUtil.hpp"
27 #include "tcuStringTemplate.hpp"
28 #include "tcuFloat.hpp"
29 
30 namespace vkt
31 {
32 namespace SpirVAssembly
33 {
34 
35 using namespace vk;
36 using std::map;
37 using std::string;
38 using std::vector;
39 using tcu::Float16;
40 using tcu::Float32;
41 using tcu::IVec3;
42 using tcu::StringTemplate;
43 using tcu::Vec4;
44 
45 namespace
46 {
47 
// Plain description of one element data type.
// NOTE(review): not referenced anywhere in the visible part of this file; the
// field meanings below are inferred from their names only - confirm before use.
struct DataType
{
    std::string name;   // presumably a human-readable / test-case name
    std::string type;   // presumably the SPIR-V type identifier
    uint32_t sizeBytes; // presumably the size of one element, in bytes
};
54 
checkResultsFloat16(const vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const vector<Resource> & expectedOutputs,tcu::TestLog & log)55 bool checkResultsFloat16(const vector<Resource> &inputs, const vector<AllocationSp> &outputAllocs,
56                          const vector<Resource> &expectedOutputs, tcu::TestLog &log)
57 {
58     DE_UNREF(inputs);
59     DE_UNREF(log);
60 
61     std::vector<uint8_t> expectedBytes;
62     expectedOutputs.front().getBuffer()->getPackedBytes(expectedBytes);
63 
64     const uint16_t *results  = reinterpret_cast<const uint16_t *>(outputAllocs.front()->getHostPtr());
65     const uint16_t *expected = reinterpret_cast<const uint16_t *>(&expectedBytes[0]);
66 
67     for (size_t i = 0; i < expectedBytes.size() / sizeof(uint16_t); i++)
68     {
69         if (results[i] == expected[i])
70             continue;
71 
72         if (Float16(results[i]).isNaN() && Float16(expected[i]).isNaN())
73             continue;
74 
75         return false;
76     }
77 
78     return true;
79 }
80 
checkResultsFloat32(const vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const vector<Resource> & expectedOutputs,tcu::TestLog & log)81 bool checkResultsFloat32(const vector<Resource> &inputs, const vector<AllocationSp> &outputAllocs,
82                          const vector<Resource> &expectedOutputs, tcu::TestLog &log)
83 {
84     DE_UNREF(inputs);
85     DE_UNREF(log);
86 
87     std::vector<uint8_t> expectedBytes;
88     expectedOutputs.front().getBuffer()->getPackedBytes(expectedBytes);
89 
90     const uint32_t *results  = reinterpret_cast<const uint32_t *>(outputAllocs.front()->getHostPtr());
91     const uint32_t *expected = reinterpret_cast<const uint32_t *>(&expectedBytes[0]);
92 
93     for (size_t i = 0; i < expectedBytes.size() / sizeof(uint32_t); i++)
94     {
95         if (results[i] == expected[i])
96             continue;
97 
98         if (Float32(results[i]).isNaN() && Float32(expected[i]).isNaN())
99             continue;
100 
101         return false;
102     }
103 
104     return true;
105 }
106 
// Returns true when the 64-bit pattern f encodes an IEEE 754 double NaN.
bool isNanFloat64(uint64_t f)
{
    // With the sign bit cleared, infinity is the pattern with all exponent bits
    // set and a zero mantissa. Anything strictly above it has all exponent bits
    // set and a non-zero mantissa, i.e. it is a NaN.
    const uint64_t signBit      = 0x8000000000000000;
    const uint64_t infinityBits = 0x7ff0000000000000;
    const uint64_t magnitude    = f & ~signBit;

    return magnitude > infinityBits;
}
114 
checkResultsFloat64(const vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const vector<Resource> & expectedOutputs,tcu::TestLog & log)115 bool checkResultsFloat64(const vector<Resource> &inputs, const vector<AllocationSp> &outputAllocs,
116                          const vector<Resource> &expectedOutputs, tcu::TestLog &log)
117 {
118     DE_UNREF(inputs);
119     DE_UNREF(log);
120 
121     std::vector<uint8_t> expectedBytes;
122     expectedOutputs.front().getBuffer()->getPackedBytes(expectedBytes);
123 
124     const uint64_t *results  = reinterpret_cast<const uint64_t *>(outputAllocs.front()->getHostPtr());
125     const uint64_t *expected = reinterpret_cast<const uint64_t *>(&expectedBytes[0]);
126 
127     for (size_t i = 0; i < expectedBytes.size() / sizeof(uint64_t); i++)
128     {
129         if (results[i] == expected[i])
130             continue;
131 
132         if (isNanFloat64(results[i]) && isNanFloat64(expected[i]))
133             continue;
134 
135         return false;
136     }
137 
138     return true;
139 }
140 
addComputeWorkgroupMemoryTests(tcu::TestCaseGroup * group)141 void addComputeWorkgroupMemoryTests(tcu::TestCaseGroup *group)
142 {
143     tcu::TestContext &testCtx = group->getTestContext();
144     de::Random rnd(deStringHash(group->getName()));
145     const int numElements = 128;
146 
147     /*
148     For each data type (TYPE) run the following shader:
149 
150     #version 430
151 
152     layout (local_size_x = 16, local_size_y = 4, local_size_z = 2) in;
153 
154     layout (binding = 0) buffer Input
155     {
156         TYPE data[128];
157     } dataInput;
158 
159     layout (binding = 1) buffer Output
160     {
161         TYPE data[128];
162     } dataOutput;
163 
164     shared TYPE sharedData[128];
165 
166     void main()
167     {
168         uint idx = gl_LocalInvocationID.z * 64 + gl_LocalInvocationID.y * 16 + gl_LocalInvocationID.x;
169         sharedData[idx] = dataInput.data[idx];
170         memoryBarrierShared();
171         barrier();
172         dataOutput.data[idx] = sharedData[127-idx];
173     }
174     */
175 
176     const StringTemplate shaderSource(
177         "                                     OpCapability Shader\n"
178         "${capabilities:opt}"
179         "${extensions:opt}"
180         "                                %1 = OpExtInstImport \"GLSL.std.450\"\n"
181         "                                     OpMemoryModel Logical GLSL450\n"
182         "                                     OpEntryPoint GLCompute %main \"main\" %gl_LocalInvocationID\n"
183         "                                     OpExecutionMode %main LocalSize 16 4 2\n"
184         "                                     OpSource GLSL 430\n"
185         "                                     OpDecorate %gl_LocalInvocationID BuiltIn LocalInvocationId\n"
186         "                                     OpDecorate %_arr_uint_128_0 ArrayStride ${sizeBytes}\n"
187         "                                     OpMemberDecorate %Input 0 Offset 0\n"
188         "                                     OpDecorate %Input BufferBlock\n"
189         "                                     OpDecorate %dataInput DescriptorSet 0\n"
190         "                                     OpDecorate %dataInput Binding 0\n"
191         "                                     OpDecorate %_arr_uint_128_1 ArrayStride ${sizeBytes}\n"
192         "                                     OpMemberDecorate %Output 0 Offset 0\n"
193         "                                     OpDecorate %Output BufferBlock\n"
194         "                                     OpDecorate %dataOutput DescriptorSet 0\n"
195         "                                     OpDecorate %dataOutput Binding 1\n"
196         "                                     OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize\n"
197         "                             %void = OpTypeVoid\n"
198         "                                %3 = OpTypeFunction %void\n"
199         "                              %u32 = OpTypeInt 32 0\n"
200         "               %_ptr_Function_uint = OpTypePointer Function %u32\n"
201         "                           %v3uint = OpTypeVector %u32 3\n"
202         "                %_ptr_Input_v3uint = OpTypePointer Input %v3uint\n"
203         "             %gl_LocalInvocationID = OpVariable %_ptr_Input_v3uint Input\n"
204         "                           %uint_2 = OpConstant %u32 2\n"
205         "                  %_ptr_Input_uint = OpTypePointer Input %u32\n"
206         "                          %uint_64 = OpConstant %u32 64\n"
207         "                           %uint_1 = OpConstant %u32 1\n"
208         "                          %uint_16 = OpConstant %u32 16\n"
209         "                           %uint_0 = OpConstant %u32 0\n"
210         "                         %uint_127 = OpConstant %u32 127\n"
211         "                           %uint_4 = OpConstant %u32 4\n"
212         "                              %i32 = OpTypeInt 32 1\n"
213         "${dataTypeDecl}\n"
214         "                         %uint_128 = OpConstant %u32 128\n"
215         "                    %_arr_uint_128 = OpTypeArray %${dataType} %uint_128\n"
216         "     %_ptr_Workgroup__arr_uint_128 = OpTypePointer Workgroup %_arr_uint_128\n"
217         "                       %sharedData = OpVariable %_ptr_Workgroup__arr_uint_128 Workgroup\n"
218         "                  %_arr_uint_128_0 = OpTypeArray %${dataType} %uint_128\n"
219         "                            %Input = OpTypeStruct %_arr_uint_128_0\n"
220         "               %_ptr_Uniform_Input = OpTypePointer Uniform %Input\n"
221         "                        %dataInput = OpVariable %_ptr_Uniform_Input Uniform\n"
222         "                            %int_0 = OpConstant %i32 0\n"
223         "                     %_ptr_Uniform = OpTypePointer Uniform %${dataType}\n"
224         "                   %_ptr_Workgroup = OpTypePointer Workgroup %${dataType}\n"
225         "                         %uint_264 = OpConstant %u32 264\n"
226         "                  %_arr_uint_128_1 = OpTypeArray %${dataType} %uint_128\n"
227         "                           %Output = OpTypeStruct %_arr_uint_128_1\n"
228         "              %_ptr_Uniform_Output = OpTypePointer Uniform %Output\n"
229         "                       %dataOutput = OpVariable %_ptr_Uniform_Output Uniform\n"
230         "                 %gl_WorkGroupSize = OpConstantComposite %v3uint %uint_16 %uint_4 %uint_2\n"
231         "                             %main = OpFunction %void None %3\n"
232         "                                %5 = OpLabel\n"
233         "                              %idx = OpVariable %_ptr_Function_uint Function\n"
234         "                               %14 = OpAccessChain %_ptr_Input_uint %gl_LocalInvocationID %uint_2\n"
235         "                               %15 = OpLoad %u32 %14\n"
236         "                               %17 = OpIMul %u32 %15 %uint_64\n"
237         "                               %19 = OpAccessChain %_ptr_Input_uint %gl_LocalInvocationID %uint_1\n"
238         "                               %20 = OpLoad %u32 %19\n"
239         "                               %22 = OpIMul %u32 %20 %uint_16\n"
240         "                               %23 = OpIAdd %u32 %17 %22\n"
241         "                               %25 = OpAccessChain %_ptr_Input_uint %gl_LocalInvocationID %uint_0\n"
242         "                               %26 = OpLoad %u32 %25\n"
243         "                               %27 = OpIAdd %u32 %23 %26\n"
244         "                                     OpStore %idx %27\n"
245         "                               %33 = OpLoad %u32 %idx\n"
246         "                               %39 = OpLoad %u32 %idx\n"
247         "                               %41 = OpAccessChain %_ptr_Uniform %dataInput %int_0 %39\n"
248         "                               %42 = OpLoad %${dataType} %41\n"
249         "                               %44 = OpAccessChain %_ptr_Workgroup %sharedData %33\n"
250         "                                     OpStore %44 %42\n"
251         "                                     OpMemoryBarrier %uint_1 %uint_264\n"
252         "                                     OpControlBarrier %uint_2 %uint_2 %uint_264\n"
253         "                               %50 = OpLoad %u32 %idx\n"
254         "                               %52 = OpLoad %u32 %idx\n"
255         "                               %53 = OpISub %u32 %uint_127 %52\n"
256         "                               %54 = OpAccessChain %_ptr_Workgroup %sharedData %53\n"
257         "                               %55 = OpLoad %${dataType} %54\n"
258         "                               %56 = OpAccessChain %_ptr_Uniform %dataOutput %int_0 %50\n"
259         "                                     OpStore %56 %55\n"
260         "                                     OpReturn\n"
261         "                                     OpFunctionEnd\n");
262 
263     // float64
264     {
265         VulkanFeatures features;
266         map<string, string> shaderSpec;
267 
268         shaderSpec["sizeBytes"]    = "8";
269         shaderSpec["dataTypeDecl"] = "%f64 = OpTypeFloat 64";
270         shaderSpec["dataType"]     = "f64";
271         shaderSpec["capabilities"] = "OpCapability Float64\n";
272 
273         features.coreFeatures.shaderFloat64 = VK_TRUE;
274 
275         vector<double> inputData = getFloat64s(rnd, numElements);
276         vector<double> outputData;
277         ComputeShaderSpec spec;
278 
279         outputData.reserve(numElements);
280         for (uint32_t numIdx = 0; numIdx < numElements; ++numIdx)
281             outputData.push_back(inputData[numElements - numIdx - 1]);
282 
283         spec.assembly                = shaderSource.specialize(shaderSpec);
284         spec.numWorkGroups           = IVec3(1, 1, 1);
285         spec.verifyIO                = checkResultsFloat64;
286         spec.requestedVulkanFeatures = features;
287 
288         spec.inputs.push_back(Resource(BufferSp(new Float64Buffer(inputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
289         spec.outputs.push_back(Resource(BufferSp(new Float64Buffer(outputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
290 
291         group->addChild(new SpvAsmComputeShaderCase(testCtx, "float64", spec));
292     }
293 
294     // float32
295     {
296         map<string, string> shaderSpec;
297 
298         shaderSpec["sizeBytes"]    = "4";
299         shaderSpec["dataTypeDecl"] = "%f32 = OpTypeFloat 32";
300         shaderSpec["dataType"]     = "f32";
301 
302         vector<float> inputData = getFloat32s(rnd, numElements);
303         vector<float> outputData;
304         ComputeShaderSpec spec;
305 
306         outputData.reserve(numElements);
307         for (uint32_t numIdx = 0; numIdx < numElements; ++numIdx)
308             outputData.push_back(inputData[numElements - numIdx - 1]);
309 
310         spec.assembly      = shaderSource.specialize(shaderSpec);
311         spec.numWorkGroups = IVec3(1, 1, 1);
312         spec.verifyIO      = checkResultsFloat32;
313 
314         spec.inputs.push_back(Resource(BufferSp(new Float32Buffer(inputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
315         spec.outputs.push_back(Resource(BufferSp(new Float32Buffer(outputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
316 
317         group->addChild(new SpvAsmComputeShaderCase(testCtx, "float32", spec));
318     }
319 
320     // float16
321     {
322         VulkanFeatures features;
323         map<string, string> shaderSpec;
324 
325         shaderSpec["sizeBytes"]    = "2";
326         shaderSpec["dataTypeDecl"] = "%f16 = OpTypeFloat 16";
327         shaderSpec["dataType"]     = "f16";
328         shaderSpec["extensions"]   = "OpExtension \"SPV_KHR_16bit_storage\"\n";
329         shaderSpec["capabilities"] = "OpCapability StorageUniformBufferBlock16\nOpCapability Float16\n";
330 
331         features.ext16BitStorage.storageBuffer16BitAccess = true;
332         features.extFloat16Int8.shaderFloat16             = true;
333 
334         vector<deFloat16> inputData = getFloat16s(rnd, numElements);
335         vector<deFloat16> outputData;
336         ComputeShaderSpec spec;
337 
338         outputData.reserve(numElements);
339         for (uint32_t numIdx = 0; numIdx < numElements; ++numIdx)
340             outputData.push_back(inputData[numElements - numIdx - 1]);
341 
342         spec.assembly      = shaderSource.specialize(shaderSpec);
343         spec.numWorkGroups = IVec3(1, 1, 1);
344         spec.extensions.push_back("VK_KHR_16bit_storage");
345         spec.extensions.push_back("VK_KHR_shader_float16_int8");
346         spec.requestedVulkanFeatures = features;
347         spec.verifyIO                = checkResultsFloat16;
348 
349         spec.inputs.push_back(Resource(BufferSp(new Float16Buffer(inputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
350         spec.outputs.push_back(Resource(BufferSp(new Float16Buffer(outputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
351 
352         group->addChild(new SpvAsmComputeShaderCase(testCtx, "float16", spec));
353     }
354 
355     // int64
356     {
357         VulkanFeatures features;
358         map<string, string> shaderSpec;
359 
360         shaderSpec["sizeBytes"]    = "8";
361         shaderSpec["dataTypeDecl"] = "%i64 = OpTypeInt 64 1";
362         shaderSpec["dataType"]     = "i64";
363         shaderSpec["capabilities"] = "OpCapability Int64\n";
364 
365         features.coreFeatures.shaderInt64 = VK_TRUE;
366 
367         vector<int64_t> inputData = getInt64s(rnd, numElements);
368         vector<int64_t> outputData;
369         ComputeShaderSpec spec;
370 
371         outputData.reserve(numElements);
372         for (uint32_t numIdx = 0; numIdx < numElements; ++numIdx)
373             outputData.push_back(inputData[numElements - numIdx - 1]);
374 
375         spec.assembly                = shaderSource.specialize(shaderSpec);
376         spec.numWorkGroups           = IVec3(1, 1, 1);
377         spec.requestedVulkanFeatures = features;
378 
379         spec.inputs.push_back(Resource(BufferSp(new Int64Buffer(inputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
380         spec.outputs.push_back(Resource(BufferSp(new Int64Buffer(outputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
381 
382         group->addChild(new SpvAsmComputeShaderCase(testCtx, "int64", spec));
383     }
384 
385     // int32
386     {
387         map<string, string> shaderSpec;
388 
389         shaderSpec["sizeBytes"]    = "4";
390         shaderSpec["dataTypeDecl"] = "";
391         shaderSpec["dataType"]     = "i32";
392 
393         vector<int32_t> inputData = getInt32s(rnd, numElements);
394         vector<int32_t> outputData;
395         ComputeShaderSpec spec;
396 
397         outputData.reserve(numElements);
398         for (uint32_t numIdx = 0; numIdx < numElements; ++numIdx)
399             outputData.push_back(inputData[numElements - numIdx - 1]);
400 
401         spec.assembly      = shaderSource.specialize(shaderSpec);
402         spec.numWorkGroups = IVec3(1, 1, 1);
403 
404         spec.inputs.push_back(Resource(BufferSp(new Int32Buffer(inputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
405         spec.outputs.push_back(Resource(BufferSp(new Int32Buffer(outputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
406 
407         group->addChild(new SpvAsmComputeShaderCase(testCtx, "int32", spec));
408     }
409 
410     // int16
411     {
412         VulkanFeatures features;
413         map<string, string> shaderSpec;
414 
415         shaderSpec["sizeBytes"]    = "2";
416         shaderSpec["dataTypeDecl"] = "%i16 = OpTypeInt 16 1";
417         shaderSpec["dataType"]     = "i16";
418         shaderSpec["extensions"]   = "OpExtension \"SPV_KHR_16bit_storage\"\n";
419         shaderSpec["capabilities"] = "OpCapability Int16\n";
420 
421         features.coreFeatures.shaderInt16                 = true;
422         features.ext16BitStorage.storageBuffer16BitAccess = true;
423 
424         vector<int16_t> inputData = getInt16s(rnd, numElements);
425         vector<int16_t> outputData;
426         ComputeShaderSpec spec;
427 
428         outputData.reserve(numElements);
429         for (uint32_t numIdx = 0; numIdx < numElements; ++numIdx)
430             outputData.push_back(inputData[numElements - numIdx - 1]);
431 
432         spec.assembly      = shaderSource.specialize(shaderSpec);
433         spec.numWorkGroups = IVec3(1, 1, 1);
434         spec.extensions.push_back("VK_KHR_16bit_storage");
435         spec.requestedVulkanFeatures = features;
436 
437         spec.inputs.push_back(Resource(BufferSp(new Int16Buffer(inputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
438         spec.outputs.push_back(Resource(BufferSp(new Int16Buffer(outputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
439 
440         group->addChild(new SpvAsmComputeShaderCase(testCtx, "int16", spec));
441     }
442 
443     // int8
444     {
445         VulkanFeatures features;
446         map<string, string> shaderSpec;
447 
448         shaderSpec["sizeBytes"]    = "1";
449         shaderSpec["dataTypeDecl"] = "%i8 = OpTypeInt 8 1";
450         shaderSpec["dataType"]     = "i8";
451         shaderSpec["capabilities"] = "OpCapability UniformAndStorageBuffer8BitAccess\nOpCapability Int8\n";
452         shaderSpec["extensions"]   = "OpExtension \"SPV_KHR_8bit_storage\"\n";
453 
454         features.ext8BitStorage.uniformAndStorageBuffer8BitAccess = true;
455         features.extFloat16Int8.shaderInt8                        = true;
456 
457         vector<int8_t> inputData = getInt8s(rnd, numElements);
458         vector<int8_t> outputData;
459         ComputeShaderSpec spec;
460 
461         outputData.reserve(numElements);
462         for (uint32_t numIdx = 0; numIdx < numElements; ++numIdx)
463             outputData.push_back(inputData[numElements - numIdx - 1]);
464 
465         spec.assembly      = shaderSource.specialize(shaderSpec);
466         spec.numWorkGroups = IVec3(1, 1, 1);
467         spec.extensions.push_back("VK_KHR_8bit_storage");
468         spec.extensions.push_back("VK_KHR_shader_float16_int8");
469         spec.requestedVulkanFeatures = features;
470 
471         spec.inputs.push_back(Resource(BufferSp(new Int8Buffer(inputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
472         spec.outputs.push_back(Resource(BufferSp(new Int8Buffer(outputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
473 
474         group->addChild(new SpvAsmComputeShaderCase(testCtx, "int8", spec));
475     }
476 
477     // uint64
478     {
479         VulkanFeatures features;
480         map<string, string> shaderSpec;
481 
482         shaderSpec["sizeBytes"]    = "8";
483         shaderSpec["dataTypeDecl"] = "%u64 = OpTypeInt 64 0";
484         shaderSpec["dataType"]     = "u64";
485         shaderSpec["capabilities"] = "OpCapability Int64\n";
486 
487         features.coreFeatures.shaderInt64 = VK_TRUE;
488 
489         vector<uint64_t> inputData;
490         vector<uint64_t> outputData;
491         ComputeShaderSpec spec;
492 
493         inputData.reserve(numElements);
494         for (uint32_t numIdx = 0; numIdx < numElements; ++numIdx)
495             inputData.push_back(rnd.getUint64());
496 
497         outputData.reserve(numElements);
498         for (uint32_t numIdx = 0; numIdx < numElements; ++numIdx)
499             outputData.push_back(inputData[numElements - numIdx - 1]);
500 
501         spec.assembly                = shaderSource.specialize(shaderSpec);
502         spec.numWorkGroups           = IVec3(1, 1, 1);
503         spec.requestedVulkanFeatures = features;
504 
505         spec.inputs.push_back(Resource(BufferSp(new Uint64Buffer(inputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
506         spec.outputs.push_back(Resource(BufferSp(new Uint64Buffer(outputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
507 
508         group->addChild(new SpvAsmComputeShaderCase(testCtx, "uint64", spec));
509     }
510 
511     // uint32
512     {
513         map<string, string> shaderSpec;
514 
515         shaderSpec["sizeBytes"]    = "4";
516         shaderSpec["dataTypeDecl"] = "";
517         shaderSpec["dataType"]     = "u32";
518 
519         vector<uint32_t> inputData;
520         vector<uint32_t> outputData;
521         ComputeShaderSpec spec;
522 
523         inputData.reserve(numElements);
524         for (uint32_t numIdx = 0; numIdx < numElements; ++numIdx)
525             inputData.push_back(rnd.getUint32());
526 
527         outputData.reserve(numElements);
528         for (uint32_t numIdx = 0; numIdx < numElements; ++numIdx)
529             outputData.push_back(inputData[numElements - numIdx - 1]);
530 
531         spec.assembly      = shaderSource.specialize(shaderSpec);
532         spec.numWorkGroups = IVec3(1, 1, 1);
533 
534         spec.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
535         spec.outputs.push_back(Resource(BufferSp(new Uint32Buffer(outputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
536 
537         group->addChild(new SpvAsmComputeShaderCase(testCtx, "uint32", spec));
538     }
539 
540     // uint16
541     {
542         VulkanFeatures features;
543         map<string, string> shaderSpec;
544 
545         shaderSpec["sizeBytes"]    = "2";
546         shaderSpec["dataTypeDecl"] = "%u16 = OpTypeInt 16 0";
547         shaderSpec["dataType"]     = "u16";
548         shaderSpec["capabilities"] = "OpCapability Int16\n";
549         shaderSpec["extensions"]   = "OpExtension \"SPV_KHR_16bit_storage\"\n";
550 
551         features.coreFeatures.shaderInt16                 = true;
552         features.ext16BitStorage.storageBuffer16BitAccess = true;
553 
554         vector<uint16_t> inputData;
555         vector<uint16_t> outputData;
556         ComputeShaderSpec spec;
557 
558         inputData.reserve(numElements);
559         for (uint32_t numIdx = 0; numIdx < numElements; ++numIdx)
560             inputData.push_back(rnd.getUint16());
561 
562         outputData.reserve(numElements);
563         for (uint32_t numIdx = 0; numIdx < numElements; ++numIdx)
564             outputData.push_back(inputData[numElements - numIdx - 1]);
565 
566         spec.assembly      = shaderSource.specialize(shaderSpec);
567         spec.numWorkGroups = IVec3(1, 1, 1);
568         spec.extensions.push_back("VK_KHR_16bit_storage");
569         spec.requestedVulkanFeatures = features;
570 
571         spec.inputs.push_back(Resource(BufferSp(new Uint16Buffer(inputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
572         spec.outputs.push_back(Resource(BufferSp(new Uint16Buffer(outputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
573 
574         group->addChild(new SpvAsmComputeShaderCase(testCtx, "uint16", spec));
575     }
576 
577     // uint8
578     {
579         VulkanFeatures features;
580         map<string, string> shaderSpec;
581 
582         shaderSpec["sizeBytes"]    = "1";
583         shaderSpec["dataTypeDecl"] = "%u8 = OpTypeInt 8 0";
584         shaderSpec["dataType"]     = "u8";
585         shaderSpec["capabilities"] = "OpCapability UniformAndStorageBuffer8BitAccess\nOpCapability Int8\n";
586         shaderSpec["extensions"]   = "OpExtension \"SPV_KHR_8bit_storage\"\n";
587 
588         features.ext8BitStorage.uniformAndStorageBuffer8BitAccess = true;
589         features.extFloat16Int8.shaderInt8                        = true;
590 
591         vector<uint8_t> inputData;
592         vector<uint8_t> outputData;
593         ComputeShaderSpec spec;
594 
595         inputData.reserve(numElements);
596         for (uint32_t numIdx = 0; numIdx < numElements; ++numIdx)
597             inputData.push_back(rnd.getUint8());
598 
599         outputData.reserve(numElements);
600         for (uint32_t numIdx = 0; numIdx < numElements; ++numIdx)
601             outputData.push_back(inputData[numElements - numIdx - 1]);
602 
603         spec.assembly      = shaderSource.specialize(shaderSpec);
604         spec.numWorkGroups = IVec3(1, 1, 1);
605         spec.extensions.push_back("VK_KHR_8bit_storage");
606         spec.extensions.push_back("VK_KHR_shader_float16_int8");
607         spec.requestedVulkanFeatures = features;
608 
609         spec.inputs.push_back(Resource(BufferSp(new Uint8Buffer(inputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
610         spec.outputs.push_back(Resource(BufferSp(new Uint8Buffer(outputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
611 
612         group->addChild(new SpvAsmComputeShaderCase(testCtx, "uint8", spec));
613     }
614 }
615 
616 } // namespace
617 
createWorkgroupMemoryComputeGroup(tcu::TestContext & testCtx)618 tcu::TestCaseGroup *createWorkgroupMemoryComputeGroup(tcu::TestContext &testCtx)
619 {
620     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "workgroup_memory"));
621     addComputeWorkgroupMemoryTests(group.get());
622 
623     return group.release();
624 }
625 
626 } // namespace SpirVAssembly
627 } // namespace vkt
628