1 /*-------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2017 Google Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief SPIR-V Assembly Tests for the VK_KHR_16bit_storage
22  *//*--------------------------------------------------------------------*/
23 
24 #include "vktSpvAsm16bitStorageTests.hpp"
25 
26 #include "tcuFloat.hpp"
27 #include "tcuRGBA.hpp"
28 #include "tcuStringTemplate.hpp"
29 #include "tcuTestLog.hpp"
30 #include "tcuVectorUtil.hpp"
31 
32 #include "vkDefs.hpp"
33 #include "vkDeviceUtil.hpp"
34 #include "vkMemUtil.hpp"
35 #include "vkPlatform.hpp"
36 #include "vkPrograms.hpp"
37 #include "vkQueryUtil.hpp"
38 #include "vkRef.hpp"
39 #include "vkRefUtil.hpp"
40 #include "vkStrUtil.hpp"
41 #include "vkTypeUtil.hpp"
42 
43 #include "deRandom.hpp"
44 #include "deStringUtil.hpp"
45 #include "deUniquePtr.hpp"
46 #include "deMath.h"
47 
48 #include "vktSpvAsmComputeShaderCase.hpp"
49 #include "vktSpvAsmComputeShaderTestUtil.hpp"
50 #include "vktSpvAsmGraphicsShaderTestUtil.hpp"
51 #include "vktSpvAsmUtils.hpp"
52 #include "vktTestCaseUtil.hpp"
53 #include "vktTestGroupUtil.hpp"
54 
55 #include <limits>
56 #include <map>
57 #include <string>
58 #include <sstream>
59 #include <utility>
60 
61 namespace vkt
62 {
63 namespace SpirVAssembly
64 {
65 
66 using namespace vk;
67 using de::UniquePtr;
68 using std::map;
69 using std::string;
70 using std::vector;
71 using tcu::Float16;
72 using tcu::IVec3;
73 using tcu::IVec4;
74 using tcu::RGBA;
75 using tcu::StringTemplate;
76 using tcu::TestLog;
77 using tcu::TestStatus;
78 using tcu::Vec4;
79 
80 namespace
81 {
82 
// Selects one of the struct layout variants handled in this file. getStructSize() and
// compareStruct() switch on the STRIDE* values; both assert on SHADERTEMPLATE_TYPES.
enum ShaderTemplate
{
    SHADERTEMPLATE_TYPES              = 0,
    SHADERTEMPLATE_STRIDE32BIT_STD140 = 1,
    SHADERTEMPLATE_STRIDE32BIT_STD430 = 2,
    SHADERTEMPLATE_STRIDE16BIT_STD140 = 3,
    SHADERTEMPLATE_STRIDE16BIT_STD430 = 4,
    SHADERTEMPLATE_STRIDEMIX_STD140   = 5,
    SHADERTEMPLATE_STRIDEMIX_STD430   = 6
};
93 
compare16Bit(float original,uint16_t returned,RoundingModeFlags flags,tcu::TestLog & log)94 bool compare16Bit(float original, uint16_t returned, RoundingModeFlags flags, tcu::TestLog &log)
95 {
96     return compare16BitFloat(original, returned, flags, log);
97 }
98 
compare16Bit(uint16_t original,float returned,RoundingModeFlags flags,tcu::TestLog & log)99 bool compare16Bit(uint16_t original, float returned, RoundingModeFlags flags, tcu::TestLog &log)
100 {
101     DE_UNREF(flags);
102     return compare16BitFloat(original, returned, log);
103 }
104 
// compare16Bit overload for 16-bit integers: plain equality, so neither the rounding-mode flags
// nor the log are used.
bool compare16Bit(int16_t original, int16_t returned, RoundingModeFlags /* flags */, tcu::TestLog & /* log */)
{
    return original == returned;
}
111 
// Array dimensions shared by the struct layout helpers in this file.
struct StructTestData
{
    // Number of elements in the array of structs.
    const int structArraySize;
    // Maximum length of any array nested inside the struct.
    const int nestedArraySize;
};
117 
118 struct Capability
119 {
120     const char *name;
121     const char *cap;
122     const char *decor;
123     vk::VkDescriptorType dtype;
124 };
125 
126 static const Capability CAPABILITIES[] = {
127     {"uniform_buffer_block", "StorageUniformBufferBlock16", "BufferBlock", VK_DESCRIPTOR_TYPE_STORAGE_BUFFER},
128     {"uniform", "StorageUniform16", "Block", VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER},
129 };
130 
131 static const StructTestData structData = {7, 11};
132 
// Data type tag stored in TestDefinition.
enum TestDefDataType
{
    DATATYPE_FLOAT = 0,
    DATATYPE_VEC2  = 1,
    DATATYPE_INT   = 2,
    DATATYPE_UINT  = 3,
    DATATYPE_IVEC2 = 4,
    DATATYPE_UVEC2 = 5
};
142 
143 struct TestDefinition
144 {
145     InstanceContext instanceContext;
146     TestDefDataType dataType;
147 };
148 
get16BitStorageFeatures(const char * cap)149 VulkanFeatures get16BitStorageFeatures(const char *cap)
150 {
151     VulkanFeatures features;
152     if (string(cap) == "uniform_buffer_block")
153         features.ext16BitStorage.storageBuffer16BitAccess = true;
154     else if (string(cap) == "uniform")
155         features.ext16BitStorage.uniformAndStorageBuffer16BitAccess = true;
156     else
157         DE_ASSERT(false && "not supported");
158 
159     return features;
160 }
161 
getStructSize(const ShaderTemplate shaderTemplate)162 int getStructSize(const ShaderTemplate shaderTemplate)
163 {
164     switch (shaderTemplate)
165     {
166     case SHADERTEMPLATE_STRIDE16BIT_STD140:
167         return 600 * structData.structArraySize; //size of struct in f16 with offsets
168     case SHADERTEMPLATE_STRIDE16BIT_STD430:
169         return 184 * structData.structArraySize; //size of struct in f16 with offsets
170     case SHADERTEMPLATE_STRIDE32BIT_STD140:
171         return 304 * structData.structArraySize; //size of struct in f32 with offsets
172     case SHADERTEMPLATE_STRIDE32BIT_STD430:
173         return 184 * structData.structArraySize; //size of struct in f32 with offset
174     case SHADERTEMPLATE_STRIDEMIX_STD140:
175         return 4480 * structData.structArraySize / 2; //size of struct in 16b with offset
176     case SHADERTEMPLATE_STRIDEMIX_STD430:
177         return 1216 * structData.structArraySize / 2; //size of struct in 16b with offset
178     default:
179         DE_ASSERT(0);
180     }
181     return 0;
182 }
183 
184 // Batch function to check arrays of 16-bit floats.
185 //
186 // For comparing 16-bit floats, we need to consider both RTZ and RTE. So we can only recalculate
187 // the expected values here instead of get the expected values directly from the test case.
188 // Thus we need original floats here but not expected outputs.
189 template <RoundingModeFlags RoundingMode>
graphicsCheck16BitFloats(const std::vector<Resource> & originalFloats,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,tcu::TestLog & log)190 bool graphicsCheck16BitFloats(const std::vector<Resource> &originalFloats, const vector<AllocationSp> &outputAllocs,
191                               const std::vector<Resource> &expectedOutputs, tcu::TestLog &log)
192 {
193     if (outputAllocs.size() != originalFloats.size())
194         return false;
195 
196     for (uint32_t outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
197     {
198         vector<uint8_t> originalBytes;
199         originalFloats[outputNdx].getBuffer()->getPackedBytes(originalBytes);
200 
201         const uint16_t *returned   = static_cast<const uint16_t *>(outputAllocs[outputNdx]->getHostPtr());
202         const float *original      = reinterpret_cast<const float *>(&originalBytes.front());
203         const uint32_t count       = static_cast<uint32_t>(expectedOutputs[outputNdx].getByteSize() / sizeof(uint16_t));
204         const uint32_t inputStride = static_cast<uint32_t>(originalBytes.size() / sizeof(float)) / count;
205 
206         for (uint32_t numNdx = 0; numNdx < count; ++numNdx)
207             if (!compare16BitFloat(original[numNdx * inputStride], returned[numNdx], RoundingMode, log))
208                 return false;
209     }
210 
211     return true;
212 }
213 
214 template <RoundingModeFlags RoundingMode>
graphicsCheck16BitFloats64(const std::vector<Resource> & originalFloats,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,tcu::TestLog & log)215 bool graphicsCheck16BitFloats64(const std::vector<Resource> &originalFloats, const vector<AllocationSp> &outputAllocs,
216                                 const std::vector<Resource> & /* expectedOutputs */, tcu::TestLog &log)
217 {
218     if (outputAllocs.size() != originalFloats.size())
219         return false;
220 
221     for (uint32_t outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
222     {
223         vector<uint8_t> originalBytes;
224         originalFloats[outputNdx].getBuffer()->getPackedBytes(originalBytes);
225 
226         const uint16_t *returned = static_cast<const uint16_t *>(outputAllocs[outputNdx]->getHostPtr());
227         const double *original   = reinterpret_cast<const double *>(&originalBytes.front());
228         const uint32_t count     = static_cast<uint32_t>(originalBytes.size() / sizeof(double));
229 
230         for (uint32_t numNdx = 0; numNdx < count; ++numNdx)
231             if (!compare16BitFloat64(original[numNdx], returned[numNdx], RoundingMode, log))
232                 return false;
233     }
234 
235     return true;
236 }
237 
computeCheckBuffersFloats(const std::vector<Resource> & originalFloats,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,tcu::TestLog &)238 bool computeCheckBuffersFloats(const std::vector<Resource> &originalFloats, const vector<AllocationSp> &outputAllocs,
239                                const std::vector<Resource> & /*expectedOutputs*/, tcu::TestLog & /*log*/)
240 {
241     std::vector<uint8_t> result;
242     originalFloats.front().getBuffer()->getPackedBytes(result);
243 
244     const uint16_t *results  = reinterpret_cast<const uint16_t *>(&result[0]);
245     const uint16_t *expected = reinterpret_cast<const uint16_t *>(outputAllocs.front()->getHostPtr());
246 
247     for (size_t i = 0; i < result.size() / sizeof(uint16_t); ++i)
248     {
249         if (results[i] == expected[i])
250             continue;
251 
252         if (Float16(results[i]).isNaN() && Float16(expected[i]).isNaN())
253             continue;
254 
255         return false;
256     }
257 
258     return true;
259 }
260 
261 template <RoundingModeFlags RoundingMode>
computeCheck16BitFloats(const std::vector<Resource> & originalFloats,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,tcu::TestLog & log)262 bool computeCheck16BitFloats(const std::vector<Resource> &originalFloats, const vector<AllocationSp> &outputAllocs,
263                              const std::vector<Resource> &expectedOutputs, tcu::TestLog &log)
264 {
265     if (outputAllocs.size() != originalFloats.size())
266         return false;
267 
268     for (uint32_t outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
269     {
270         vector<uint8_t> originalBytes;
271         originalFloats[outputNdx].getBuffer()->getPackedBytes(originalBytes);
272 
273         const uint16_t *returned   = static_cast<const uint16_t *>(outputAllocs[outputNdx]->getHostPtr());
274         const float *original      = reinterpret_cast<const float *>(&originalBytes.front());
275         const uint32_t count       = static_cast<uint32_t>(expectedOutputs[outputNdx].getByteSize() / sizeof(uint16_t));
276         const uint32_t inputStride = static_cast<uint32_t>(originalBytes.size() / sizeof(float)) / count;
277 
278         for (uint32_t numNdx = 0; numNdx < count; ++numNdx)
279             if (!compare16BitFloat(original[numNdx * inputStride], returned[numNdx], RoundingMode, log))
280                 return false;
281     }
282 
283     return true;
284 }
285 
286 template <RoundingModeFlags RoundingMode>
computeCheck16BitFloats64(const std::vector<Resource> & originalFloats,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,tcu::TestLog & log)287 bool computeCheck16BitFloats64(const std::vector<Resource> &originalFloats, const vector<AllocationSp> &outputAllocs,
288                                const std::vector<Resource> & /* expectedOutputs */, tcu::TestLog &log)
289 {
290     if (outputAllocs.size() != originalFloats.size())
291         return false;
292 
293     for (uint32_t outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
294     {
295         vector<uint8_t> originalBytes;
296         originalFloats[outputNdx].getBuffer()->getPackedBytes(originalBytes);
297 
298         const uint16_t *returned = static_cast<const uint16_t *>(outputAllocs[outputNdx]->getHostPtr());
299         const double *original   = reinterpret_cast<const double *>(&originalBytes.front());
300         const uint32_t count     = static_cast<uint32_t>(originalBytes.size() / sizeof(double));
301 
302         for (uint32_t numNdx = 0; numNdx < count; ++numNdx)
303             if (!compare16BitFloat64(original[numNdx], returned[numNdx], RoundingMode, log))
304                 return false;
305     }
306 
307     return true;
308 }
309 
310 // Batch function to check arrays of 64-bit floats.
311 //
312 // For comparing 64-bit floats, we just need the expected value precomputed in the test case.
313 // So we need expected outputs here but not original floats.
check64BitFloats(const std::vector<Resource> &,const std::vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,tcu::TestLog & log)314 bool check64BitFloats(const std::vector<Resource> & /* originalFloats */, const std::vector<AllocationSp> &outputAllocs,
315                       const std::vector<Resource> &expectedOutputs, tcu::TestLog &log)
316 {
317     if (outputAllocs.size() != expectedOutputs.size())
318         return false;
319 
320     for (uint32_t outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
321     {
322         vector<uint8_t> expectedBytes;
323         expectedOutputs[outputNdx].getBuffer()->getPackedBytes(expectedBytes);
324 
325         const double *returnedAsDouble = static_cast<const double *>(outputAllocs[outputNdx]->getHostPtr());
326         const double *expectedAsDouble = reinterpret_cast<const double *>(&expectedBytes.front());
327         const uint32_t count           = static_cast<uint32_t>(expectedBytes.size() / sizeof(double));
328 
329         for (uint32_t numNdx = 0; numNdx < count; ++numNdx)
330             if (!compare64BitFloat(expectedAsDouble[numNdx], returnedAsDouble[numNdx], log))
331                 return false;
332     }
333 
334     return true;
335 }
336 
337 // Batch function to check arrays of 32-bit floats.
338 //
339 // For comparing 32-bit floats, we just need the expected value precomputed in the test case.
340 // So we need expected outputs here but not original floats.
check32BitFloats(const std::vector<Resource> &,const std::vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,tcu::TestLog & log)341 bool check32BitFloats(const std::vector<Resource> & /* originalFloats */, const std::vector<AllocationSp> &outputAllocs,
342                       const std::vector<Resource> &expectedOutputs, tcu::TestLog &log)
343 {
344     if (outputAllocs.size() != expectedOutputs.size())
345         return false;
346 
347     for (uint32_t outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
348     {
349         vector<uint8_t> expectedBytes;
350         expectedOutputs[outputNdx].getBuffer()->getPackedBytes(expectedBytes);
351 
352         const float *returnedAsFloat = static_cast<const float *>(outputAllocs[outputNdx]->getHostPtr());
353         const float *expectedAsFloat = reinterpret_cast<const float *>(&expectedBytes.front());
354         const uint32_t count         = static_cast<uint32_t>(expectedBytes.size() / sizeof(float));
355 
356         for (uint32_t numNdx = 0; numNdx < count; ++numNdx)
357             if (!compare32BitFloat(expectedAsFloat[numNdx], returnedAsFloat[numNdx], log))
358                 return false;
359     }
360 
361     return true;
362 }
363 
// Writes isData into `count` consecutive entries of info starting at ndx and advances ndx past
// them. A count of zero or less leaves both info and ndx untouched. No bounds checking is done;
// the caller must have sized info appropriately.
void addInfo(vector<bool> &info, int &ndx, const int count, bool isData)
{
    for (int remaining = count; remaining > 0; --remaining)
    {
        info[ndx] = isData;
        ++ndx;
    }
}
369 
data16bitStd140(de::Random & rnd)370 vector<deFloat16> data16bitStd140(de::Random &rnd)
371 {
372     return getFloat16s(rnd, getStructSize(SHADERTEMPLATE_STRIDE16BIT_STD140));
373 }
374 
info16bitStd140(void)375 vector<bool> info16bitStd140(void)
376 {
377     int ndx = 0u;
378     vector<bool> infoData(getStructSize(SHADERTEMPLATE_STRIDE16BIT_STD140));
379 
380     for (int elementNdx = 0; elementNdx < structData.structArraySize; ++elementNdx)
381     {
382         infoData[ndx++] = true;  //f16
383         infoData[ndx++] = false; //offset
384 
385         infoData[ndx++] = true; //v2f16
386         infoData[ndx++] = true; //v2f16
387 
388         addInfo(infoData, ndx, 3, true); //v3f16
389         infoData[ndx++] = false;         //offset
390 
391         addInfo(infoData, ndx, 4, true);  //v4f16
392         addInfo(infoData, ndx, 4, false); //offset
393 
394         //f16[3];
395         for (int i = 0; i < 3; ++i)
396         {
397             infoData[ndx++] = true;           //f16[0];
398             addInfo(infoData, ndx, 7, false); //offset
399         }
400 
401         //struct {f16, v2f16[3]} [11]
402         for (int i = 0; i < structData.nestedArraySize; ++i)
403         {
404             //struct.f16
405             infoData[ndx++] = true;           //f16
406             addInfo(infoData, ndx, 7, false); //offset
407             //struct.f16.v2f16[3]
408             for (int j = 0; j < 3; ++j)
409             {
410                 infoData[ndx++] = true;           //v2f16
411                 infoData[ndx++] = true;           //v2f16
412                 addInfo(infoData, ndx, 6, false); //offset
413             }
414         }
415 
416         //vec2[11];
417         for (int i = 0; i < structData.nestedArraySize; ++i)
418         {
419             infoData[ndx++] = true;           //v2f16
420             infoData[ndx++] = true;           //v2f16
421             addInfo(infoData, ndx, 6, false); //offset
422         }
423 
424         //f16
425         infoData[ndx++] = true;           //f16
426         addInfo(infoData, ndx, 7, false); //offset
427 
428         //vec3[11]
429         for (int i = 0; i < structData.nestedArraySize; ++i)
430         {
431             addInfo(infoData, ndx, 3, true);  //vec3
432             addInfo(infoData, ndx, 5, false); //offset
433         }
434 
435         //vec4[3]
436         for (int i = 0; i < 3; ++i)
437         {
438             addInfo(infoData, ndx, 4, true);  //vec4
439             addInfo(infoData, ndx, 4, false); //offset
440         }
441     }
442 
443     //Please check the data and offset
444     DE_ASSERT(ndx == static_cast<int>(infoData.size()));
445 
446     return infoData;
447 }
448 
data16bitStd430(de::Random & rnd)449 vector<deFloat16> data16bitStd430(de::Random &rnd)
450 {
451     return getFloat16s(rnd, getStructSize(SHADERTEMPLATE_STRIDE16BIT_STD430));
452 }
453 
info16bitStd430(void)454 vector<bool> info16bitStd430(void)
455 {
456     int ndx = 0u;
457     vector<bool> infoData(getStructSize(SHADERTEMPLATE_STRIDE16BIT_STD430));
458 
459     for (int elementNdx = 0; elementNdx < structData.structArraySize; ++elementNdx)
460     {
461         infoData[ndx++] = true;  //f16
462         infoData[ndx++] = false; //offset
463 
464         infoData[ndx++] = true; //v2f16
465         infoData[ndx++] = true; //v2f16
466 
467         addInfo(infoData, ndx, 3, true); //v3f16
468         infoData[ndx++] = false;         //offset
469 
470         addInfo(infoData, ndx, 4, true); //v4f16
471 
472         //f16[3];
473         for (int i = 0; i < 3; ++i)
474         {
475             infoData[ndx++] = true; //f16;
476         }
477         addInfo(infoData, ndx, 1, false); //offset
478 
479         //struct {f16, v2f16[3]} [11]
480         for (int i = 0; i < structData.nestedArraySize; ++i)
481         {
482             //struct.f16
483             infoData[ndx++] = true;  //f16
484             infoData[ndx++] = false; //offset
485             //struct.f16.v2f16[3]
486             for (int j = 0; j < 3; ++j)
487             {
488                 infoData[ndx++] = true; //v2f16
489                 infoData[ndx++] = true; //v2f16
490             }
491         }
492 
493         //vec2[11];
494         for (int i = 0; i < structData.nestedArraySize; ++i)
495         {
496             infoData[ndx++] = true; //v2f16
497             infoData[ndx++] = true; //v2f16
498         }
499 
500         //f16
501         infoData[ndx++] = true;  //f16
502         infoData[ndx++] = false; //offset
503 
504         //vec3[11]
505         for (int i = 0; i < structData.nestedArraySize; ++i)
506         {
507             addInfo(infoData, ndx, 3, true); //vec3
508             infoData[ndx++] = false;         //offset
509         }
510 
511         //vec4[3]
512         for (int i = 0; i < 3; ++i)
513         {
514             addInfo(infoData, ndx, 4, true); //vec4
515         }
516     }
517 
518     //Please check the data and offset
519     DE_ASSERT(ndx == static_cast<int>(infoData.size()));
520     return infoData;
521 }
522 
data32bitStd140(de::Random & rnd)523 vector<float> data32bitStd140(de::Random &rnd)
524 {
525     return getFloat32s(rnd, getStructSize(SHADERTEMPLATE_STRIDE32BIT_STD140));
526 }
527 
info32bitStd140(void)528 vector<bool> info32bitStd140(void)
529 {
530     int ndx = 0u;
531     vector<bool> infoData(getStructSize(SHADERTEMPLATE_STRIDE32BIT_STD140));
532 
533     for (int elementNdx = 0; elementNdx < structData.structArraySize; ++elementNdx)
534     {
535         infoData[ndx++] = true;  //f32
536         infoData[ndx++] = false; //offset
537 
538         infoData[ndx++] = true; //v2f32
539         infoData[ndx++] = true; //v2f32
540 
541         addInfo(infoData, ndx, 3, true); //v3f32
542         infoData[ndx++] = false;         //offset
543 
544         addInfo(infoData, ndx, 4, true); //v4f16
545 
546         //f32[3];
547         for (int i = 0; i < 3; ++i)
548         {
549             infoData[ndx++] = true;           //f32;
550             addInfo(infoData, ndx, 3, false); //offset
551         }
552 
553         //struct {f32, v2f32[3]} [11]
554         for (int i = 0; i < structData.nestedArraySize; ++i)
555         {
556             //struct.f32
557             infoData[ndx++] = true;           //f32
558             addInfo(infoData, ndx, 3, false); //offset
559             //struct.f32.v2f16[3]
560             for (int j = 0; j < 3; ++j)
561             {
562                 infoData[ndx++] = true;  //v2f32
563                 infoData[ndx++] = true;  //v2f32
564                 infoData[ndx++] = false; //offset
565                 infoData[ndx++] = false; //offset
566             }
567         }
568 
569         //v2f32[11];
570         for (int i = 0; i < structData.nestedArraySize; ++i)
571         {
572             infoData[ndx++] = true;  //v2f32
573             infoData[ndx++] = true;  //v2f32
574             infoData[ndx++] = false; //offset
575             infoData[ndx++] = false; //offset
576         }
577 
578         //f16
579         infoData[ndx++] = true;           //f16
580         addInfo(infoData, ndx, 3, false); //offset
581 
582         //vec3[11]
583         for (int i = 0; i < structData.nestedArraySize; ++i)
584         {
585             addInfo(infoData, ndx, 3, true); //v3f32
586             infoData[ndx++] = false;         //offset
587         }
588 
589         //vec4[3]
590         for (int i = 0; i < 3; ++i)
591         {
592             addInfo(infoData, ndx, 4, true); //vec4
593         }
594     }
595 
596     //Please check the data and offset
597     DE_ASSERT(ndx == static_cast<int>(infoData.size()));
598     return infoData;
599 }
600 
data32bitStd430(de::Random & rnd)601 vector<float> data32bitStd430(de::Random &rnd)
602 {
603     return getFloat32s(rnd, getStructSize(SHADERTEMPLATE_STRIDE32BIT_STD430));
604 }
605 
info32bitStd430(void)606 vector<bool> info32bitStd430(void)
607 {
608     int ndx = 0u;
609     vector<bool> infoData(getStructSize(SHADERTEMPLATE_STRIDE32BIT_STD430));
610 
611     for (int elementNdx = 0; elementNdx < structData.structArraySize; ++elementNdx)
612     {
613         infoData[ndx++] = true;  //f32
614         infoData[ndx++] = false; //offset
615 
616         infoData[ndx++] = true; //v2f32
617         infoData[ndx++] = true; //v2f32
618 
619         addInfo(infoData, ndx, 3, true); //v3f32
620         infoData[ndx++] = false;         //offset
621 
622         addInfo(infoData, ndx, 4, true); //v4f16
623 
624         //f32[3];
625         for (int i = 0; i < 3; ++i)
626         {
627             infoData[ndx++] = true; //f32;
628         }
629         infoData[ndx++] = false; //offset
630 
631         //struct {f32, v2f32[3]} [11]
632         for (int i = 0; i < structData.nestedArraySize; ++i)
633         {
634             //struct.f32
635             infoData[ndx++] = true;  //f32
636             infoData[ndx++] = false; //offset
637             //struct.f32.v2f16[3]
638             for (int j = 0; j < 3; ++j)
639             {
640                 infoData[ndx++] = true; //v2f32
641                 infoData[ndx++] = true; //v2f32
642             }
643         }
644 
645         //v2f32[11];
646         for (int i = 0; i < structData.nestedArraySize; ++i)
647         {
648             infoData[ndx++] = true; //v2f32
649             infoData[ndx++] = true; //v2f32
650         }
651 
652         //f32
653         infoData[ndx++] = true;  //f32
654         infoData[ndx++] = false; //offset
655 
656         //vec3[11]
657         for (int i = 0; i < structData.nestedArraySize; ++i)
658         {
659             addInfo(infoData, ndx, 3, true); //v3f32
660             infoData[ndx++] = false;         //offset
661         }
662 
663         //vec4[3]
664         for (int i = 0; i < 3; ++i)
665         {
666             addInfo(infoData, ndx, 4, true); //vec4
667         }
668     }
669 
670     //Please check the data and offset
671     DE_ASSERT(ndx == static_cast<int>(infoData.size()));
672     return infoData;
673 }
674 
dataMixStd140(de::Random & rnd)675 vector<int16_t> dataMixStd140(de::Random &rnd)
676 {
677     return getInt16s(rnd, getStructSize(SHADERTEMPLATE_STRIDEMIX_STD140));
678 }
679 
infoMixStd140(void)680 vector<bool> infoMixStd140(void)
681 {
682     int ndx = 0u;
683     vector<bool> infoData(getStructSize(SHADERTEMPLATE_STRIDEMIX_STD140));
684     for (int elementNdx = 0; elementNdx < structData.structArraySize; ++elementNdx)
685     {
686         infoData[ndx++] = true;           //16b
687         addInfo(infoData, ndx, 1, false); //offset
688 
689         addInfo(infoData, ndx, 2, true); //32b
690 
691         addInfo(infoData, ndx, 2, true);  //v2b16
692         addInfo(infoData, ndx, 2, false); //offset
693 
694         addInfo(infoData, ndx, 4, true); //v2b32
695 
696         addInfo(infoData, ndx, 3, true);  //v3b16
697         addInfo(infoData, ndx, 1, false); //offset
698 
699         addInfo(infoData, ndx, 6, true);  //v3b32
700         addInfo(infoData, ndx, 2, false); //offset
701 
702         addInfo(infoData, ndx, 4, true);  //v4b16
703         addInfo(infoData, ndx, 4, false); //offset
704 
705         addInfo(infoData, ndx, 8, true); //v4b32
706 
707         //strut {b16, b32, v2b16[11], b32[11]}
708         for (int i = 0; i < structData.nestedArraySize; ++i)
709         {
710             infoData[ndx++] = true;           //16b
711             addInfo(infoData, ndx, 1, false); //offset
712 
713             addInfo(infoData, ndx, 2, true);  //32b
714             addInfo(infoData, ndx, 4, false); //offset
715 
716             for (int j = 0; j < structData.nestedArraySize; ++j)
717             {
718                 addInfo(infoData, ndx, 2, true);  //v2b16[11]
719                 addInfo(infoData, ndx, 6, false); //offset
720             }
721 
722             for (int j = 0; j < structData.nestedArraySize; ++j)
723             {
724                 addInfo(infoData, ndx, 2, true);  //b32[11]
725                 addInfo(infoData, ndx, 6, false); //offset
726             }
727         }
728 
729         for (int i = 0; i < structData.nestedArraySize; ++i)
730         {
731             infoData[ndx++] = true;           //16b[11]
732             addInfo(infoData, ndx, 7, false); //offset
733         }
734 
735         for (int i = 0; i < structData.nestedArraySize; ++i)
736         {
737             addInfo(infoData, ndx, 2, true);  //b32bIn[11]
738             addInfo(infoData, ndx, 6, false); //offset
739         }
740     }
741 
742     //Please check the data and offset
743     DE_ASSERT(ndx == static_cast<int>(infoData.size()));
744     return infoData;
745 }
746 
dataMixStd430(de::Random & rnd)747 vector<int16_t> dataMixStd430(de::Random &rnd)
748 {
749     return getInt16s(rnd, getStructSize(SHADERTEMPLATE_STRIDEMIX_STD430));
750 }
751 
infoMixStd430(void)752 vector<bool> infoMixStd430(void)
753 {
754     int ndx = 0u;
755     vector<bool> infoData(getStructSize(SHADERTEMPLATE_STRIDEMIX_STD430));
756     for (int elementNdx = 0; elementNdx < structData.structArraySize; ++elementNdx)
757     {
758         infoData[ndx++] = true;           //16b
759         addInfo(infoData, ndx, 1, false); //offset
760 
761         addInfo(infoData, ndx, 2, true); //32b
762 
763         addInfo(infoData, ndx, 2, true);  //v2b16
764         addInfo(infoData, ndx, 2, false); //offset
765 
766         addInfo(infoData, ndx, 4, true); //v2b32
767 
768         addInfo(infoData, ndx, 3, true);  //v3b16
769         addInfo(infoData, ndx, 1, false); //offset
770 
771         addInfo(infoData, ndx, 6, true);  //v3b32
772         addInfo(infoData, ndx, 2, false); //offset
773 
774         addInfo(infoData, ndx, 4, true);  //v4b16
775         addInfo(infoData, ndx, 4, false); //offset
776 
777         addInfo(infoData, ndx, 8, true); //v4b32
778 
779         //strut {b16, b32, v2b16[11], b32[11]}
780         for (int i = 0; i < structData.nestedArraySize; ++i)
781         {
782             infoData[ndx++] = true;           //16b
783             addInfo(infoData, ndx, 1, false); //offset
784 
785             addInfo(infoData, ndx, 2, true); //32b
786 
787             addInfo(infoData, ndx, 22, true); //v2b16[11]
788 
789             addInfo(infoData, ndx, 22, true); //b32[11]
790         }
791 
792         addInfo(infoData, ndx, 11, true); //16b[11]
793         infoData[ndx++] = false;          //offset
794 
795         addInfo(infoData, ndx, 22, true); //32b[11]
796         addInfo(infoData, ndx, 6, false); //offset
797     }
798 
799     //Please check the data and offset
800     DE_ASSERT(ndx == static_cast<int>(infoData.size()));
801     return infoData;
802 }
803 
804 template <typename originType, typename resultType, ShaderTemplate funcOrigin, ShaderTemplate funcResult>
compareStruct(const resultType * returned,const originType * original,tcu::TestLog & log)805 bool compareStruct(const resultType *returned, const originType *original, tcu::TestLog &log)
806 {
807     vector<bool> resultInfo;
808     vector<bool> originInfo;
809     vector<resultType> resultToCompare;
810     vector<originType> originToCompare;
811 
812     switch (funcOrigin)
813     {
814     case SHADERTEMPLATE_STRIDE16BIT_STD140:
815         originInfo = info16bitStd140();
816         break;
817     case SHADERTEMPLATE_STRIDE16BIT_STD430:
818         originInfo = info16bitStd430();
819         break;
820     case SHADERTEMPLATE_STRIDE32BIT_STD140:
821         originInfo = info32bitStd140();
822         break;
823     case SHADERTEMPLATE_STRIDE32BIT_STD430:
824         originInfo = info32bitStd430();
825         break;
826     case SHADERTEMPLATE_STRIDEMIX_STD140:
827         originInfo = infoMixStd140();
828         break;
829     case SHADERTEMPLATE_STRIDEMIX_STD430:
830         originInfo = infoMixStd430();
831         break;
832     default:
833         DE_ASSERT(0);
834     }
835 
836     switch (funcResult)
837     {
838     case SHADERTEMPLATE_STRIDE16BIT_STD140:
839         resultInfo = info16bitStd140();
840         break;
841     case SHADERTEMPLATE_STRIDE16BIT_STD430:
842         resultInfo = info16bitStd430();
843         break;
844     case SHADERTEMPLATE_STRIDE32BIT_STD140:
845         resultInfo = info32bitStd140();
846         break;
847     case SHADERTEMPLATE_STRIDE32BIT_STD430:
848         resultInfo = info32bitStd430();
849         break;
850     case SHADERTEMPLATE_STRIDEMIX_STD140:
851         resultInfo = infoMixStd140();
852         break;
853     case SHADERTEMPLATE_STRIDEMIX_STD430:
854         resultInfo = infoMixStd430();
855         break;
856     default:
857         DE_ASSERT(0);
858     }
859 
860     for (unsigned int ndx = 0; ndx < static_cast<unsigned int>(resultInfo.size()); ++ndx)
861     {
862         if (resultInfo[ndx])
863             resultToCompare.push_back(returned[ndx]);
864     }
865 
866     for (unsigned int ndx = 0; ndx < static_cast<unsigned int>(originInfo.size()); ++ndx)
867     {
868         if (originInfo[ndx])
869             originToCompare.push_back(original[ndx]);
870     }
871 
872     //Different offset but that same amount of data
873     DE_ASSERT(originToCompare.size() == resultToCompare.size());
874     for (unsigned int ndx = 0; ndx < static_cast<unsigned int>(originToCompare.size()); ++ndx)
875     {
876         if (!compare16Bit(originToCompare[ndx], resultToCompare[ndx],
877                           RoundingModeFlags(ROUNDINGMODE_RTE | ROUNDINGMODE_RTZ), log))
878             return false;
879     }
880     return true;
881 }
882 
883 template <typename originType, typename resultType, ShaderTemplate funcOrigin, ShaderTemplate funcResult>
computeCheckStruct(const std::vector<Resource> & originalFloats,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,tcu::TestLog & log)884 bool computeCheckStruct(const std::vector<Resource> &originalFloats, const vector<AllocationSp> &outputAllocs,
885                         const std::vector<Resource> & /* expectedOutputs */, tcu::TestLog &log)
886 {
887     for (uint32_t outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
888     {
889         vector<uint8_t> originalBytes;
890         originalFloats[outputNdx].getBuffer()->getPackedBytes(originalBytes);
891 
892         const resultType *returned = static_cast<const resultType *>(outputAllocs[outputNdx]->getHostPtr());
893         const originType *original = reinterpret_cast<const originType *>(&originalBytes.front());
894 
895         if (!compareStruct<originType, resultType, funcOrigin, funcResult>(returned, original, log))
896             return false;
897     }
898     return true;
899 }
900 
901 template <typename originType, typename resultType, ShaderTemplate funcOrigin, ShaderTemplate funcResult>
graphicsCheckStruct(const std::vector<Resource> & originalFloats,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,tcu::TestLog & log)902 bool graphicsCheckStruct(const std::vector<Resource> &originalFloats, const vector<AllocationSp> &outputAllocs,
903                          const std::vector<Resource> & /* expectedOutputs */, tcu::TestLog &log)
904 {
905     for (uint32_t outputNdx = 0; outputNdx < static_cast<uint32_t>(outputAllocs.size()); ++outputNdx)
906     {
907         vector<uint8_t> originalBytes;
908         originalFloats[outputNdx].getBuffer()->getPackedBytes(originalBytes);
909 
910         const resultType *returned = static_cast<const resultType *>(outputAllocs[outputNdx]->getHostPtr());
911         const originType *original = reinterpret_cast<const originType *>(&originalBytes.front());
912 
913         if (!compareStruct<originType, resultType, funcOrigin, funcResult>(returned, original, log))
914             return false;
915     }
916     return true;
917 }
918 
getStructShaderComponet(const ShaderTemplate component)919 string getStructShaderComponet(const ShaderTemplate component)
920 {
921     switch (component)
922     {
923     case SHADERTEMPLATE_TYPES:
924         return string("%f16       = OpTypeFloat 16\n"
925                       "%v2f16     = OpTypeVector %f16 2\n"
926                       "%v3f16     = OpTypeVector %f16 3\n"
927                       "%v4f16     = OpTypeVector %f16 4\n"
928                       "%f16ptr    = OpTypePointer Uniform %f16\n"
929                       "%v2f16ptr  = OpTypePointer Uniform %v2f16\n"
930                       "%v3f16ptr  = OpTypePointer Uniform %v3f16\n"
931                       "%v4f16ptr  = OpTypePointer Uniform %v4f16\n"
932                       "\n"
933                       "%f32ptr   = OpTypePointer Uniform %f32\n"
934                       "%v2f32ptr = OpTypePointer Uniform %v2f32\n"
935                       "%v3f32ptr = OpTypePointer Uniform %v3f32\n"
936                       "%v4f32ptr = OpTypePointer Uniform %v4f32\n");
937     case SHADERTEMPLATE_STRIDE16BIT_STD140:
938         return string(
939             //struct {f16, v2f16[3]} [11]
940             "OpDecorate %v2f16arr3 ArrayStride 16\n"
941             "OpMemberDecorate %struct16 0 Offset 0\n"
942             "OpMemberDecorate %struct16 1 Offset 16\n"
943             "OpDecorate %struct16arr11 ArrayStride 64\n"
944 
945             "OpDecorate %f16arr3       ArrayStride 16\n"
946             "OpDecorate %v2f16arr11    ArrayStride 16\n"
947             "OpDecorate %v3f16arr11    ArrayStride 16\n"
948             "OpDecorate %v4f16arr3     ArrayStride 16\n"
949             "OpDecorate %f16StructArr7 ArrayStride 1200\n"
950             "\n"
951             "OpMemberDecorate %f16Struct 0 Offset 0\n"      //f16
952             "OpMemberDecorate %f16Struct 1 Offset 4\n"      //v2f16
953             "OpMemberDecorate %f16Struct 2 Offset 8\n"      //v3f16
954             "OpMemberDecorate %f16Struct 3 Offset 16\n"     //v4f16
955             "OpMemberDecorate %f16Struct 4 Offset 32\n"     //f16[3]
956             "OpMemberDecorate %f16Struct 5 Offset 80\n"     //struct {f16, v2f16[3]} [11]
957             "OpMemberDecorate %f16Struct 6 Offset 784\n"    //v2f16[11]
958             "OpMemberDecorate %f16Struct 7 Offset 960\n"    //f16
959             "OpMemberDecorate %f16Struct 8 Offset 976\n"    //v3f16[11]
960             "OpMemberDecorate %f16Struct 9 Offset 1152\n"); //v4f16[3]
961 
962     case SHADERTEMPLATE_STRIDE16BIT_STD430:
963         return string(
964             //struct {f16, v2f16[3]} [11]
965             "OpDecorate %v2f16arr3 ArrayStride 4\n"
966             "OpMemberDecorate %struct16 0 Offset 0\n"
967             "OpMemberDecorate %struct16 1 Offset 4\n"
968             "OpDecorate %struct16arr11 ArrayStride 16\n"
969 
970             "OpDecorate %f16arr3    ArrayStride 2\n"
971             "OpDecorate %v2f16arr11 ArrayStride 4\n"
972             "OpDecorate %v3f16arr11 ArrayStride 8\n"
973             "OpDecorate %v4f16arr3  ArrayStride 8\n"
974             "OpDecorate %f16StructArr7 ArrayStride 368\n"
975             "\n"
976             "OpMemberDecorate %f16Struct 0 Offset 0\n"     //f16
977             "OpMemberDecorate %f16Struct 1 Offset 4\n"     //v2f16
978             "OpMemberDecorate %f16Struct 2 Offset 8\n"     //v3f16
979             "OpMemberDecorate %f16Struct 3 Offset 16\n"    //v4f16
980             "OpMemberDecorate %f16Struct 4 Offset 24\n"    //f16[3]
981             "OpMemberDecorate %f16Struct 5 Offset 32\n"    //struct {f16, v2f16[3]} [11]
982             "OpMemberDecorate %f16Struct 6 Offset 208\n"   //v2f16[11]
983             "OpMemberDecorate %f16Struct 7 Offset 252\n"   //f16
984             "OpMemberDecorate %f16Struct 8 Offset 256\n"   //v3f16[11]
985             "OpMemberDecorate %f16Struct 9 Offset 344\n"); //v4f16[3]
986     case SHADERTEMPLATE_STRIDE32BIT_STD140:
987         return string(
988             //struct {f32, v2f32[3]} [11]
989             "OpDecorate %v2f32arr3 ArrayStride 16\n"
990             "OpMemberDecorate %struct32 0 Offset 0\n"
991             "OpMemberDecorate %struct32 1 Offset 16\n"
992             "OpDecorate %struct32arr11 ArrayStride 64\n"
993 
994             "OpDecorate %f32arr3   ArrayStride 16\n"
995             "OpDecorate %v2f32arr11 ArrayStride 16\n"
996             "OpDecorate %v3f32arr11 ArrayStride 16\n"
997             "OpDecorate %v4f32arr3 ArrayStride 16\n"
998             "OpDecorate %f32StructArr7 ArrayStride 1216\n"
999             "\n"
1000 
1001             "OpMemberDecorate %f32Struct 0 Offset 0\n"      //f32
1002             "OpMemberDecorate %f32Struct 1 Offset 8\n"      //v2f32
1003             "OpMemberDecorate %f32Struct 2 Offset 16\n"     //v3f32
1004             "OpMemberDecorate %f32Struct 3 Offset 32\n"     //v4f32
1005             "OpMemberDecorate %f32Struct 4 Offset 48\n"     //f32[3]
1006             "OpMemberDecorate %f32Struct 5 Offset 96\n"     //struct {f32, v2f32[3]} [11]
1007             "OpMemberDecorate %f32Struct 6 Offset 800\n"    //v2f32[11]
1008             "OpMemberDecorate %f32Struct 7 Offset 976\n"    //f32
1009             "OpMemberDecorate %f32Struct 8 Offset 992\n"    //v3f32[11]
1010             "OpMemberDecorate %f32Struct 9 Offset 1168\n"); //v4f32[3]
1011 
1012     case SHADERTEMPLATE_STRIDE32BIT_STD430:
1013         return string(
1014             //struct {f32, v2f32[3]} [11]
1015             "OpDecorate %v2f32arr3 ArrayStride 8\n"
1016             "OpMemberDecorate %struct32 0 Offset 0\n"
1017             "OpMemberDecorate %struct32 1 Offset 8\n"
1018             "OpDecorate %struct32arr11 ArrayStride 32\n"
1019 
1020             "OpDecorate %f32arr3    ArrayStride 4\n"
1021             "OpDecorate %v2f32arr11 ArrayStride 8\n"
1022             "OpDecorate %v3f32arr11 ArrayStride 16\n"
1023             "OpDecorate %v4f32arr3  ArrayStride 16\n"
1024             "OpDecorate %f32StructArr7 ArrayStride 736\n"
1025             "\n"
1026 
1027             "OpMemberDecorate %f32Struct 0 Offset 0\n"     //f32
1028             "OpMemberDecorate %f32Struct 1 Offset 8\n"     //v2f32
1029             "OpMemberDecorate %f32Struct 2 Offset 16\n"    //v3f32
1030             "OpMemberDecorate %f32Struct 3 Offset 32\n"    //v4f32
1031             "OpMemberDecorate %f32Struct 4 Offset 48\n"    //f32[3]
1032             "OpMemberDecorate %f32Struct 5 Offset 64\n"    //struct {f32, v2f32[3]}[11]
1033             "OpMemberDecorate %f32Struct 6 Offset 416\n"   //v2f32[11]
1034             "OpMemberDecorate %f32Struct 7 Offset 504\n"   //f32
1035             "OpMemberDecorate %f32Struct 8 Offset 512\n"   //v3f32[11]
1036             "OpMemberDecorate %f32Struct 9 Offset 688\n"); //v4f32[3]
1037     case SHADERTEMPLATE_STRIDEMIX_STD140:
1038         return string(
1039             "\n"                                                    //strutNestedIn {b16, b32, v2b16[11], b32[11]}
1040             "OpDecorate %v2b16NestedArr11${InOut} ArrayStride 16\n" //v2b16[11]
1041             "OpDecorate %b32NestedArr11${InOut} ArrayStride 16\n"   //b32[11]
1042             "OpMemberDecorate %sNested${InOut} 0 Offset 0\n"        //b16
1043             "OpMemberDecorate %sNested${InOut} 1 Offset 4\n"        //b32
1044             "OpMemberDecorate %sNested${InOut} 2 Offset 16\n"       //v2b16[11]
1045             "OpMemberDecorate %sNested${InOut} 3 Offset 192\n"      //b32[11]
1046             "OpDecorate %sNestedArr11${InOut} ArrayStride 368\n"    //strutNestedIn[11]
1047             "\n" //strutIn {b16, b32, v2b16, v2b32, v3b16, v3b32, v4b16, v4b32, strutNestedIn[11], b16In[11], b32bIn[11]}
1048             "OpDecorate %sb16Arr11${InOut} ArrayStride 16\n"      //b16In[11]
1049             "OpDecorate %sb32Arr11${InOut} ArrayStride 16\n"      //b32bIn[11]
1050             "OpMemberDecorate %struct${InOut} 0 Offset 0\n"       //b16
1051             "OpMemberDecorate %struct${InOut} 1 Offset 4\n"       //b32
1052             "OpMemberDecorate %struct${InOut} 2 Offset 8\n"       //v2b16
1053             "OpMemberDecorate %struct${InOut} 3 Offset 16\n"      //v2b32
1054             "OpMemberDecorate %struct${InOut} 4 Offset 24\n"      //v3b16
1055             "OpMemberDecorate %struct${InOut} 5 Offset 32\n"      //v3b32
1056             "OpMemberDecorate %struct${InOut} 6 Offset 48\n"      //v4b16
1057             "OpMemberDecorate %struct${InOut} 7 Offset 64\n"      //v4b32
1058             "OpMemberDecorate %struct${InOut} 8 Offset 80\n"      //strutNestedIn[11]
1059             "OpMemberDecorate %struct${InOut} 9 Offset 4128\n"    //b16In[11]
1060             "OpMemberDecorate %struct${InOut} 10 Offset 4304\n"   //b32bIn[11]
1061             "OpDecorate %structArr7${InOut} ArrayStride 4480\n"); //strutIn[7]
1062     case SHADERTEMPLATE_STRIDEMIX_STD430:
1063         return string(
1064             "\n"                                                   //strutNestedOut {b16, b32, v2b16[11], b32[11]}
1065             "OpDecorate %v2b16NestedArr11${InOut} ArrayStride 4\n" //v2b16[11]
1066             "OpDecorate %b32NestedArr11${InOut}  ArrayStride 4\n"  //b32[11]
1067             "OpMemberDecorate %sNested${InOut} 0 Offset 0\n"       //b16
1068             "OpMemberDecorate %sNested${InOut} 1 Offset 4\n"       //b32
1069             "OpMemberDecorate %sNested${InOut} 2 Offset 8\n"       //v2b16[11]
1070             "OpMemberDecorate %sNested${InOut} 3 Offset 52\n"      //b32[11]
1071             "OpDecorate %sNestedArr11${InOut} ArrayStride 96\n"    //strutNestedOut[11]
1072             "\n" //strutOut {b16, b32, v2b16, v2b32, v3b16, v3b32, v4b16, v4b32, strutNestedOut[11], b16Out[11], b32bOut[11]}
1073             "OpDecorate %sb16Arr11${InOut} ArrayStride 2\n"       //b16Out[11]
1074             "OpDecorate %sb32Arr11${InOut} ArrayStride 4\n"       //b32bOut[11]
1075             "OpMemberDecorate %struct${InOut} 0 Offset 0\n"       //b16
1076             "OpMemberDecorate %struct${InOut} 1 Offset 4\n"       //b32
1077             "OpMemberDecorate %struct${InOut} 2 Offset 8\n"       //v2b16
1078             "OpMemberDecorate %struct${InOut} 3 Offset 16\n"      //v2b32
1079             "OpMemberDecorate %struct${InOut} 4 Offset 24\n"      //v3b16
1080             "OpMemberDecorate %struct${InOut} 5 Offset 32\n"      //v3b32
1081             "OpMemberDecorate %struct${InOut} 6 Offset 48\n"      //v4b16
1082             "OpMemberDecorate %struct${InOut} 7 Offset 64\n"      //v4b32
1083             "OpMemberDecorate %struct${InOut} 8 Offset 80\n"      //strutNestedOut[11]
1084             "OpMemberDecorate %struct${InOut} 9 Offset 1136\n"    //b16Out[11]
1085             "OpMemberDecorate %struct${InOut} 10 Offset 1160\n"   //b32bOut[11]
1086             "OpDecorate %structArr7${InOut} ArrayStride 1216\n"); //strutOut[7]
1087 
1088     default:
1089         return string("");
1090     }
1091 }
1092 
1093 /*Return string contains spirv loop begin.
1094  the spec should contains "exeCount" - with name of const i32, it is number of executions
1095  the spec should contains "loopName" - suffix for all local names
1096  %Val${loopName} - index which can be used inside loop
1097  "%ndxArr${loopName}   = OpVariable %fp_i32  Function\n" - has to be defined outside
1098  The function should be always use with endLoop function*/
beginLoop(const std::map<std::string,std::string> & spec)1099 std::string beginLoop(const std::map<std::string, std::string> &spec)
1100 {
1101     const tcu::StringTemplate loopBegin(
1102         "OpStore %ndxArr${loopName} %zero\n"
1103         "OpBranch %Loop${loopName}\n"
1104         "%Loop${loopName} = OpLabel\n"
1105         "OpLoopMerge %MergeLabel1${loopName} %MergeLabel2${loopName} None\n"
1106         "OpBranch %Label1${loopName}\n"
1107         "%Label1${loopName} = OpLabel\n"
1108         "%Val${loopName} = OpLoad %i32 %ndxArr${loopName}\n"
1109         "%LessThan${loopName} = OpSLessThan %bool %Val${loopName} %${exeCount}\n"
1110         "OpBranchConditional %LessThan${loopName} %ifLabel${loopName} %MergeLabel1${loopName}\n"
1111         "%ifLabel${loopName} = OpLabel\n");
1112     return loopBegin.specialize(spec);
1113 }
1114 /*Return string contains spirv loop end.
1115  the spec should contains "loopName" - suffix for all local names, suffix should be the same in beginLoop
1116 The function should be always use with beginLoop function*/
endLoop(const std::map<std::string,std::string> & spec)1117 std::string endLoop(const std::map<std::string, std::string> &spec)
1118 {
1119     const tcu::StringTemplate loopEnd("OpBranch %MergeLabel2${loopName}\n"
1120                                       "%MergeLabel2${loopName} = OpLabel\n"
1121                                       "%plusOne${loopName} = OpIAdd %i32 %Val${loopName} %c_i32_1\n"
1122                                       "OpStore %ndxArr${loopName} %plusOne${loopName}\n"
1123                                       "OpBranch %Loop${loopName}\n"
1124                                       "%MergeLabel1${loopName} = OpLabel\n");
1125     return loopEnd.specialize(spec);
1126 }
1127 
addCompute16bitStorageUniform16To32Group(tcu::TestCaseGroup * group)1128 void addCompute16bitStorageUniform16To32Group(tcu::TestCaseGroup *group)
1129 {
1130     tcu::TestContext &testCtx = group->getTestContext();
1131     de::Random rnd(deStringHash(group->getName()));
1132     const int numElements = 128;
1133 
1134     const StringTemplate shaderTemplate(
1135         "OpCapability Shader\n"
1136         "OpCapability ${capability}\n"
1137         "OpExtension \"SPV_KHR_16bit_storage\"\n"
1138         "OpMemoryModel Logical GLSL450\n"
1139         "OpEntryPoint GLCompute %main \"main\" %id\n"
1140         "OpExecutionMode %main LocalSize 1 1 1\n"
1141         "OpDecorate %id BuiltIn GlobalInvocationId\n"
1142 
1143         "${stride}\n"
1144 
1145         "OpMemberDecorate %SSBO32 0 Offset 0\n"
1146         "OpMemberDecorate %SSBO16 0 Offset 0\n"
1147         "OpDecorate %SSBO32 BufferBlock\n"
1148         "OpDecorate %SSBO16 ${storage}\n"
1149         "OpDecorate %ssbo32 DescriptorSet 0\n"
1150         "OpDecorate %ssbo16 DescriptorSet 0\n"
1151         "OpDecorate %ssbo32 Binding 1\n"
1152         "OpDecorate %ssbo16 Binding 0\n"
1153 
1154         "${matrix_decor:opt}\n"
1155 
1156         "%bool      = OpTypeBool\n"
1157         "%void      = OpTypeVoid\n"
1158         "%voidf     = OpTypeFunction %void\n"
1159         "%u32       = OpTypeInt 32 0\n"
1160         "%i32       = OpTypeInt 32 1\n"
1161         "%f32       = OpTypeFloat 32\n"
1162         "%v3u32     = OpTypeVector %u32 3\n"
1163         "%uvec3ptr  = OpTypePointer Input %v3u32\n"
1164         "%i32ptr    = OpTypePointer Uniform %i32\n"
1165         "%f32ptr    = OpTypePointer Uniform %f32\n"
1166 
1167         "%zero      = OpConstant %i32 0\n"
1168         "%c_i32_1   = OpConstant %i32 1\n"
1169         "%c_i32_2   = OpConstant %i32 2\n"
1170         "%c_i32_3   = OpConstant %i32 3\n"
1171         "%c_i32_16  = OpConstant %i32 16\n"
1172         "%c_i32_32  = OpConstant %i32 32\n"
1173         "%c_i32_64  = OpConstant %i32 64\n"
1174         "%c_i32_128 = OpConstant %i32 128\n"
1175         "%c_i32_ci  = OpConstant %i32 ${constarrayidx}\n"
1176 
1177         "%i32arr    = OpTypeArray %i32 %c_i32_128\n"
1178         "%f32arr    = OpTypeArray %f32 %c_i32_128\n"
1179 
1180         "${types}\n"
1181         "${matrix_types:opt}\n"
1182 
1183         "%SSBO32    = OpTypeStruct %${matrix_prefix:opt}${base32}arr\n"
1184         "%SSBO16    = OpTypeStruct %${matrix_prefix:opt}${base16}arr\n"
1185         "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
1186         "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
1187         "%ssbo32    = OpVariable %up_SSBO32 Uniform\n"
1188         "%ssbo16    = OpVariable %up_SSBO16 Uniform\n"
1189 
1190         "%id        = OpVariable %uvec3ptr Input\n"
1191 
1192         "%main      = OpFunction %void None %voidf\n"
1193         "%label     = OpLabel\n"
1194         "%idval     = OpLoad %v3u32 %id\n"
1195         "%x         = OpCompositeExtract %u32 %idval 0\n"
1196         "%inloc     = OpAccessChain %${base16}ptr %ssbo16 %zero %${arrayindex} ${index0:opt}\n"
1197         "%val16     = OpLoad %${base16} %inloc\n"
1198         "%val32     = ${convert} %${base32} %val16\n"
1199         "%outloc    = OpAccessChain %${base32}ptr %ssbo32 %zero %x ${index0:opt}\n"
1200         "             OpStore %outloc %val32\n"
1201         "${matrix_store:opt}\n"
1202         "             OpReturn\n"
1203         "             OpFunctionEnd\n");
1204 
1205     { // floats
1206         const char floatTypes[] = "%f16       = OpTypeFloat 16\n"
1207                                   "%f16ptr    = OpTypePointer Uniform %f16\n"
1208                                   "%f16arr    = OpTypeArray %f16 %c_i32_128\n"
1209                                   "%v2f16     = OpTypeVector %f16 2\n"
1210                                   "%v2f32     = OpTypeVector %f32 2\n"
1211                                   "%v2f16ptr  = OpTypePointer Uniform %v2f16\n"
1212                                   "%v2f32ptr  = OpTypePointer Uniform %v2f32\n"
1213                                   "%v2f16arr  = OpTypeArray %v2f16 %c_i32_64\n"
1214                                   "%v2f32arr  = OpTypeArray %v2f32 %c_i32_64\n";
1215 
1216         struct CompositeType
1217         {
1218             const char *name;
1219             const char *base32;
1220             const char *base16;
1221             const char *stride;
1222             bool useConstantIndex;
1223             unsigned constantIndex;
1224             unsigned count;
1225             unsigned inputStride;
1226         };
1227 
1228         const CompositeType cTypes[2][5] = {
1229             {{"scalar", "f32", "f16", "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n", false, 0,
1230               numElements, 1},
1231              {"scalar_const_idx_5", "f32", "f16",
1232               "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n", true, 5, numElements, 1},
1233              {"scalar_const_idx_8", "f32", "f16",
1234               "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n", true, 8, numElements, 1},
1235              {"vector", "v2f32", "v2f16", "OpDecorate %v2f32arr ArrayStride 8\nOpDecorate %v2f16arr ArrayStride 4\n",
1236               false, 0, numElements / 2, 2},
1237              {"matrix", "v2f32", "v2f16",
1238               "OpDecorate %m4v2f32arr ArrayStride 32\nOpDecorate %m4v2f16arr ArrayStride 16\n", false, 0,
1239               numElements / 8, 8}},
1240             {{"scalar", "f32", "f16", "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 16\n", false, 0,
1241               numElements, 8},
1242              {"scalar_const_idx_5", "f32", "f16",
1243               "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 16\n", true, 5, numElements, 8},
1244              {"scalar_const_idx_8", "f32", "f16",
1245               "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 16\n", true, 8, numElements, 8},
1246              {"vector", "v2f32", "v2f16", "OpDecorate %v2f32arr ArrayStride 8\nOpDecorate %v2f16arr ArrayStride 16\n",
1247               false, 0, numElements / 2, 8},
1248              {"matrix", "v2f32", "v2f16",
1249               "OpDecorate %m4v2f32arr ArrayStride 32\nOpDecorate %m4v2f16arr ArrayStride 16\n", false, 0,
1250               numElements / 8, 8}}};
1251 
1252         for (uint32_t capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
1253             for (uint32_t tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes[capIdx]); ++tyIdx)
1254             {
1255                 ComputeShaderSpec spec;
1256                 map<string, string> specs;
1257                 string testName = string(CAPABILITIES[capIdx].name) + "_" + cTypes[capIdx][tyIdx].name + "_float";
1258 
1259                 specs["capability"]    = CAPABILITIES[capIdx].cap;
1260                 specs["storage"]       = CAPABILITIES[capIdx].decor;
1261                 specs["stride"]        = cTypes[capIdx][tyIdx].stride;
1262                 specs["base32"]        = cTypes[capIdx][tyIdx].base32;
1263                 specs["base16"]        = cTypes[capIdx][tyIdx].base16;
1264                 specs["types"]         = floatTypes;
1265                 specs["convert"]       = "OpFConvert";
1266                 specs["constarrayidx"] = de::toString(cTypes[capIdx][tyIdx].constantIndex);
1267                 if (cTypes[capIdx][tyIdx].useConstantIndex)
1268                     specs["arrayindex"] = "c_i32_ci";
1269                 else
1270                     specs["arrayindex"] = "x";
1271 
1272                 const uint32_t inputStride    = cTypes[capIdx][tyIdx].inputStride;
1273                 const uint32_t count          = cTypes[capIdx][tyIdx].count;
1274                 const uint32_t scalarsPerItem = numElements / count;
1275                 vector<deFloat16> float16Data = getFloat16s(rnd, numElements * inputStride);
1276                 vector<float> float32Data;
1277 
1278                 float32Data.reserve(numElements);
1279                 for (uint32_t numIdx = 0; numIdx < count; ++numIdx)
1280                     for (uint32_t scalarIdx = 0; scalarIdx < scalarsPerItem; scalarIdx++)
1281                         float32Data.push_back(deFloat16To32(float16Data[numIdx * inputStride + scalarIdx]));
1282 
1283                 vector<float> float32DataConstIdx;
1284                 if (cTypes[capIdx][tyIdx].useConstantIndex)
1285                 {
1286                     const uint32_t numFloats = numElements / cTypes[capIdx][tyIdx].count;
1287                     for (uint32_t numIdx = 0; numIdx < numElements; ++numIdx)
1288                         float32DataConstIdx.push_back(
1289                             float32Data[cTypes[capIdx][tyIdx].constantIndex * numFloats + numIdx % numFloats]);
1290                 }
1291 
1292                 if (strcmp(cTypes[capIdx][tyIdx].name, "matrix") == 0)
1293                 {
1294                     specs["index0"]        = "%zero";
1295                     specs["matrix_prefix"] = "m4";
1296                     specs["matrix_types"]  = "%m4v2f16 = OpTypeMatrix %v2f16 4\n"
1297                                              "%m4v2f32 = OpTypeMatrix %v2f32 4\n"
1298                                              "%m4v2f16arr = OpTypeArray %m4v2f16 %c_i32_16\n"
1299                                              "%m4v2f32arr = OpTypeArray %m4v2f32 %c_i32_16\n";
1300                     specs["matrix_decor"]  = "OpMemberDecorate %SSBO32 0 ColMajor\n"
1301                                              "OpMemberDecorate %SSBO32 0 MatrixStride 8\n"
1302                                              "OpMemberDecorate %SSBO16 0 ColMajor\n"
1303                                              "OpMemberDecorate %SSBO16 0 MatrixStride 4\n";
1304                     specs["matrix_store"]  = "%inloc_1  = OpAccessChain %v2f16ptr %ssbo16 %zero %x %c_i32_1\n"
1305                                              "%val16_1  = OpLoad %v2f16 %inloc_1\n"
1306                                              "%val32_1  = OpFConvert %v2f32 %val16_1\n"
1307                                              "%outloc_1 = OpAccessChain %v2f32ptr %ssbo32 %zero %x %c_i32_1\n"
1308                                              "            OpStore %outloc_1 %val32_1\n"
1309 
1310                                             "%inloc_2  = OpAccessChain %v2f16ptr %ssbo16 %zero %x %c_i32_2\n"
1311                                             "%val16_2  = OpLoad %v2f16 %inloc_2\n"
1312                                             "%val32_2  = OpFConvert %v2f32 %val16_2\n"
1313                                             "%outloc_2 = OpAccessChain %v2f32ptr %ssbo32 %zero %x %c_i32_2\n"
1314                                             "            OpStore %outloc_2 %val32_2\n"
1315 
1316                                             "%inloc_3  = OpAccessChain %v2f16ptr %ssbo16 %zero %x %c_i32_3\n"
1317                                             "%val16_3  = OpLoad %v2f16 %inloc_3\n"
1318                                             "%val32_3  = OpFConvert %v2f32 %val16_3\n"
1319                                             "%outloc_3 = OpAccessChain %v2f32ptr %ssbo32 %zero %x %c_i32_3\n"
1320                                             "            OpStore %outloc_3 %val32_3\n";
1321                 }
1322 
1323                 spec.assembly      = shaderTemplate.specialize(specs);
1324                 spec.numWorkGroups = IVec3(cTypes[capIdx][tyIdx].count, 1, 1);
1325                 spec.verifyIO      = check32BitFloats;
1326 
1327                 spec.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Data)), CAPABILITIES[capIdx].dtype));
1328                 spec.outputs.push_back(Resource(BufferSp(
1329                     new Float32Buffer(cTypes[capIdx][tyIdx].useConstantIndex ? float32DataConstIdx : float32Data))));
1330                 spec.extensions.push_back("VK_KHR_16bit_storage");
1331                 spec.requestedVulkanFeatures = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
1332 
1333                 group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), spec));
1334             }
1335     }
1336 
1337     { // Integers
1338         const char sintTypes[] = "%i16       = OpTypeInt 16 1\n"
1339                                  "%i16ptr    = OpTypePointer Uniform %i16\n"
1340                                  "%i16arr    = OpTypeArray %i16 %c_i32_128\n"
1341                                  "%v4i16     = OpTypeVector %i16 4\n"
1342                                  "%v4i32     = OpTypeVector %i32 4\n"
1343                                  "%v4i16ptr  = OpTypePointer Uniform %v4i16\n"
1344                                  "%v4i32ptr  = OpTypePointer Uniform %v4i32\n"
1345                                  "%v4i16arr  = OpTypeArray %v4i16 %c_i32_32\n"
1346                                  "%v4i32arr  = OpTypeArray %v4i32 %c_i32_32\n";
1347 
1348         const char uintTypes[] = "%u16       = OpTypeInt 16 0\n"
1349                                  "%u16ptr    = OpTypePointer Uniform %u16\n"
1350                                  "%u32ptr    = OpTypePointer Uniform %u32\n"
1351                                  "%u16arr    = OpTypeArray %u16 %c_i32_128\n"
1352                                  "%u32arr    = OpTypeArray %u32 %c_i32_128\n"
1353                                  "%v4u16     = OpTypeVector %u16 4\n"
1354                                  "%v4u32     = OpTypeVector %u32 4\n"
1355                                  "%v4u16ptr  = OpTypePointer Uniform %v4u16\n"
1356                                  "%v4u32ptr  = OpTypePointer Uniform %v4u32\n"
1357                                  "%v4u16arr  = OpTypeArray %v4u16 %c_i32_32\n"
1358                                  "%v4u32arr  = OpTypeArray %v4u32 %c_i32_32\n";
1359 
1360         struct CompositeType
1361         {
1362             const char *name;
1363             bool isSigned;
1364             const char *types;
1365             const char *base32;
1366             const char *base16;
1367             const char *opcode;
1368             const char *stride;
1369             bool useConstantIndex;
1370             unsigned constantIndex;
1371             unsigned count;
1372             unsigned inputStride;
1373         };
1374 
1375         const CompositeType cTypes[2][8] = {
1376             {{"scalar_sint", true, sintTypes, "i32", "i16", "OpSConvert",
1377               "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n", false, 0, numElements, 1},
1378              {"scalar_sint_const_idx_5", true, sintTypes, "i32", "i16", "OpSConvert",
1379               "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n", true, 5, numElements, 1},
1380              {"scalar_sint_const_idx_8", true, sintTypes, "i32", "i16", "OpSConvert",
1381               "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n", true, 8, numElements, 1},
1382              {"scalar_uint", false, uintTypes, "u32", "u16", "OpUConvert",
1383               "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n", false, 0, numElements, 1},
1384              {"scalar_uint_const_idx_5", false, uintTypes, "u32", "u16", "OpUConvert",
1385               "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n", true, 5, numElements, 1},
1386              {"scalar_uint_const_idx_8", false, uintTypes, "u32", "u16", "OpUConvert",
1387               "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n", true, 8, numElements, 1},
1388              {"vector_sint", true, sintTypes, "v4i32", "v4i16", "OpSConvert",
1389               "OpDecorate %v4i32arr ArrayStride 16\nOpDecorate %v4i16arr ArrayStride 8\n", false, 0, numElements / 4,
1390               4},
1391              {"vector_uint", false, uintTypes, "v4u32", "v4u16", "OpUConvert",
1392               "OpDecorate %v4u32arr ArrayStride 16\nOpDecorate %v4u16arr ArrayStride 8\n", false, 0, numElements / 4,
1393               4}},
1394             {{"scalar_sint", true, sintTypes, "i32", "i16", "OpSConvert",
1395               "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 16\n", false, 0, numElements, 8},
1396              {"scalar_sint_const_idx_5", true, sintTypes, "i32", "i16", "OpSConvert",
1397               "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 16\n", true, 5, numElements, 8},
1398              {"scalar_sint_const_idx_8", true, sintTypes, "i32", "i16", "OpSConvert",
1399               "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 16\n", true, 8, numElements, 8},
1400              {"scalar_uint", false, uintTypes, "u32", "u16", "OpUConvert",
1401               "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 16\n", false, 0, numElements, 8},
1402              {"scalar_uint_const_idx_5", false, uintTypes, "u32", "u16", "OpUConvert",
1403               "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 16\n", true, 5, numElements, 8},
1404              {"scalar_uint_const_idx_8", false, uintTypes, "u32", "u16", "OpUConvert",
1405               "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 16\n", true, 8, numElements, 8},
1406              {"vector_sint", true, sintTypes, "v4i32", "v4i16", "OpSConvert",
1407               "OpDecorate %v4i32arr ArrayStride 16\nOpDecorate %v4i16arr ArrayStride 16\n", false, 0, numElements / 4,
1408               8},
1409              {"vector_uint", false, uintTypes, "v4u32", "v4u16", "OpUConvert",
1410               "OpDecorate %v4u32arr ArrayStride 16\nOpDecorate %v4u16arr ArrayStride 16\n", false, 0, numElements / 4,
1411               8}}};
1412 
1413         for (uint32_t capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
1414             for (uint32_t tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes[capIdx]); ++tyIdx)
1415             {
1416                 ComputeShaderSpec spec;
1417                 map<string, string> specs;
1418                 string testName            = string(CAPABILITIES[capIdx].name) + "_" + cTypes[capIdx][tyIdx].name;
1419                 const uint32_t inputStride = cTypes[capIdx][tyIdx].inputStride;
1420                 vector<int16_t> inputs     = getInt16s(rnd, numElements * inputStride);
1421                 vector<int32_t> sOutputs;
1422                 vector<int32_t> uOutputs;
1423                 const uint16_t signBitMask    = 0x8000;
1424                 const uint32_t signExtendMask = 0xffff0000;
1425                 const uint32_t count          = cTypes[capIdx][tyIdx].count;
1426                 const uint32_t scalarsPerItem = numElements / count;
1427 
1428                 sOutputs.reserve(numElements);
1429                 uOutputs.reserve(numElements);
1430 
1431                 for (uint32_t numNdx = 0; numNdx < count; ++numNdx)
1432                     for (uint32_t scalarIdx = 0; scalarIdx < scalarsPerItem; ++scalarIdx)
1433                     {
1434                         const int16_t input = inputs[numNdx * inputStride + scalarIdx];
1435 
1436                         uOutputs.push_back(static_cast<uint16_t>(input));
1437                         if (input & signBitMask)
1438                             sOutputs.push_back(static_cast<int32_t>(input | signExtendMask));
1439                         else
1440                             sOutputs.push_back(static_cast<int32_t>(input));
1441                     }
1442 
1443                 vector<int32_t> intDataConstIdx;
1444 
1445                 if (cTypes[capIdx][tyIdx].useConstantIndex)
1446                 {
1447                     for (uint32_t numIdx = 0; numIdx < numElements; ++numIdx)
1448                     {
1449                         const int32_t idx =
1450                             cTypes[capIdx][tyIdx].constantIndex * scalarsPerItem + numIdx % scalarsPerItem;
1451 
1452                         if (cTypes[capIdx][tyIdx].isSigned)
1453                             intDataConstIdx.push_back(sOutputs[idx]);
1454                         else
1455                             intDataConstIdx.push_back(uOutputs[idx]);
1456                     }
1457                 }
1458 
1459                 specs["capability"]    = CAPABILITIES[capIdx].cap;
1460                 specs["storage"]       = CAPABILITIES[capIdx].decor;
1461                 specs["stride"]        = cTypes[capIdx][tyIdx].stride;
1462                 specs["base32"]        = cTypes[capIdx][tyIdx].base32;
1463                 specs["base16"]        = cTypes[capIdx][tyIdx].base16;
1464                 specs["types"]         = cTypes[capIdx][tyIdx].types;
1465                 specs["convert"]       = cTypes[capIdx][tyIdx].opcode;
1466                 specs["constarrayidx"] = de::toString(cTypes[capIdx][tyIdx].constantIndex);
1467                 if (cTypes[capIdx][tyIdx].useConstantIndex)
1468                     specs["arrayindex"] = "c_i32_ci";
1469                 else
1470                     specs["arrayindex"] = "x";
1471 
1472                 spec.assembly      = shaderTemplate.specialize(specs);
1473                 spec.numWorkGroups = IVec3(cTypes[capIdx][tyIdx].count, 1, 1);
1474 
1475                 spec.inputs.push_back(Resource(BufferSp(new Int16Buffer(inputs)), CAPABILITIES[capIdx].dtype));
1476                 if (cTypes[capIdx][tyIdx].useConstantIndex)
1477                     spec.outputs.push_back(Resource(BufferSp(new Int32Buffer(intDataConstIdx))));
1478                 else if (cTypes[capIdx][tyIdx].isSigned)
1479                     spec.outputs.push_back(Resource(BufferSp(new Int32Buffer(sOutputs))));
1480                 else
1481                     spec.outputs.push_back(Resource(BufferSp(new Int32Buffer(uOutputs))));
1482                 spec.extensions.push_back("VK_KHR_16bit_storage");
1483                 spec.requestedVulkanFeatures = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
1484 
1485                 group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), spec));
1486             }
1487     }
1488 }
1489 
addCompute16bitStorageUniform16To32ChainAccessGroup(tcu::TestCaseGroup * group)1490 void addCompute16bitStorageUniform16To32ChainAccessGroup(tcu::TestCaseGroup *group)
1491 {
1492     tcu::TestContext &testCtx = group->getTestContext();
1493     de::Random rnd(deStringHash(group->getName()));
1494     const uint32_t structSize        = 128; // In number of 16bit items. Includes padding.
1495     vector<deFloat16> inputDataFloat = getFloat16s(rnd, structSize * 4);
1496     vector<int16_t> inputDataInt     = getInt16s(rnd, structSize * 4);
1497     vector<float> outputDataFloat;
1498     vector<int32_t> outputDataSInt;
1499     vector<int32_t> outputDataUInt;
1500     vector<tcu::UVec4> indices;
1501 
1502     // Input is an array of a struct that varies on 16bit data type being tested:
1503     //
1504     // Float:
1505     //
1506     // float16 scalars[3]
1507     // mat4x3  matrix
1508     // vec3    vector
1509     //
1510     // Int:
1511     //
1512     // int16 scalars[3]
1513     // int16 array2D[4][3]
1514     // ivec3 vector
1515     //
1516     // UInt:
1517     //
1518     // uint16 scalars[3]
1519     // uint16 array2D[4][3]
1520     // uvec3  vector
1521 
1522     const StringTemplate shaderTemplate(
1523         "                              OpCapability Shader\n"
1524         "                              OpCapability ${capability}\n"
1525         "                              OpExtension \"SPV_KHR_16bit_storage\"\n"
1526         "                         %1 = OpExtInstImport \"GLSL.std.450\"\n"
1527         "                              OpMemoryModel Logical GLSL450\n"
1528         "                              OpEntryPoint GLCompute %main \"main\"\n"
1529         "                              OpExecutionMode %main LocalSize 1 1 1\n"
1530         "                              OpSource GLSL 430\n"
1531         "                              OpDecorate %Output BufferBlock\n"
1532         "                              OpDecorate %dataOutput DescriptorSet 0\n"
1533         "                              OpDecorate %dataOutput Binding 1\n"
1534         "                              OpDecorate %scalarArray ArrayStride 16\n"
1535         "                              OpDecorate %scalarArray2D ArrayStride 48\n"
1536         "                              OpMemberDecorate %S 0 Offset 0\n"
1537         "                              OpMemberDecorate %S 1 Offset 48\n"
1538         "                              ${decoration:opt}\n"
1539         "                              OpMemberDecorate %S 2 Offset 240\n"
1540         "                              OpDecorate %_arr_S_uint_4 ArrayStride 256\n"
1541         "                              OpMemberDecorate %Input 0 Offset 0\n"
1542         "                              OpMemberDecorate %Output 0 Offset 0\n"
1543         "                              OpDecorate %Input ${storage}\n"
1544         "                              OpDecorate %dataInput DescriptorSet 0\n"
1545         "                              OpDecorate %dataInput Binding 0\n"
1546         "                       %f16 = OpTypeFloat 16\n"
1547         "                       %f32 = OpTypeFloat 32\n"
1548         "                       %i16 = OpTypeInt 16 1\n"
1549         "                       %i32 = OpTypeInt 32 1\n"
1550         "                       %u16 = OpTypeInt 16 0\n"
1551         "                       %u32 = OpTypeInt 32 0\n"
1552         "                      %void = OpTypeVoid\n"
1553         "                  %voidFunc = OpTypeFunction %void\n"
1554         "        %_ptr_Function_uint = OpTypePointer Function %u32\n"
1555         "                     %v3u32 = OpTypeVector %u32 3\n"
1556         "          %_ptr_Input_v3u32 = OpTypePointer Input %v3u32\n"
1557         "                     %int_0 = OpConstant %i32 0\n"
1558         "                    %uint_3 = OpConstant %u32 3\n"
1559         "                    %uint_4 = OpConstant %u32 4\n"
1560         "                        %s0 = OpConstant %u32 ${s0}\n"
1561         "                        %s1 = OpConstant %u32 ${s1}\n"
1562         "                        %s2 = OpConstant %u32 ${s2}\n"
1563         "                        %s3 = OpConstant %u32 ${s3}\n"
1564         "                    %Output = OpTypeStruct %${type}32\n"
1565         "       %_ptr_Uniform_Output = OpTypePointer Uniform %Output\n"
1566         "                %dataOutput = OpVariable %_ptr_Uniform_Output Uniform\n"
1567         "               %scalarArray = OpTypeArray %${type}16 %uint_3\n"
1568         "                     %v3f16 = OpTypeVector %f16 3\n"
1569         "                     %v3i16 = OpTypeVector %i16 3\n"
1570         "                     %v3u16 = OpTypeVector %u16 3\n"
1571         "                    %matrix = OpTypeMatrix %v3f16 4\n"
1572         "             %scalarArray2D = OpTypeArray %scalarArray %uint_4\n"
1573         "                         %S = OpTypeStruct %scalarArray %${type2D} %v3${type}16\n"
1574         "             %_arr_S_uint_4 = OpTypeArray %S %uint_4\n"
1575         "                     %Input = OpTypeStruct %_arr_S_uint_4\n"
1576         "        %_ptr_Uniform_Input = OpTypePointer Uniform %Input\n"
1577         "                 %dataInput = OpVariable %_ptr_Uniform_Input Uniform\n"
1578         "   %_ptr_Uniform_16bit_data = OpTypePointer Uniform %${type}16\n"
1579         "   %_ptr_Uniform_32bit_data = OpTypePointer Uniform %${type}32\n"
1580         "                      %main = OpFunction %void None %voidFunc\n"
1581         "                     %entry = OpLabel\n"
1582         "                   %dataPtr = ${accessChain}\n"
1583         "                      %data = OpLoad %${type}16 %dataPtr\n"
1584         "                 %converted = ${convert}\n"
1585         "                    %outPtr = OpAccessChain %_ptr_Uniform_32bit_data %dataOutput %int_0\n"
1586         "                              OpStore %outPtr %converted\n"
1587         "                              OpReturn\n"
1588         "                              OpFunctionEnd\n");
1589 
1590     // Generate constant indices for OpChainAccess. We need to use constant values
1591     // when indexing into structures. This loop generates all permutations.
1592     for (uint32_t idx0 = 0; idx0 < 4; ++idx0)
1593         for (uint32_t idx1 = 0; idx1 < 3; ++idx1)
1594             for (uint32_t idx2 = 0; idx2 < (idx1 == 1u ? 4u : 3u); ++idx2)
1595                 for (uint32_t idx3 = 0; idx3 < (idx1 == 1u ? 3u : 1u); ++idx3)
1596                     indices.push_back(tcu::UVec4(idx0, idx1, idx2, idx3));
1597 
1598     for (uint32_t numIdx = 0; numIdx < (uint32_t)indices.size(); ++numIdx)
1599     {
1600         const uint16_t signBitMask    = 0x8000;
1601         const uint32_t signExtendMask = 0xffff0000;
1602         // Determine the selected output float for the selected indices.
1603         const tcu::UVec4 vec = indices[numIdx];
1604         // Offsets are in multiples of 16bits. Floats are using matrix as the
1605         // second field, which has different layout rules than 2D array.
1606         // Therefore separate offset tables are needed.
1607         const uint32_t fieldOffsetsFloat[3][3] = {{0u, 8u, 0u}, {24, 24u, 1u}, {120u, 1u, 0u}};
1608         const uint32_t fieldOffsetsInt[3][3]   = {{0u, 8u, 0u}, {24, 24u, 8u}, {120u, 1u, 0u}};
1609         const uint32_t offsetFloat             = vec.x() * structSize + fieldOffsetsFloat[vec.y()][0] +
1610                                      fieldOffsetsFloat[vec.y()][1] * vec.z() + fieldOffsetsFloat[vec.y()][2] * vec.w();
1611         const uint32_t offsetInt = vec.x() * structSize + fieldOffsetsInt[vec.y()][0] +
1612                                    fieldOffsetsInt[vec.y()][1] * vec.z() + fieldOffsetsInt[vec.y()][2] * vec.w();
1613         const bool hasSign = inputDataInt[offsetInt] & signBitMask;
1614 
1615         outputDataFloat.push_back(deFloat16To32(inputDataFloat[offsetFloat]));
1616         outputDataUInt.push_back((uint16_t)inputDataInt[offsetInt]);
1617         outputDataSInt.push_back((int32_t)(inputDataInt[offsetInt] | (hasSign ? signExtendMask : 0u)));
1618     }
1619 
1620     for (uint32_t indicesIdx = 0; indicesIdx < (uint32_t)indices.size(); ++indicesIdx)
1621         for (uint32_t capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
1622         {
1623             string indexString = de::toString(indices[indicesIdx].x()) + "_" + de::toString(indices[indicesIdx].y()) +
1624                                  "_" + de::toString(indices[indicesIdx].z());
1625             if (indices[indicesIdx].y() == 1)
1626                 indexString += string("_") + de::toString(indices[indicesIdx].w());
1627 
1628             const string testNameBase = string(CAPABILITIES[capIdx].name) + "_" + indexString + "_";
1629 
1630             struct DataType
1631             {
1632                 string name;
1633                 string type;
1634                 string convert;
1635                 string type2D; // Matrix when using floats. 2D array otherwise.
1636                 BufferSp inputs;
1637                 BufferSp outputs;
1638             };
1639 
1640             const DataType dataTypes[] = {
1641                 {"float", "f", "OpFConvert %f32 %data", "matrix", BufferSp(new Float16Buffer(inputDataFloat)),
1642                  BufferSp(new Float32Buffer(vector<float>(1, outputDataFloat[indicesIdx])))},
1643                 {"int", "i", "OpSConvert %i32 %data", "scalarArray2D", BufferSp(new Int16Buffer(inputDataInt)),
1644                  BufferSp(new Int32Buffer(vector<int32_t>(1, outputDataSInt[indicesIdx])))},
1645                 {"uint", "u", "OpUConvert %u32 %data", "scalarArray2D", BufferSp(new Int16Buffer(inputDataInt)),
1646                  BufferSp(new Int32Buffer(vector<int32_t>(1, outputDataUInt[indicesIdx])))}};
1647 
1648             for (uint32_t dataTypeIdx = 0; dataTypeIdx < DE_LENGTH_OF_ARRAY(dataTypes); ++dataTypeIdx)
1649             {
1650                 const string testName = testNameBase + dataTypes[dataTypeIdx].name;
1651                 map<string, string> specs;
1652                 ComputeShaderSpec spec;
1653 
1654                 specs["capability"] = CAPABILITIES[capIdx].cap;
1655                 specs["storage"]    = CAPABILITIES[capIdx].decor;
1656                 specs["s0"]         = de::toString(indices[indicesIdx].x());
1657                 specs["s1"]         = de::toString(indices[indicesIdx].y());
1658                 specs["s2"]         = de::toString(indices[indicesIdx].z());
1659                 specs["s3"]         = de::toString(indices[indicesIdx].w());
1660                 specs["type"]       = dataTypes[dataTypeIdx].type;
1661                 specs["convert"]    = dataTypes[dataTypeIdx].convert;
1662                 specs["type2D"]     = dataTypes[dataTypeIdx].type2D;
1663 
1664                 if (indices[indicesIdx].y() == 1)
1665                     specs["accessChain"] = "OpAccessChain %_ptr_Uniform_16bit_data %dataInput %int_0 %s0 %s1 %s2 %s3";
1666                 else
1667                     specs["accessChain"] = "OpAccessChain %_ptr_Uniform_16bit_data %dataInput %int_0 %s0 %s1 %s2";
1668 
1669                 if (dataTypeIdx == 0)
1670                 {
1671                     spec.verifyIO       = check32BitFloats;
1672                     specs["decoration"] = "OpMemberDecorate %S 1 ColMajor\nOpMemberDecorate %S 1 MatrixStride 48\n";
1673                 }
1674 
1675                 spec.assembly      = shaderTemplate.specialize(specs);
1676                 spec.numWorkGroups = IVec3(1, 1, 1);
1677                 spec.extensions.push_back("VK_KHR_16bit_storage");
1678                 spec.requestedVulkanFeatures = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
1679                 spec.inputs.push_back(Resource(dataTypes[dataTypeIdx].inputs, CAPABILITIES[capIdx].dtype));
1680                 spec.outputs.push_back(Resource(dataTypes[dataTypeIdx].outputs));
1681 
1682                 group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), spec));
1683             }
1684         }
1685 }
1686 
addCompute16bitStoragePushConstant16To32Group(tcu::TestCaseGroup * group)1687 void addCompute16bitStoragePushConstant16To32Group(tcu::TestCaseGroup *group)
1688 {
1689     tcu::TestContext &testCtx = group->getTestContext();
1690     de::Random rnd(deStringHash(group->getName()));
1691     const int numElements = 64;
1692 
1693     const StringTemplate shaderTemplate(
1694         "OpCapability Shader\n"
1695         "OpCapability StoragePushConstant16\n"
1696         "OpExtension \"SPV_KHR_16bit_storage\"\n"
1697         "OpMemoryModel Logical GLSL450\n"
1698         "OpEntryPoint GLCompute %main \"main\" %id\n"
1699         "OpExecutionMode %main LocalSize 1 1 1\n"
1700         "OpDecorate %id BuiltIn GlobalInvocationId\n"
1701 
1702         "${stride}"
1703 
1704         "OpDecorate %PC16 Block\n"
1705         "OpMemberDecorate %PC16 0 Offset 0\n"
1706         "OpMemberDecorate %SSBO32 0 Offset 0\n"
1707         "OpDecorate %SSBO32 BufferBlock\n"
1708         "OpDecorate %ssbo32 DescriptorSet 0\n"
1709         "OpDecorate %ssbo32 Binding 0\n"
1710 
1711         "${matrix_decor:opt}\n"
1712 
1713         "%void      = OpTypeVoid\n"
1714         "%voidf     = OpTypeFunction %void\n"
1715         "%u32       = OpTypeInt 32 0\n"
1716         "%i32       = OpTypeInt 32 1\n"
1717         "%f32       = OpTypeFloat 32\n"
1718         "%v3u32     = OpTypeVector %u32 3\n"
1719         "%uvec3ptr  = OpTypePointer Input %v3u32\n"
1720         "%i32ptr    = OpTypePointer Uniform %i32\n"
1721         "%f32ptr    = OpTypePointer Uniform %f32\n"
1722 
1723         "%zero      = OpConstant %i32 0\n"
1724         "%c_i32_1   = OpConstant %i32 1\n"
1725         "%c_i32_8   = OpConstant %i32 8\n"
1726         "%c_i32_16  = OpConstant %i32 16\n"
1727         "%c_i32_32  = OpConstant %i32 32\n"
1728         "%c_i32_64  = OpConstant %i32 64\n"
1729         "%c_i32_ci  = OpConstant %i32 ${constarrayidx}\n"
1730 
1731         "%i32arr    = OpTypeArray %i32 %c_i32_64\n"
1732         "%f32arr    = OpTypeArray %f32 %c_i32_64\n"
1733 
1734         "${types}\n"
1735         "${matrix_types:opt}\n"
1736 
1737         "%PC16      = OpTypeStruct %${matrix_prefix:opt}${base16}arr\n"
1738         "%pp_PC16   = OpTypePointer PushConstant %PC16\n"
1739         "%pc16      = OpVariable %pp_PC16 PushConstant\n"
1740         "%SSBO32    = OpTypeStruct %${matrix_prefix:opt}${base32}arr\n"
1741         "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
1742         "%ssbo32    = OpVariable %up_SSBO32 Uniform\n"
1743 
1744         "%id        = OpVariable %uvec3ptr Input\n"
1745 
1746         "%main      = OpFunction %void None %voidf\n"
1747         "%label     = OpLabel\n"
1748         "%idval     = OpLoad %v3u32 %id\n"
1749         "%x         = OpCompositeExtract %u32 %idval 0\n"
1750         "%inloc     = OpAccessChain %${base16}ptr %pc16 %zero %${arrayindex} ${index0:opt}\n"
1751         "%val16     = OpLoad %${base16} %inloc\n"
1752         "%val32     = ${convert} %${base32} %val16\n"
1753         "%outloc    = OpAccessChain %${base32}ptr %ssbo32 %zero %x ${index0:opt}\n"
1754         "             OpStore %outloc %val32\n"
1755         "${matrix_store:opt}\n"
1756         "             OpReturn\n"
1757         "             OpFunctionEnd\n");
1758 
1759     { // floats
1760         const char floatTypes[] = "%f16       = OpTypeFloat 16\n"
1761                                   "%f16ptr    = OpTypePointer PushConstant %f16\n"
1762                                   "%f16arr    = OpTypeArray %f16 %c_i32_64\n"
1763                                   "%v4f16     = OpTypeVector %f16 4\n"
1764                                   "%v4f32     = OpTypeVector %f32 4\n"
1765                                   "%v4f16ptr  = OpTypePointer PushConstant %v4f16\n"
1766                                   "%v4f32ptr  = OpTypePointer Uniform %v4f32\n"
1767                                   "%v4f16arr  = OpTypeArray %v4f16 %c_i32_16\n"
1768                                   "%v4f32arr  = OpTypeArray %v4f32 %c_i32_16\n";
1769 
1770         struct CompositeType
1771         {
1772             const char *name;
1773             const char *base32;
1774             const char *base16;
1775             const char *stride;
1776             bool useConstantIndex;
1777             unsigned constantIndex;
1778             unsigned count;
1779         };
1780 
1781         const CompositeType cTypes[] = {
1782             {"scalar", "f32", "f16", "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n", false, 0,
1783              numElements},
1784             {"scalar_const_idx_5", "f32", "f16", "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n",
1785              true, 5, numElements},
1786             {"scalar_const_idx_8", "f32", "f16", "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n",
1787              true, 8, numElements},
1788             {"vector", "v4f32", "v4f16", "OpDecorate %v4f32arr ArrayStride 16\nOpDecorate %v4f16arr ArrayStride 8\n",
1789              false, 0, numElements / 4},
1790             {"matrix", "v4f32", "v4f16",
1791              "OpDecorate %m2v4f32arr ArrayStride 32\nOpDecorate %m2v4f16arr ArrayStride 16\n", false, 0,
1792              numElements / 8},
1793         };
1794 
1795         vector<deFloat16> float16Data = getFloat16s(rnd, numElements);
1796         vector<float> float32Data;
1797 
1798         float32Data.reserve(numElements);
1799         for (uint32_t numIdx = 0; numIdx < numElements; ++numIdx)
1800             float32Data.push_back(deFloat16To32(float16Data[numIdx]));
1801 
1802         for (uint32_t tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes); ++tyIdx)
1803         {
1804             ComputeShaderSpec spec;
1805             map<string, string> specs;
1806             string testName = string(cTypes[tyIdx].name) + "_float";
1807 
1808             vector<float> float32DataConstIdx;
1809             if (cTypes[tyIdx].useConstantIndex)
1810             {
1811                 const uint32_t numFloats = numElements / cTypes[tyIdx].count;
1812                 for (uint32_t numIdx = 0; numIdx < numElements; ++numIdx)
1813                     float32DataConstIdx.push_back(
1814                         float32Data[cTypes[tyIdx].constantIndex * numFloats + numIdx % numFloats]);
1815             }
1816 
1817             specs["stride"]        = cTypes[tyIdx].stride;
1818             specs["base32"]        = cTypes[tyIdx].base32;
1819             specs["base16"]        = cTypes[tyIdx].base16;
1820             specs["types"]         = floatTypes;
1821             specs["convert"]       = "OpFConvert";
1822             specs["constarrayidx"] = de::toString(cTypes[tyIdx].constantIndex);
1823             if (cTypes[tyIdx].useConstantIndex)
1824                 specs["arrayindex"] = "c_i32_ci";
1825             else
1826                 specs["arrayindex"] = "x";
1827 
1828             if (strcmp(cTypes[tyIdx].name, "matrix") == 0)
1829             {
1830                 specs["index0"]        = "%zero";
1831                 specs["matrix_prefix"] = "m2";
1832                 specs["matrix_types"]  = "%m2v4f16 = OpTypeMatrix %v4f16 2\n"
1833                                          "%m2v4f32 = OpTypeMatrix %v4f32 2\n"
1834                                          "%m2v4f16arr = OpTypeArray %m2v4f16 %c_i32_8\n"
1835                                          "%m2v4f32arr = OpTypeArray %m2v4f32 %c_i32_8\n";
1836                 specs["matrix_decor"]  = "OpMemberDecorate %SSBO32 0 ColMajor\n"
1837                                          "OpMemberDecorate %SSBO32 0 MatrixStride 16\n"
1838                                          "OpMemberDecorate %PC16 0 ColMajor\n"
1839                                          "OpMemberDecorate %PC16 0 MatrixStride 8\n";
1840                 specs["matrix_store"]  = "%inloc_1  = OpAccessChain %v4f16ptr %pc16 %zero %x %c_i32_1\n"
1841                                          "%val16_1  = OpLoad %v4f16 %inloc_1\n"
1842                                          "%val32_1  = OpFConvert %v4f32 %val16_1\n"
1843                                          "%outloc_1 = OpAccessChain %v4f32ptr %ssbo32 %zero %x %c_i32_1\n"
1844                                          "            OpStore %outloc_1 %val32_1\n";
1845             }
1846 
1847             spec.assembly      = shaderTemplate.specialize(specs);
1848             spec.numWorkGroups = IVec3(cTypes[tyIdx].count, 1, 1);
1849             spec.verifyIO      = check32BitFloats;
1850             spec.pushConstants = BufferSp(new Float16Buffer(float16Data));
1851 
1852             spec.outputs.push_back(Resource(
1853                 BufferSp(new Float32Buffer(cTypes[tyIdx].useConstantIndex ? float32DataConstIdx : float32Data))));
1854             spec.extensions.push_back("VK_KHR_16bit_storage");
1855             spec.requestedVulkanFeatures.ext16BitStorage.storagePushConstant16 = true;
1856 
1857             group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), spec));
1858         }
1859     }
1860     { // integers
1861         const char sintTypes[] = "%i16       = OpTypeInt 16 1\n"
1862                                  "%i16ptr    = OpTypePointer PushConstant %i16\n"
1863                                  "%i16arr    = OpTypeArray %i16 %c_i32_64\n"
1864                                  "%v2i16     = OpTypeVector %i16 2\n"
1865                                  "%v2i32     = OpTypeVector %i32 2\n"
1866                                  "%v2i16ptr  = OpTypePointer PushConstant %v2i16\n"
1867                                  "%v2i32ptr  = OpTypePointer Uniform %v2i32\n"
1868                                  "%v2i16arr  = OpTypeArray %v2i16 %c_i32_32\n"
1869                                  "%v2i32arr  = OpTypeArray %v2i32 %c_i32_32\n";
1870 
1871         const char uintTypes[] = "%u16       = OpTypeInt 16 0\n"
1872                                  "%u16ptr    = OpTypePointer PushConstant %u16\n"
1873                                  "%u32ptr    = OpTypePointer Uniform %u32\n"
1874                                  "%u16arr    = OpTypeArray %u16 %c_i32_64\n"
1875                                  "%u32arr    = OpTypeArray %u32 %c_i32_64\n"
1876                                  "%v2u16     = OpTypeVector %u16 2\n"
1877                                  "%v2u32     = OpTypeVector %u32 2\n"
1878                                  "%v2u16ptr  = OpTypePointer PushConstant %v2u16\n"
1879                                  "%v2u32ptr  = OpTypePointer Uniform %v2u32\n"
1880                                  "%v2u16arr  = OpTypeArray %v2u16 %c_i32_32\n"
1881                                  "%v2u32arr  = OpTypeArray %v2u32 %c_i32_32\n";
1882 
1883         struct CompositeType
1884         {
1885             const char *name;
1886             bool isSigned;
1887             const char *types;
1888             const char *base32;
1889             const char *base16;
1890             const char *opcode;
1891             const char *stride;
1892             bool useConstantIndex;
1893             unsigned constantIndex;
1894             unsigned count;
1895         };
1896 
1897         const CompositeType cTypes[] = {
1898             {"scalar_sint", true, sintTypes, "i32", "i16", "OpSConvert",
1899              "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n", false, 0, numElements},
1900             {"scalar_sint_const_idx_5", true, sintTypes, "i32", "i16", "OpSConvert",
1901              "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n", true, 5, numElements},
1902             {"scalar_sint_const_idx_8", true, sintTypes, "i32", "i16", "OpSConvert",
1903              "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n", true, 8, numElements},
1904             {"scalar_uint", false, uintTypes, "u32", "u16", "OpUConvert",
1905              "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n", false, 0, numElements},
1906             {"scalar_uint_const_idx_5", false, uintTypes, "u32", "u16", "OpUConvert",
1907              "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n", true, 5, numElements},
1908             {"scalar_uint_const_idx_8", false, uintTypes, "u32", "u16", "OpUConvert",
1909              "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n", true, 8, numElements},
1910             {"vector_sint", true, sintTypes, "v2i32", "v2i16", "OpSConvert",
1911              "OpDecorate %v2i32arr ArrayStride 8\nOpDecorate %v2i16arr ArrayStride 4\n", false, 0, numElements / 2},
1912             {"vector_uint", false, uintTypes, "v2u32", "v2u16", "OpUConvert",
1913              "OpDecorate %v2u32arr ArrayStride 8\nOpDecorate %v2u16arr ArrayStride 4\n", false, 0, numElements / 2},
1914         };
1915 
1916         vector<int16_t> inputs = getInt16s(rnd, numElements);
1917         vector<int32_t> sOutputs;
1918         vector<int32_t> uOutputs;
1919         const uint16_t signBitMask    = 0x8000;
1920         const uint32_t signExtendMask = 0xffff0000;
1921 
1922         sOutputs.reserve(inputs.size());
1923         uOutputs.reserve(inputs.size());
1924 
1925         for (uint32_t numNdx = 0; numNdx < inputs.size(); ++numNdx)
1926         {
1927             uOutputs.push_back(static_cast<uint16_t>(inputs[numNdx]));
1928             if (inputs[numNdx] & signBitMask)
1929                 sOutputs.push_back(static_cast<int32_t>(inputs[numNdx] | signExtendMask));
1930             else
1931                 sOutputs.push_back(static_cast<int32_t>(inputs[numNdx]));
1932         }
1933 
1934         for (uint32_t tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes); ++tyIdx)
1935         {
1936             ComputeShaderSpec spec;
1937             map<string, string> specs;
1938             const char *testName = cTypes[tyIdx].name;
1939             vector<int32_t> intDataConstIdx;
1940 
1941             if (cTypes[tyIdx].useConstantIndex)
1942             {
1943                 const uint32_t numInts = numElements / cTypes[tyIdx].count;
1944 
1945                 for (uint32_t numIdx = 0; numIdx < numElements; ++numIdx)
1946                 {
1947                     const int32_t idx = cTypes[tyIdx].constantIndex * numInts + numIdx % numInts;
1948 
1949                     if (cTypes[tyIdx].isSigned)
1950                         intDataConstIdx.push_back(sOutputs[idx]);
1951                     else
1952                         intDataConstIdx.push_back(uOutputs[idx]);
1953                 }
1954             }
1955 
1956             specs["stride"]        = cTypes[tyIdx].stride;
1957             specs["base32"]        = cTypes[tyIdx].base32;
1958             specs["base16"]        = cTypes[tyIdx].base16;
1959             specs["types"]         = cTypes[tyIdx].types;
1960             specs["convert"]       = cTypes[tyIdx].opcode;
1961             specs["constarrayidx"] = de::toString(cTypes[tyIdx].constantIndex);
1962             if (cTypes[tyIdx].useConstantIndex)
1963                 specs["arrayindex"] = "c_i32_ci";
1964             else
1965                 specs["arrayindex"] = "x";
1966 
1967             spec.assembly      = shaderTemplate.specialize(specs);
1968             spec.numWorkGroups = IVec3(cTypes[tyIdx].count, 1, 1);
1969             spec.pushConstants = BufferSp(new Int16Buffer(inputs));
1970 
1971             if (cTypes[tyIdx].useConstantIndex)
1972                 spec.outputs.push_back(Resource(BufferSp(new Int32Buffer(intDataConstIdx))));
1973             else if (cTypes[tyIdx].isSigned)
1974                 spec.outputs.push_back(Resource(BufferSp(new Int32Buffer(sOutputs))));
1975             else
1976                 spec.outputs.push_back(Resource(BufferSp(new Int32Buffer(uOutputs))));
1977             spec.extensions.push_back("VK_KHR_16bit_storage");
1978             spec.requestedVulkanFeatures.ext16BitStorage.storagePushConstant16 = true;
1979 
1980             group->addChild(new SpvAsmComputeShaderCase(testCtx, testName, spec));
1981         }
1982     }
1983 }
1984 
addGraphics16BitStorageUniformInt32To16Group(tcu::TestCaseGroup * testGroup)1985 void addGraphics16BitStorageUniformInt32To16Group(tcu::TestCaseGroup *testGroup)
1986 {
1987     de::Random rnd(deStringHash(testGroup->getName()));
1988     map<string, string> fragments;
1989     const uint32_t numDataPoints = 256;
1990     RGBA defaultColors[4];
1991     vector<string> extensions;
1992     const StringTemplate capabilities("OpCapability ${cap}\n");
1993     // inputs and outputs are declared to be vectors of signed integers.
1994     // However, depending on the test, they may be interpreted as unsiged
1995     // integers. That won't be a problem as long as we passed the bits
1996     // in faithfully to the pipeline.
1997     vector<int32_t> inputs = getInt32s(rnd, numDataPoints);
1998     vector<int16_t> outputs;
1999 
2000     outputs.reserve(inputs.size());
2001     for (uint32_t numNdx = 0; numNdx < inputs.size(); ++numNdx)
2002         outputs.push_back(static_cast<int16_t>(0xffff & inputs[numNdx]));
2003 
2004     extensions.push_back("VK_KHR_16bit_storage");
2005     fragments["extension"] = "OpExtension \"SPV_KHR_16bit_storage\"";
2006 
2007     getDefaultColors(defaultColors);
2008 
2009     struct IntegerFacts
2010     {
2011         const char *name;
2012         const char *type32;
2013         const char *type16;
2014         const char *opcode;
2015         const char *isSigned;
2016     };
2017 
2018     const IntegerFacts intFacts[] = {
2019         {"sint", "%i32", "%i16", "OpSConvert", "1"},
2020         {"uint", "%u32", "%u16", "OpUConvert", "0"},
2021     };
2022 
2023     const StringTemplate scalarPreMain("${itype16} = OpTypeInt 16 ${signed}\n"
2024                                        "%c_i32_256 = OpConstant %i32 256\n"
2025                                        "   %up_i32 = OpTypePointer Uniform ${itype32}\n"
2026                                        "   %up_i16 = OpTypePointer Uniform ${itype16}\n"
2027                                        "   %ra_i32 = OpTypeArray ${itype32} %c_i32_256\n"
2028                                        "   %ra_i16 = OpTypeArray ${itype16} %c_i32_256\n"
2029                                        "   %SSBO32 = OpTypeStruct %ra_i32\n"
2030                                        "   %SSBO16 = OpTypeStruct %ra_i16\n"
2031                                        "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
2032                                        "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
2033                                        "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
2034                                        "   %ssbo16 = OpVariable %up_SSBO16 Uniform\n");
2035 
2036     const StringTemplate scalarDecoration("OpDecorate %ra_i32 ArrayStride ${arraystride}\n"
2037                                           "OpDecorate %ra_i16 ArrayStride 2\n"
2038                                           "OpMemberDecorate %SSBO32 0 Offset 0\n"
2039                                           "OpMemberDecorate %SSBO16 0 Offset 0\n"
2040                                           "OpDecorate %SSBO32 ${indecor}\n"
2041                                           "OpDecorate %SSBO16 BufferBlock\n"
2042                                           "OpDecorate %ssbo32 DescriptorSet 0\n"
2043                                           "OpDecorate %ssbo16 DescriptorSet 0\n"
2044                                           "OpDecorate %ssbo32 Binding 0\n"
2045                                           "OpDecorate %ssbo16 Binding 1\n");
2046 
2047     const StringTemplate scalarTestFunc("%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
2048                                         "    %param = OpFunctionParameter %v4f32\n"
2049 
2050                                         "%entry = OpLabel\n"
2051                                         "    %i = OpVariable %fp_i32 Function\n"
2052                                         "         OpStore %i %c_i32_0\n"
2053                                         "         OpBranch %loop\n"
2054 
2055                                         " %loop = OpLabel\n"
2056                                         "   %15 = OpLoad %i32 %i\n"
2057                                         "   %lt = OpSLessThan %bool %15 %c_i32_256\n"
2058                                         "         OpLoopMerge %merge %inc None\n"
2059                                         "         OpBranchConditional %lt %write %merge\n"
2060 
2061                                         "%write = OpLabel\n"
2062                                         "   %30 = OpLoad %i32 %i\n"
2063                                         "  %src = OpAccessChain %up_i32 %ssbo32 %c_i32_0 %30\n"
2064                                         "%val32 = OpLoad ${itype32} %src\n"
2065                                         "%val16 = ${convert} ${itype16} %val32\n"
2066                                         "  %dst = OpAccessChain %up_i16 %ssbo16 %c_i32_0 %30\n"
2067                                         "         OpStore %dst %val16\n"
2068                                         "         OpBranch %inc\n"
2069 
2070                                         "  %inc = OpLabel\n"
2071                                         "   %37 = OpLoad %i32 %i\n"
2072                                         "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
2073                                         "         OpStore %i %39\n"
2074                                         "         OpBranch %loop\n"
2075 
2076                                         "%merge = OpLabel\n"
2077                                         "         OpReturnValue %param\n"
2078 
2079                                         "OpFunctionEnd\n");
2080 
2081     const StringTemplate vecPreMain("${itype16} = OpTypeInt 16 ${signed}\n"
2082                                     " %c_i32_64 = OpConstant %i32 64\n"
2083                                     "%v4itype16 = OpTypeVector ${itype16} 4\n"
2084                                     " %up_v4i32 = OpTypePointer Uniform ${v4itype32}\n"
2085                                     " %up_v4i16 = OpTypePointer Uniform %v4itype16\n"
2086                                     " %ra_v4i32 = OpTypeArray ${v4itype32} %c_i32_64\n"
2087                                     " %ra_v4i16 = OpTypeArray %v4itype16 %c_i32_64\n"
2088                                     "   %SSBO32 = OpTypeStruct %ra_v4i32\n"
2089                                     "   %SSBO16 = OpTypeStruct %ra_v4i16\n"
2090                                     "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
2091                                     "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
2092                                     "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
2093                                     "   %ssbo16 = OpVariable %up_SSBO16 Uniform\n");
2094 
2095     const StringTemplate vecDecoration("OpDecorate %ra_v4i32 ArrayStride 16\n"
2096                                        "OpDecorate %ra_v4i16 ArrayStride 8\n"
2097                                        "OpMemberDecorate %SSBO32 0 Offset 0\n"
2098                                        "OpMemberDecorate %SSBO16 0 Offset 0\n"
2099                                        "OpDecorate %SSBO32 ${indecor}\n"
2100                                        "OpDecorate %SSBO16 BufferBlock\n"
2101                                        "OpDecorate %ssbo32 DescriptorSet 0\n"
2102                                        "OpDecorate %ssbo16 DescriptorSet 0\n"
2103                                        "OpDecorate %ssbo32 Binding 0\n"
2104                                        "OpDecorate %ssbo16 Binding 1\n");
2105 
2106     const StringTemplate vecTestFunc("%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
2107                                      "    %param = OpFunctionParameter %v4f32\n"
2108 
2109                                      "%entry = OpLabel\n"
2110                                      "    %i = OpVariable %fp_i32 Function\n"
2111                                      "         OpStore %i %c_i32_0\n"
2112                                      "         OpBranch %loop\n"
2113 
2114                                      " %loop = OpLabel\n"
2115                                      "   %15 = OpLoad %i32 %i\n"
2116                                      "   %lt = OpSLessThan %bool %15 %c_i32_64\n"
2117                                      "         OpLoopMerge %merge %inc None\n"
2118                                      "         OpBranchConditional %lt %write %merge\n"
2119 
2120                                      "%write = OpLabel\n"
2121                                      "   %30 = OpLoad %i32 %i\n"
2122                                      "  %src = OpAccessChain %up_v4i32 %ssbo32 %c_i32_0 %30\n"
2123                                      "%val32 = OpLoad ${v4itype32} %src\n"
2124                                      "%val16 = ${convert} %v4itype16 %val32\n"
2125                                      "  %dst = OpAccessChain %up_v4i16 %ssbo16 %c_i32_0 %30\n"
2126                                      "         OpStore %dst %val16\n"
2127                                      "         OpBranch %inc\n"
2128 
2129                                      "  %inc = OpLabel\n"
2130                                      "   %37 = OpLoad %i32 %i\n"
2131                                      "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
2132                                      "         OpStore %i %39\n"
2133                                      "         OpBranch %loop\n"
2134 
2135                                      "%merge = OpLabel\n"
2136                                      "         OpReturnValue %param\n"
2137 
2138                                      "OpFunctionEnd\n");
2139 
2140     // Scalar
2141     {
2142         const uint32_t arrayStrides[] = {4, 16};
2143 
2144         for (uint32_t capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
2145             for (uint32_t factIdx = 0; factIdx < DE_LENGTH_OF_ARRAY(intFacts); ++factIdx)
2146             {
2147                 map<string, string> specs;
2148                 string name = string(CAPABILITIES[capIdx].name) + "_scalar_" + intFacts[factIdx].name;
2149 
2150                 specs["cap"]         = CAPABILITIES[capIdx].cap;
2151                 specs["indecor"]     = CAPABILITIES[capIdx].decor;
2152                 specs["itype32"]     = intFacts[factIdx].type32;
2153                 specs["v4itype32"]   = "%v4" + string(intFacts[factIdx].type32).substr(1);
2154                 specs["itype16"]     = intFacts[factIdx].type16;
2155                 specs["signed"]      = intFacts[factIdx].isSigned;
2156                 specs["convert"]     = intFacts[factIdx].opcode;
2157                 specs["arraystride"] = de::toString(arrayStrides[capIdx]);
2158 
2159                 fragments["pre_main"]   = scalarPreMain.specialize(specs);
2160                 fragments["testfun"]    = scalarTestFunc.specialize(specs);
2161                 fragments["capability"] = capabilities.specialize(specs);
2162                 fragments["decoration"] = scalarDecoration.specialize(specs);
2163 
2164                 vector<int32_t> inputsPadded;
2165                 for (size_t dataIdx = 0; dataIdx < inputs.size(); ++dataIdx)
2166                 {
2167                     inputsPadded.push_back(inputs[dataIdx]);
2168                     for (uint32_t padIdx = 0; padIdx < arrayStrides[capIdx] / 4 - 1; ++padIdx)
2169                         inputsPadded.push_back(0);
2170                 }
2171 
2172                 GraphicsResources resources;
2173                 VulkanFeatures features;
2174 
2175                 resources.inputs.push_back(
2176                     Resource(BufferSp(new Int32Buffer(inputsPadded)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
2177                 resources.outputs.push_back(
2178                     Resource(BufferSp(new Int16Buffer(outputs)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
2179                 resources.inputs.back().setDescriptorType(CAPABILITIES[capIdx].dtype);
2180 
2181                 features = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
2182                 features.coreFeatures.vertexPipelineStoresAndAtomics = true;
2183                 features.coreFeatures.fragmentStoresAndAtomics       = true;
2184 
2185                 createTestsForAllStages(name, defaultColors, defaultColors, fragments, resources, extensions, testGroup,
2186                                         features);
2187             }
2188     }
2189     // Vector
2190     {
2191         GraphicsResources resources;
2192         resources.inputs.push_back(Resource(BufferSp(new Int32Buffer(inputs)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
2193         resources.outputs.push_back(Resource(BufferSp(new Int16Buffer(outputs)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
2194 
2195         for (uint32_t capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
2196             for (uint32_t factIdx = 0; factIdx < DE_LENGTH_OF_ARRAY(intFacts); ++factIdx)
2197             {
2198                 map<string, string> specs;
2199                 string name = string(CAPABILITIES[capIdx].name) + "_vector_" + intFacts[factIdx].name;
2200                 VulkanFeatures features;
2201 
2202                 specs["cap"]       = CAPABILITIES[capIdx].cap;
2203                 specs["indecor"]   = CAPABILITIES[capIdx].decor;
2204                 specs["itype32"]   = intFacts[factIdx].type32;
2205                 specs["v4itype32"] = "%v4" + string(intFacts[factIdx].type32).substr(1);
2206                 specs["itype16"]   = intFacts[factIdx].type16;
2207                 specs["signed"]    = intFacts[factIdx].isSigned;
2208                 specs["convert"]   = intFacts[factIdx].opcode;
2209 
2210                 fragments["pre_main"]   = vecPreMain.specialize(specs);
2211                 fragments["testfun"]    = vecTestFunc.specialize(specs);
2212                 fragments["capability"] = capabilities.specialize(specs);
2213                 fragments["decoration"] = vecDecoration.specialize(specs);
2214 
2215                 resources.inputs.back().setDescriptorType(CAPABILITIES[capIdx].dtype);
2216 
2217                 features = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
2218                 features.coreFeatures.vertexPipelineStoresAndAtomics = true;
2219                 features.coreFeatures.fragmentStoresAndAtomics       = true;
2220 
2221                 createTestsForAllStages(name, defaultColors, defaultColors, fragments, resources, extensions, testGroup,
2222                                         features);
2223             }
2224     }
2225 }
2226 
addCompute16bitStorageUniform16To16Group(tcu::TestCaseGroup * group)2227 void addCompute16bitStorageUniform16To16Group(tcu::TestCaseGroup *group)
2228 {
2229     tcu::TestContext &testCtx = group->getTestContext();
2230     de::Random rnd(deStringHash(group->getName()));
2231     const int numElements               = 128;
2232     const vector<deFloat16> float16Data = getFloat16s(rnd, numElements);
2233     const vector<deFloat16> float16UnusedData(numElements, 0);
2234     ComputeShaderSpec spec;
2235 
2236     std::ostringstream shaderTemplate;
2237     shaderTemplate << "OpCapability Shader\n"
2238                    << "OpCapability StorageUniformBufferBlock16\n"
2239                    << "OpExtension \"SPV_KHR_16bit_storage\"\n"
2240                    << "OpMemoryModel Logical GLSL450\n"
2241                    << "OpEntryPoint GLCompute %main \"main\" %id\n"
2242                    << "OpExecutionMode %main LocalSize 1 1 1\n"
2243                    << "OpDecorate %id BuiltIn GlobalInvocationId\n"
2244                    << "OpDecorate %f16arr ArrayStride 2\n"
2245                    << "OpMemberDecorate %SSBO_IN 0 Coherent\n"
2246                    << "OpMemberDecorate %SSBO_OUT 0 Coherent\n"
2247                    << "OpMemberDecorate %SSBO_IN 0 Offset 0\n"
2248                    << "OpMemberDecorate %SSBO_OUT 0 Offset 0\n"
2249                    << "OpDecorate %SSBO_IN BufferBlock\n"
2250                    << "OpDecorate %SSBO_OUT BufferBlock\n"
2251                    << "OpDecorate %ssboIN DescriptorSet 0\n"
2252                    << "OpDecorate %ssboOUT DescriptorSet 0\n"
2253                    << "OpDecorate %ssboIN Binding 0\n"
2254                    << "OpDecorate %ssboOUT Binding 1\n"
2255                    << "\n"
2256                    << "%bool      = OpTypeBool\n"
2257                    << "%void      = OpTypeVoid\n"
2258                    << "%voidf     = OpTypeFunction %void\n"
2259                    << "%u32       = OpTypeInt 32 0\n"
2260                    << "%i32       = OpTypeInt 32 1\n"
2261                    << "%uvec3     = OpTypeVector %u32 3\n"
2262                    << "%uvec3ptr  = OpTypePointer Input %uvec3\n"
2263                    << "%f16       = OpTypeFloat 16\n"
2264                    << "%f16ptr    = OpTypePointer Uniform %f16\n"
2265                    << "\n"
2266                    << "%zero      = OpConstant %i32 0\n"
2267                    << "%c_size    = OpConstant %i32 " << numElements << "\n"
2268                    << "\n"
2269                    << "%f16arr    = OpTypeArray %f16 %c_size\n"
2270                    << "%SSBO_IN   = OpTypeStruct %f16arr\n"
2271                    << "%SSBO_OUT  = OpTypeStruct %f16arr\n"
2272                    << "%up_SSBOIN = OpTypePointer Uniform %SSBO_IN\n"
2273                    << "%up_SSBOOUT = OpTypePointer Uniform %SSBO_OUT\n"
2274                    << "%ssboIN    = OpVariable %up_SSBOIN Uniform\n"
2275                    << "%ssboOUT   = OpVariable %up_SSBOOUT Uniform\n"
2276                    << "\n"
2277                    << "%id        = OpVariable %uvec3ptr Input\n"
2278                    << "%main      = OpFunction %void None %voidf\n"
2279                    << "%label     = OpLabel\n"
2280                    << "%idval     = OpLoad %uvec3 %id\n"
2281                    << "%x         = OpCompositeExtract %u32 %idval 0\n"
2282                    << "%y         = OpCompositeExtract %u32 %idval 1\n"
2283                    << "\n"
2284                    << "%inlocx     = OpAccessChain %f16ptr %ssboIN %zero %x \n"
2285                    << "%valx       = OpLoad %f16 %inlocx\n"
2286                    << "%outlocx    = OpAccessChain %f16ptr %ssboOUT %zero %x \n"
2287                    << "             OpStore %outlocx %valx\n"
2288 
2289                    << "%inlocy    = OpAccessChain %f16ptr %ssboIN %zero %y \n"
2290                    << "%valy      = OpLoad %f16 %inlocy\n"
2291                    << "%outlocy   = OpAccessChain %f16ptr %ssboOUT %zero %y \n"
2292                    << "             OpStore %outlocy %valy\n"
2293                    << "\n"
2294                    << "             OpReturn\n"
2295                    << "             OpFunctionEnd\n";
2296 
2297     spec.assembly       = shaderTemplate.str();
2298     spec.numWorkGroups  = IVec3(numElements, numElements, 1);
2299     spec.verifyIO       = computeCheckBuffersFloats;
2300     spec.coherentMemory = true;
2301     spec.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Data))));
2302     spec.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16UnusedData))));
2303     spec.extensions.push_back("VK_KHR_16bit_storage");
2304     spec.requestedVulkanFeatures = get16BitStorageFeatures("uniform_buffer_block");
2305 
2306     group->addChild(new SpvAsmComputeShaderCase(testCtx, "stress_test", spec));
2307 }
2308 
addCompute16bitStorageUniform32To16Group(tcu::TestCaseGroup * group)2309 void addCompute16bitStorageUniform32To16Group(tcu::TestCaseGroup *group)
2310 {
2311     tcu::TestContext &testCtx = group->getTestContext();
2312     de::Random rnd(deStringHash(group->getName()));
2313     const int numElements = 128;
2314 
2315     const StringTemplate shaderTemplate("OpCapability Shader\n"
2316                                         "OpCapability ${capability}\n"
2317                                         "OpExtension \"SPV_KHR_16bit_storage\"\n"
2318                                         "OpMemoryModel Logical GLSL450\n"
2319                                         "OpEntryPoint GLCompute %main \"main\" %id\n"
2320                                         "OpExecutionMode %main LocalSize 1 1 1\n"
2321                                         "OpDecorate %id BuiltIn GlobalInvocationId\n"
2322 
2323                                         "${stride}"
2324 
2325                                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
2326                                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
2327                                         "OpDecorate %SSBO32 ${storage}\n"
2328                                         "OpDecorate %SSBO16 BufferBlock\n"
2329                                         "OpDecorate %ssbo32 DescriptorSet 0\n"
2330                                         "OpDecorate %ssbo16 DescriptorSet 0\n"
2331                                         "OpDecorate %ssbo32 Binding 0\n"
2332                                         "OpDecorate %ssbo16 Binding 1\n"
2333 
2334                                         "${matrix_decor:opt}\n"
2335 
2336                                         "${rounding:opt}\n"
2337 
2338                                         "%bool      = OpTypeBool\n"
2339                                         "%void      = OpTypeVoid\n"
2340                                         "%voidf     = OpTypeFunction %void\n"
2341                                         "%u32       = OpTypeInt 32 0\n"
2342                                         "%i32       = OpTypeInt 32 1\n"
2343                                         "%f32       = OpTypeFloat 32\n"
2344                                         "%uvec3     = OpTypeVector %u32 3\n"
2345                                         "%uvec3ptr  = OpTypePointer Input %uvec3\n"
2346                                         "%i32ptr    = OpTypePointer Uniform %i32\n"
2347                                         "%f32ptr    = OpTypePointer Uniform %f32\n"
2348 
2349                                         "%zero      = OpConstant %i32 0\n"
2350                                         "%c_i32_1   = OpConstant %i32 1\n"
2351                                         "%c_i32_16  = OpConstant %i32 16\n"
2352                                         "%c_i32_32  = OpConstant %i32 32\n"
2353                                         "%c_i32_64  = OpConstant %i32 64\n"
2354                                         "%c_i32_128 = OpConstant %i32 128\n"
2355 
2356                                         "%i32arr    = OpTypeArray %i32 %c_i32_128\n"
2357                                         "%f32arr    = OpTypeArray %f32 %c_i32_128\n"
2358 
2359                                         "${types}\n"
2360                                         "${matrix_types:opt}\n"
2361 
2362                                         "%SSBO32    = OpTypeStruct %${matrix_prefix:opt}${base32}arr\n"
2363                                         "%SSBO16    = OpTypeStruct %${matrix_prefix:opt}${base16}arr\n"
2364                                         "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
2365                                         "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
2366                                         "%ssbo32    = OpVariable %up_SSBO32 Uniform\n"
2367                                         "%ssbo16    = OpVariable %up_SSBO16 Uniform\n"
2368 
2369                                         "%id        = OpVariable %uvec3ptr Input\n"
2370 
2371                                         "%main      = OpFunction %void None %voidf\n"
2372                                         "%label     = OpLabel\n"
2373                                         "%idval     = OpLoad %uvec3 %id\n"
2374                                         "%x         = OpCompositeExtract %u32 %idval 0\n"
2375                                         "%inloc     = OpAccessChain %${base32}ptr %ssbo32 %zero %x ${index0:opt}\n"
2376                                         "%val32     = OpLoad %${base32} %inloc\n"
2377                                         "%val16     = ${convert} %${base16} %val32\n"
2378                                         "%outloc    = OpAccessChain %${base16}ptr %ssbo16 %zero %x ${index0:opt}\n"
2379                                         "             OpStore %outloc %val16\n"
2380                                         "${matrix_store:opt}\n"
2381                                         "             OpReturn\n"
2382                                         "             OpFunctionEnd\n");
2383 
2384     { // Floats
2385         const char floatTypes[] = "%f16       = OpTypeFloat 16\n"
2386                                   "%f16ptr    = OpTypePointer Uniform %f16\n"
2387                                   "%f16arr    = OpTypeArray %f16 %c_i32_128\n"
2388                                   "%v4f16     = OpTypeVector %f16 4\n"
2389                                   "%v4f32     = OpTypeVector %f32 4\n"
2390                                   "%v4f16ptr  = OpTypePointer Uniform %v4f16\n"
2391                                   "%v4f32ptr  = OpTypePointer Uniform %v4f32\n"
2392                                   "%v4f16arr  = OpTypeArray %v4f16 %c_i32_32\n"
2393                                   "%v4f32arr  = OpTypeArray %v4f32 %c_i32_32\n";
2394 
2395         struct RndMode
2396         {
2397             const char *name;
2398             const char *decor;
2399             VerifyIOFunc func;
2400         };
2401 
2402         const RndMode rndModes[] = {
2403             {"rtz", "OpDecorate %val16  FPRoundingMode RTZ", computeCheck16BitFloats<ROUNDINGMODE_RTZ>},
2404             {"rte", "OpDecorate %val16  FPRoundingMode RTE", computeCheck16BitFloats<ROUNDINGMODE_RTE>},
2405             {"unspecified_rnd_mode", "",
2406              computeCheck16BitFloats<RoundingModeFlags(ROUNDINGMODE_RTE | ROUNDINGMODE_RTZ)>},
2407         };
2408 
2409         struct CompositeType
2410         {
2411             const char *name;
2412             const char *base32;
2413             const char *base16;
2414             const char *stride;
2415             unsigned count;
2416             unsigned inputStride;
2417         };
2418 
2419         const CompositeType cTypes[2][3] = {
2420             {// BufferBlock
2421              {"scalar", "f32", "f16", "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n",
2422               numElements, 1},
2423              {"vector", "v4f32", "v4f16", "OpDecorate %v4f32arr ArrayStride 16\nOpDecorate %v4f16arr ArrayStride 8\n",
2424               numElements / 4, 1},
2425              {"matrix", "v4f32", "v4f16",
2426               "OpDecorate %m2v4f32arr ArrayStride 32\nOpDecorate %m2v4f16arr ArrayStride 16\n", numElements / 8, 1}},
2427             {// Block
2428              {"scalar", "f32", "f16", "OpDecorate %f32arr ArrayStride 16\nOpDecorate %f16arr ArrayStride 2\n",
2429               numElements, 4},
2430              {"vector", "v4f32", "v4f16", "OpDecorate %v4f32arr ArrayStride 16\nOpDecorate %v4f16arr ArrayStride 8\n",
2431               numElements / 4, 1},
2432              {"matrix", "v4f32", "v4f16",
2433               "OpDecorate %m2v4f32arr ArrayStride 32\nOpDecorate %m2v4f16arr ArrayStride 16\n", numElements / 8, 1}}};
2434 
2435         vector<deFloat16> float16UnusedData(numElements, 0);
2436 
2437         for (uint32_t capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
2438             for (uint32_t tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes[capIdx]); ++tyIdx)
2439                 for (uint32_t rndModeIdx = 0; rndModeIdx < DE_LENGTH_OF_ARRAY(rndModes); ++rndModeIdx)
2440                 {
2441                     ComputeShaderSpec spec;
2442                     map<string, string> specs;
2443                     string testName = string(CAPABILITIES[capIdx].name) + "_" + cTypes[capIdx][tyIdx].name + "_float_" +
2444                                       rndModes[rndModeIdx].name;
2445                     vector<float> float32Data = getFloat32s(rnd, numElements * cTypes[capIdx][tyIdx].inputStride);
2446 
2447                     specs["capability"] = CAPABILITIES[capIdx].cap;
2448                     specs["storage"]    = CAPABILITIES[capIdx].decor;
2449                     specs["stride"]     = cTypes[capIdx][tyIdx].stride;
2450                     specs["base32"]     = cTypes[capIdx][tyIdx].base32;
2451                     specs["base16"]     = cTypes[capIdx][tyIdx].base16;
2452                     specs["rounding"]   = rndModes[rndModeIdx].decor;
2453                     specs["types"]      = floatTypes;
2454                     specs["convert"]    = "OpFConvert";
2455 
2456                     if (strcmp(cTypes[capIdx][tyIdx].name, "matrix") == 0)
2457                     {
2458                         if (strcmp(rndModes[rndModeIdx].name, "rtz") == 0)
2459                             specs["rounding"] += "\nOpDecorate %val16_1  FPRoundingMode RTZ\n";
2460                         else if (strcmp(rndModes[rndModeIdx].name, "rte") == 0)
2461                             specs["rounding"] += "\nOpDecorate %val16_1  FPRoundingMode RTE\n";
2462 
2463                         specs["index0"]        = "%zero";
2464                         specs["matrix_prefix"] = "m2";
2465                         specs["matrix_types"]  = "%m2v4f16 = OpTypeMatrix %v4f16 2\n"
2466                                                  "%m2v4f32 = OpTypeMatrix %v4f32 2\n"
2467                                                  "%m2v4f16arr = OpTypeArray %m2v4f16 %c_i32_16\n"
2468                                                  "%m2v4f32arr = OpTypeArray %m2v4f32 %c_i32_16\n";
2469                         specs["matrix_decor"]  = "OpMemberDecorate %SSBO32 0 ColMajor\n"
2470                                                  "OpMemberDecorate %SSBO32 0 MatrixStride 16\n"
2471                                                  "OpMemberDecorate %SSBO16 0 ColMajor\n"
2472                                                  "OpMemberDecorate %SSBO16 0 MatrixStride 8\n";
2473                         specs["matrix_store"]  = "%inloc_1  = OpAccessChain %v4f32ptr %ssbo32 %zero %x %c_i32_1\n"
2474                                                  "%val32_1  = OpLoad %v4f32 %inloc_1\n"
2475                                                  "%val16_1  = OpFConvert %v4f16 %val32_1\n"
2476                                                  "%outloc_1 = OpAccessChain %v4f16ptr %ssbo16 %zero %x %c_i32_1\n"
2477                                                  "            OpStore %outloc_1 %val16_1\n";
2478                     }
2479 
2480                     spec.assembly      = shaderTemplate.specialize(specs);
2481                     spec.numWorkGroups = IVec3(cTypes[capIdx][tyIdx].count, 1, 1);
2482                     spec.verifyIO      = rndModes[rndModeIdx].func;
2483 
2484                     spec.inputs.push_back(
2485                         Resource(BufferSp(new Float32Buffer(float32Data)), CAPABILITIES[capIdx].dtype));
2486                     // We provided a custom verifyIO in the above in which inputs will be used for checking.
2487                     // So put unused data in the expected values.
2488                     spec.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16UnusedData))));
2489                     spec.extensions.push_back("VK_KHR_16bit_storage");
2490                     spec.requestedVulkanFeatures = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
2491 
2492                     group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), spec));
2493                 }
2494     }
2495 
2496     { // Integers
2497         const char sintTypes[] = "%i16       = OpTypeInt 16 1\n"
2498                                  "%i16ptr    = OpTypePointer Uniform %i16\n"
2499                                  "%i16arr    = OpTypeArray %i16 %c_i32_128\n"
2500                                  "%v2i16     = OpTypeVector %i16 2\n"
2501                                  "%v2i32     = OpTypeVector %i32 2\n"
2502                                  "%v2i16ptr  = OpTypePointer Uniform %v2i16\n"
2503                                  "%v2i32ptr  = OpTypePointer Uniform %v2i32\n"
2504                                  "%v2i16arr  = OpTypeArray %v2i16 %c_i32_64\n"
2505                                  "%v2i32arr  = OpTypeArray %v2i32 %c_i32_64\n";
2506 
2507         const char uintTypes[] = "%u16       = OpTypeInt 16 0\n"
2508                                  "%u16ptr    = OpTypePointer Uniform %u16\n"
2509                                  "%u32ptr    = OpTypePointer Uniform %u32\n"
2510                                  "%u16arr    = OpTypeArray %u16 %c_i32_128\n"
2511                                  "%u32arr    = OpTypeArray %u32 %c_i32_128\n"
2512                                  "%v2u16     = OpTypeVector %u16 2\n"
2513                                  "%v2u32     = OpTypeVector %u32 2\n"
2514                                  "%v2u16ptr  = OpTypePointer Uniform %v2u16\n"
2515                                  "%v2u32ptr  = OpTypePointer Uniform %v2u32\n"
2516                                  "%v2u16arr  = OpTypeArray %v2u16 %c_i32_64\n"
2517                                  "%v2u32arr  = OpTypeArray %v2u32 %c_i32_64\n";
2518 
2519         struct CompositeType
2520         {
2521             const char *name;
2522             const char *types;
2523             const char *base32;
2524             const char *base16;
2525             const char *opcode;
2526             const char *stride;
2527             unsigned count;
2528             unsigned inputStride;
2529         };
2530 
2531         const CompositeType cTypes[2][4] = {
2532             {{"scalar_sint", sintTypes, "i32", "i16", "OpSConvert",
2533               "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n", numElements, 1},
2534              {"scalar_uint", uintTypes, "u32", "u16", "OpUConvert",
2535               "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n", numElements, 1},
2536              {"vector_sint", sintTypes, "v2i32", "v2i16", "OpSConvert",
2537               "OpDecorate %v2i32arr ArrayStride 8\nOpDecorate %v2i16arr ArrayStride 4\n", numElements / 2, 2},
2538              {"vector_uint", uintTypes, "v2u32", "v2u16", "OpUConvert",
2539               "OpDecorate %v2u32arr ArrayStride 8\nOpDecorate %v2u16arr ArrayStride 4\n", numElements / 2, 2}},
2540             {{"scalar_sint", sintTypes, "i32", "i16", "OpSConvert",
2541               "OpDecorate %i32arr ArrayStride 16\nOpDecorate %i16arr ArrayStride 2\n", numElements, 4},
2542              {"scalar_uint", uintTypes, "u32", "u16", "OpUConvert",
2543               "OpDecorate %u32arr ArrayStride 16\nOpDecorate %u16arr ArrayStride 2\n", numElements, 4},
2544              {"vector_sint", sintTypes, "v2i32", "v2i16", "OpSConvert",
2545               "OpDecorate %v2i32arr ArrayStride 16\nOpDecorate %v2i16arr ArrayStride 4\n", numElements / 2, 4},
2546              {"vector_uint", uintTypes, "v2u32", "v2u16", "OpUConvert",
2547               "OpDecorate %v2u32arr ArrayStride 16\nOpDecorate %v2u16arr ArrayStride 4\n", numElements / 2, 4}}};
2548 
2549         for (uint32_t capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
2550             for (uint32_t tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes[capIdx]); ++tyIdx)
2551             {
2552                 ComputeShaderSpec spec;
2553                 map<string, string> specs;
2554                 string testName               = string(CAPABILITIES[capIdx].name) + "_" + cTypes[capIdx][tyIdx].name;
2555                 const uint32_t inputStride    = cTypes[capIdx][tyIdx].inputStride;
2556                 const uint32_t count          = cTypes[capIdx][tyIdx].count;
2557                 const uint32_t scalarsPerItem = numElements / count;
2558 
2559                 vector<int32_t> inputs = getInt32s(rnd, numElements * inputStride);
2560                 vector<int16_t> outputs;
2561 
2562                 outputs.reserve(numElements);
2563                 for (uint32_t numNdx = 0; numNdx < count; ++numNdx)
2564                     for (uint32_t scalarIdx = 0; scalarIdx < scalarsPerItem; scalarIdx++)
2565                         outputs.push_back(static_cast<int16_t>(0xffff & inputs[numNdx * inputStride + scalarIdx]));
2566 
2567                 specs["capability"] = CAPABILITIES[capIdx].cap;
2568                 specs["storage"]    = CAPABILITIES[capIdx].decor;
2569                 specs["stride"]     = cTypes[capIdx][tyIdx].stride;
2570                 specs["base32"]     = cTypes[capIdx][tyIdx].base32;
2571                 specs["base16"]     = cTypes[capIdx][tyIdx].base16;
2572                 specs["types"]      = cTypes[capIdx][tyIdx].types;
2573                 specs["convert"]    = cTypes[capIdx][tyIdx].opcode;
2574 
2575                 spec.assembly      = shaderTemplate.specialize(specs);
2576                 spec.numWorkGroups = IVec3(cTypes[capIdx][tyIdx].count, 1, 1);
2577 
2578                 spec.inputs.push_back(Resource(BufferSp(new Int32Buffer(inputs)), CAPABILITIES[capIdx].dtype));
2579                 spec.outputs.push_back(Resource(BufferSp(new Int16Buffer(outputs))));
2580                 spec.extensions.push_back("VK_KHR_16bit_storage");
2581                 spec.requestedVulkanFeatures = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
2582 
2583                 group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), spec));
2584             }
2585     }
2586 }
2587 
addCompute16bitStorageUniform16StructTo32StructGroup(tcu::TestCaseGroup * group)2588 void addCompute16bitStorageUniform16StructTo32StructGroup(tcu::TestCaseGroup *group)
2589 {
2590     tcu::TestContext &testCtx = group->getTestContext();
2591     de::Random rnd(deStringHash(group->getName()));
2592     const StringTemplate shaderTemplate(
2593         "OpCapability Shader\n"
2594         "OpCapability ${capability}\n"
2595         "OpExtension \"SPV_KHR_16bit_storage\"\n"
2596         "OpMemoryModel Logical GLSL450\n"
2597         "OpEntryPoint GLCompute %main \"main\" %id\n"
2598         "OpExecutionMode %main LocalSize 1 1 1\n"
2599         "OpDecorate %id BuiltIn GlobalInvocationId\n"
2600         "\n"
2601         "${strideF16}"
2602         "\n"
2603         "${strideF32}"
2604         "\n"
2605         "OpMemberDecorate %SSBO_IN 0 Offset 0\n"
2606         "OpMemberDecorate %SSBO_OUT 0 Offset 0\n"
2607         "OpDecorate %SSBO_IN ${storage}\n"
2608         "OpDecorate %SSBO_OUT BufferBlock\n"
2609         "OpDecorate %ssboIN DescriptorSet 0\n"
2610         "OpDecorate %ssboOUT DescriptorSet 0\n"
2611         "OpDecorate %ssboIN Binding 0\n"
2612         "OpDecorate %ssboOUT Binding 1\n"
2613         "\n"
2614         "%bool     = OpTypeBool\n"
2615         "%void     = OpTypeVoid\n"
2616         "%voidf    = OpTypeFunction %void\n"
2617         "%u32      = OpTypeInt 32 0\n"
2618         "%uvec3    = OpTypeVector %u32 3\n"
2619         "%uvec3ptr = OpTypePointer Input %uvec3\n"
2620         "\n"
2621         "%i32      = OpTypeInt 32 1\n"
2622         "%v2i32    = OpTypeVector %i32 2\n"
2623         "%v4i32    = OpTypeVector %i32 4\n"
2624         "\n"
2625         "%f32      = OpTypeFloat 32\n"
2626         "%v2f32    = OpTypeVector %f32 2\n"
2627         "%v3f32    = OpTypeVector %f32 3\n"
2628         "%v4f32    = OpTypeVector %f32 4\n"
2629         "${types}\n"
2630         "\n"
2631         "%zero = OpConstant %i32 0\n"
2632         "%c_i32_1 = OpConstant %i32 1\n"
2633         "%c_i32_2 = OpConstant %i32 2\n"
2634         "%c_i32_3 = OpConstant %i32 3\n"
2635         "%c_i32_4 = OpConstant %i32 4\n"
2636         "%c_i32_5 = OpConstant %i32 5\n"
2637         "%c_i32_6 = OpConstant %i32 6\n"
2638         "%c_i32_7 = OpConstant %i32 7\n"
2639         "%c_i32_8 = OpConstant %i32 8\n"
2640         "%c_i32_9 = OpConstant %i32 9\n"
2641         "\n"
2642         "%c_u32_1 = OpConstant %u32 1\n"
2643         "%c_u32_3 = OpConstant %u32 3\n"
2644         "%c_u32_7 = OpConstant %u32 7\n"
2645         "%c_u32_11 = OpConstant %u32 11\n"
2646         "\n"
2647         "%f16arr3       = OpTypeArray %f16 %c_u32_3\n"
2648         "%v2f16arr3    = OpTypeArray %v2f16 %c_u32_3\n"
2649         "%v2f16arr11    = OpTypeArray %v2f16 %c_u32_11\n"
2650         "%v3f16arr11    = OpTypeArray %v3f16 %c_u32_11\n"
2651         "%v4f16arr3     = OpTypeArray %v4f16 %c_u32_3\n"
2652         "%struct16      = OpTypeStruct %f16 %v2f16arr3\n"
2653         "%struct16arr11 = OpTypeArray %struct16 %c_u32_11\n"
2654         "%f16Struct = OpTypeStruct %f16 %v2f16 %v3f16 %v4f16 %f16arr3 %struct16arr11 %v2f16arr11 %f16 %v3f16arr11 "
2655         "%v4f16arr3\n"
2656         "\n"
2657         "%f32arr3   = OpTypeArray %f32 %c_u32_3\n"
2658         "%v2f32arr3 = OpTypeArray %v2f32 %c_u32_3\n"
2659         "%v2f32arr11 = OpTypeArray %v2f32 %c_u32_11\n"
2660         "%v3f32arr11 = OpTypeArray %v3f32 %c_u32_11\n"
2661         "%v4f32arr3 = OpTypeArray %v4f32 %c_u32_3\n"
2662         "%struct32      = OpTypeStruct %f32 %v2f32arr3\n"
2663         "%struct32arr11 = OpTypeArray %struct32 %c_u32_11\n"
2664         "%f32Struct = OpTypeStruct %f32 %v2f32 %v3f32 %v4f32 %f32arr3 %struct32arr11 %v2f32arr11 %f32 %v3f32arr11 "
2665         "%v4f32arr3\n"
2666         "\n"
2667         "%f16StructArr7      = OpTypeArray %f16Struct %c_u32_7\n"
2668         "%f32StructArr7      = OpTypeArray %f32Struct %c_u32_7\n"
2669         "%SSBO_IN            = OpTypeStruct %f16StructArr7\n"
2670         "%SSBO_OUT           = OpTypeStruct %f32StructArr7\n"
2671         "%up_SSBOIN          = OpTypePointer Uniform %SSBO_IN\n"
2672         "%up_SSBOOUT         = OpTypePointer Uniform %SSBO_OUT\n"
2673         "%ssboIN             = OpVariable %up_SSBOIN Uniform\n"
2674         "%ssboOUT            = OpVariable %up_SSBOOUT Uniform\n"
2675         "\n"
2676         "%id        = OpVariable %uvec3ptr Input\n"
2677         "%main      = OpFunction %void None %voidf\n"
2678         "%label     = OpLabel\n"
2679         "\n"
2680         "%idval     = OpLoad %uvec3 %id\n"
2681         "%x         = OpCompositeExtract %u32 %idval 0\n"
2682         "%y         = OpCompositeExtract %u32 %idval 1\n"
2683         "\n"
2684         "%f16src  = OpAccessChain %f16ptr %ssboIN %zero %x %zero\n"
2685         "%val_f16 = OpLoad %f16 %f16src\n"
2686         "%val_f32 = OpFConvert %f32 %val_f16\n"
2687         "%f32dst  = OpAccessChain %f32ptr %ssboOUT %zero %x %zero\n"
2688         "OpStore %f32dst %val_f32\n"
2689         "\n"
2690         "%v2f16src  = OpAccessChain %v2f16ptr %ssboIN %zero %x %c_i32_1\n"
2691         "%val_v2f16 = OpLoad %v2f16 %v2f16src\n"
2692         "%val_v2f32 = OpFConvert %v2f32 %val_v2f16\n"
2693         "%v2f32dst  = OpAccessChain %v2f32ptr %ssboOUT %zero %x %c_i32_1\n"
2694         "OpStore %v2f32dst %val_v2f32\n"
2695         "\n"
2696         "%v3f16src  = OpAccessChain %v3f16ptr %ssboIN %zero %x %c_i32_2\n"
2697         "%val_v3f16 = OpLoad %v3f16 %v3f16src\n"
2698         "%val_v3f32 = OpFConvert %v3f32 %val_v3f16\n"
2699         "%v3f32dst  = OpAccessChain %v3f32ptr %ssboOUT %zero %x %c_i32_2\n"
2700         "OpStore %v3f32dst %val_v3f32\n"
2701         "\n"
2702         "%v4f16src  = OpAccessChain %v4f16ptr %ssboIN %zero %x %c_i32_3\n"
2703         "%val_v4f16 = OpLoad %v4f16 %v4f16src\n"
2704         "%val_v4f32 = OpFConvert %v4f32 %val_v4f16\n"
2705         "%v4f32dst  = OpAccessChain %v4f32ptr %ssboOUT %zero %x %c_i32_3\n"
2706         "OpStore %v4f32dst %val_v4f32\n"
2707         "\n"
2708         //struct {f16, v2f16[3]}
2709         "%Sf16src  = OpAccessChain %f16ptr %ssboIN %zero %x %c_i32_5 %y %zero\n"
2710         "%Sval_f16 = OpLoad %f16 %Sf16src\n"
2711         "%Sval_f32 = OpFConvert %f32 %Sval_f16\n"
2712         "%Sf32dst2  = OpAccessChain %f32ptr %ssboOUT %zero %x %c_i32_5 %y %zero\n"
2713         "OpStore %Sf32dst2 %Sval_f32\n"
2714         "\n"
2715         "%Sv2f16src0   = OpAccessChain %v2f16ptr %ssboIN %zero %x %c_i32_5 %y %c_i32_1 %zero\n"
2716         "%Sv2f16_0     = OpLoad %v2f16 %Sv2f16src0\n"
2717         "%Sv2f32_0     = OpFConvert %v2f32 %Sv2f16_0\n"
2718         "%Sv2f32dst_0  = OpAccessChain %v2f32ptr %ssboOUT %zero %x %c_i32_5 %y %c_i32_1 %zero\n"
2719         "OpStore %Sv2f32dst_0 %Sv2f32_0\n"
2720         "\n"
2721         "%Sv2f16src1  = OpAccessChain %v2f16ptr %ssboIN %zero %x %c_i32_5 %y %c_i32_1 %c_i32_1\n"
2722         "%Sv2f16_1 = OpLoad %v2f16 %Sv2f16src1\n"
2723         "%Sv2f32_1 = OpFConvert %v2f32 %Sv2f16_1\n"
2724         "%Sv2f32dst_1  = OpAccessChain %v2f32ptr %ssboOUT %zero %x %c_i32_5 %y %c_i32_1 %c_i32_1\n"
2725         "OpStore %Sv2f32dst_1 %Sv2f32_1\n"
2726         "\n"
2727         "%Sv2f16src2  = OpAccessChain %v2f16ptr %ssboIN %zero %x %c_i32_5 %y %c_i32_1 %c_i32_2\n"
2728         "%Sv2f16_2 = OpLoad %v2f16 %Sv2f16src2\n"
2729         "%Sv2f32_2 = OpFConvert %v2f32 %Sv2f16_2\n"
2730         "%Sv2f32dst_2  = OpAccessChain %v2f32ptr %ssboOUT %zero %x %c_i32_5 %y %c_i32_1 %c_i32_2\n"
2731         "OpStore %Sv2f32dst_2 %Sv2f32_2\n"
2732         "\n"
2733 
2734         "%v2f16src2  = OpAccessChain %v2f16ptr %ssboIN %zero %x %c_i32_6 %y\n"
2735         "%val2_v2f16 = OpLoad %v2f16 %v2f16src2\n"
2736         "%val2_v2f32 = OpFConvert %v2f32 %val2_v2f16\n"
2737         "%v2f32dst2  = OpAccessChain %v2f32ptr %ssboOUT %zero %x %c_i32_6 %y\n"
2738         "OpStore %v2f32dst2 %val2_v2f32\n"
2739         "\n"
2740         "%f16src2  = OpAccessChain %f16ptr %ssboIN %zero %x %c_i32_7\n"
2741         "%val2_f16 = OpLoad %f16 %f16src2\n"
2742         "%val2_f32 = OpFConvert %f32 %val2_f16\n"
2743         "%f32dst2  = OpAccessChain %f32ptr %ssboOUT %zero %x %c_i32_7\n"
2744         "OpStore %f32dst2 %val2_f32\n"
2745         "\n"
2746         "%v3f16src2  = OpAccessChain %v3f16ptr %ssboIN %zero %x %c_i32_8 %y\n"
2747         "%val2_v3f16 = OpLoad %v3f16 %v3f16src2\n"
2748         "%val2_v3f32 = OpFConvert %v3f32 %val2_v3f16\n"
2749         "%v3f32dst2  = OpAccessChain %v3f32ptr %ssboOUT %zero %x %c_i32_8 %y\n"
2750         "OpStore %v3f32dst2 %val2_v3f32\n"
2751         "\n"
2752 
2753         //Array with 3 elements
2754         "%LessThan3 = OpSLessThan %bool %y %c_i32_3\n"
2755         "OpSelectionMerge %BlockIf None\n"
2756         "OpBranchConditional %LessThan3 %LabelIf %BlockIf\n"
2757         "%LabelIf = OpLabel\n"
2758         "  %f16src3  = OpAccessChain %f16ptr %ssboIN %zero %x %c_i32_4 %y\n"
2759         "  %val3_f16 = OpLoad %f16 %f16src3\n"
2760         "  %val3_f32 = OpFConvert %f32 %val3_f16\n"
2761         "  %f32dst3  = OpAccessChain %f32ptr %ssboOUT %zero %x %c_i32_4 %y\n"
2762         "  OpStore %f32dst3 %val3_f32\n"
2763         "\n"
2764         "  %v4f16src2  = OpAccessChain %v4f16ptr %ssboIN %zero %x %c_i32_9 %y\n"
2765         "  %val2_v4f16 = OpLoad %v4f16 %v4f16src2\n"
2766         "  %val2_v4f32 = OpFConvert %v4f32 %val2_v4f16\n"
2767         "  %v4f32dst2  = OpAccessChain %v4f32ptr %ssboOUT %zero %x %c_i32_9 %y\n"
2768         "  OpStore %v4f32dst2 %val2_v4f32\n"
2769         "OpBranch %BlockIf\n"
2770         "%BlockIf = OpLabel\n"
2771 
2772         "   OpReturn\n"
2773         "   OpFunctionEnd\n");
2774 
2775     { // Floats
2776         vector<float> float32Data(getStructSize(SHADERTEMPLATE_STRIDE32BIT_STD430), 0.0f);
2777 
2778         for (uint32_t capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
2779         {
2780             vector<deFloat16> float16DData = (VK_DESCRIPTOR_TYPE_STORAGE_BUFFER == CAPABILITIES[capIdx].dtype) ?
2781                                                  data16bitStd430(rnd) :
2782                                                  data16bitStd140(rnd);
2783             ComputeShaderSpec spec;
2784             map<string, string> specs;
2785             string testName = string(CAPABILITIES[capIdx].name);
2786 
2787             specs["capability"] = CAPABILITIES[capIdx].cap;
2788             specs["storage"]    = CAPABILITIES[capIdx].decor;
2789             specs["strideF16"]  = getStructShaderComponet(
2790                 (VK_DESCRIPTOR_TYPE_STORAGE_BUFFER == CAPABILITIES[capIdx].dtype) ? SHADERTEMPLATE_STRIDE16BIT_STD430 :
2791                                                                                     SHADERTEMPLATE_STRIDE16BIT_STD140);
2792             specs["strideF32"] = getStructShaderComponet(SHADERTEMPLATE_STRIDE32BIT_STD430);
2793             specs["types"]     = getStructShaderComponet(SHADERTEMPLATE_TYPES);
2794 
2795             spec.assembly      = shaderTemplate.specialize(specs);
2796             spec.numWorkGroups = IVec3(structData.structArraySize, structData.nestedArraySize, 1);
2797             spec.verifyIO      = (VK_DESCRIPTOR_TYPE_STORAGE_BUFFER == CAPABILITIES[capIdx].dtype) ?
2798                                      computeCheckStruct<deFloat16, float, SHADERTEMPLATE_STRIDE16BIT_STD430,
2799                                                    SHADERTEMPLATE_STRIDE32BIT_STD430> :
2800                                      computeCheckStruct<deFloat16, float, SHADERTEMPLATE_STRIDE16BIT_STD140,
2801                                                    SHADERTEMPLATE_STRIDE32BIT_STD430>;
2802             spec.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16DData)), CAPABILITIES[capIdx].dtype));
2803             spec.outputs.push_back(Resource(BufferSp(new Float32Buffer(float32Data))));
2804             spec.extensions.push_back("VK_KHR_16bit_storage");
2805             spec.requestedVulkanFeatures = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
2806 
2807             group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), spec));
2808         }
2809     }
2810 }
2811 
addCompute16bitStorageUniform32StructTo16StructGroup(tcu::TestCaseGroup * group)2812 void addCompute16bitStorageUniform32StructTo16StructGroup(tcu::TestCaseGroup *group)
2813 {
2814     tcu::TestContext &testCtx = group->getTestContext();
2815     de::Random rnd(deStringHash(group->getName()));
2816 
2817     const StringTemplate shaderTemplate(
2818         "OpCapability Shader\n"
2819         "OpCapability ${capability}\n"
2820         "OpExtension \"SPV_KHR_16bit_storage\"\n"
2821         "OpMemoryModel Logical GLSL450\n"
2822         "OpEntryPoint GLCompute %main \"main\" %id\n"
2823         "OpExecutionMode %main LocalSize 1 1 1\n"
2824         "OpDecorate %id BuiltIn GlobalInvocationId\n"
2825         "\n"
2826         "${strideF16}"
2827         "\n"
2828         "${strideF32}"
2829         "\n"
2830         "OpMemberDecorate %SSBO_IN 0 Offset 0\n"
2831         "OpMemberDecorate %SSBO_OUT 0 Offset 0\n"
2832         "OpDecorate %SSBO_IN ${storage}\n"
2833         "OpDecorate %SSBO_OUT BufferBlock\n"
2834         "OpDecorate %ssboIN DescriptorSet 0\n"
2835         "OpDecorate %ssboOUT DescriptorSet 0\n"
2836         "OpDecorate %ssboIN Binding 0\n"
2837         "OpDecorate %ssboOUT Binding 1\n"
2838         "\n"
2839         "%bool     = OpTypeBool\n"
2840         "%void     = OpTypeVoid\n"
2841         "%voidf    = OpTypeFunction %void\n"
2842         "%u32      = OpTypeInt 32 0\n"
2843         "%uvec3    = OpTypeVector %u32 3\n"
2844         "%uvec3ptr = OpTypePointer Input %uvec3\n"
2845         "\n"
2846         "%i32      = OpTypeInt 32 1\n"
2847         "%v2i32    = OpTypeVector %i32 2\n"
2848         "%v4i32    = OpTypeVector %i32 4\n"
2849         "\n"
2850         "%f32      = OpTypeFloat 32\n"
2851         "%v2f32    = OpTypeVector %f32 2\n"
2852         "%v3f32    = OpTypeVector %f32 3\n"
2853         "%v4f32    = OpTypeVector %f32 4\n"
2854         "${types}\n"
2855         "\n"
2856         "%zero = OpConstant %i32 0\n"
2857         "%c_i32_1 = OpConstant %i32 1\n"
2858         "%c_i32_2 = OpConstant %i32 2\n"
2859         "%c_i32_3 = OpConstant %i32 3\n"
2860         "%c_i32_4 = OpConstant %i32 4\n"
2861         "%c_i32_5 = OpConstant %i32 5\n"
2862         "%c_i32_6 = OpConstant %i32 6\n"
2863         "%c_i32_7 = OpConstant %i32 7\n"
2864         "%c_i32_8 = OpConstant %i32 8\n"
2865         "%c_i32_9 = OpConstant %i32 9\n"
2866         "\n"
2867         "%c_u32_1 = OpConstant %u32 1\n"
2868         "%c_u32_3 = OpConstant %u32 3\n"
2869         "%c_u32_7 = OpConstant %u32 7\n"
2870         "%c_u32_11 = OpConstant %u32 11\n"
2871         "\n"
2872         "%f16arr3       = OpTypeArray %f16 %c_u32_3\n"
2873         "%v2f16arr3    = OpTypeArray %v2f16 %c_u32_3\n"
2874         "%v2f16arr11    = OpTypeArray %v2f16 %c_u32_11\n"
2875         "%v3f16arr11    = OpTypeArray %v3f16 %c_u32_11\n"
2876         "%v4f16arr3     = OpTypeArray %v4f16 %c_u32_3\n"
2877         "%struct16      = OpTypeStruct %f16 %v2f16arr3\n"
2878         "%struct16arr11 = OpTypeArray %struct16 %c_u32_11\n"
2879         "%f16Struct = OpTypeStruct %f16 %v2f16 %v3f16 %v4f16 %f16arr3 %struct16arr11 %v2f16arr11 %f16 %v3f16arr11 "
2880         "%v4f16arr3\n"
2881         "\n"
2882         "%f32arr3   = OpTypeArray %f32 %c_u32_3\n"
2883         "%v2f32arr3 = OpTypeArray %v2f32 %c_u32_3\n"
2884         "%v2f32arr11 = OpTypeArray %v2f32 %c_u32_11\n"
2885         "%v3f32arr11 = OpTypeArray %v3f32 %c_u32_11\n"
2886         "%v4f32arr3 = OpTypeArray %v4f32 %c_u32_3\n"
2887         "%struct32      = OpTypeStruct %f32 %v2f32arr3\n"
2888         "%struct32arr11 = OpTypeArray %struct32 %c_u32_11\n"
2889         "%f32Struct = OpTypeStruct %f32 %v2f32 %v3f32 %v4f32 %f32arr3 %struct32arr11 %v2f32arr11 %f32 %v3f32arr11 "
2890         "%v4f32arr3\n"
2891         "\n"
2892         "%f16StructArr7      = OpTypeArray %f16Struct %c_u32_7\n"
2893         "%f32StructArr7      = OpTypeArray %f32Struct %c_u32_7\n"
2894         "%SSBO_IN            = OpTypeStruct %f32StructArr7\n"
2895         "%SSBO_OUT           = OpTypeStruct %f16StructArr7\n"
2896         "%up_SSBOIN          = OpTypePointer Uniform %SSBO_IN\n"
2897         "%up_SSBOOUT         = OpTypePointer Uniform %SSBO_OUT\n"
2898         "%ssboIN             = OpVariable %up_SSBOIN Uniform\n"
2899         "%ssboOUT            = OpVariable %up_SSBOOUT Uniform\n"
2900         "\n"
2901         "%id        = OpVariable %uvec3ptr Input\n"
2902         "%main      = OpFunction %void None %voidf\n"
2903         "%label     = OpLabel\n"
2904         "\n"
2905         "%idval     = OpLoad %uvec3 %id\n"
2906         "%x         = OpCompositeExtract %u32 %idval 0\n"
2907         "%y         = OpCompositeExtract %u32 %idval 1\n"
2908         "\n"
2909         "%f32src  = OpAccessChain %f32ptr %ssboIN %zero %x %zero\n"
2910         "%val_f32 = OpLoad %f32 %f32src\n"
2911         "%val_f16 = OpFConvert %f16 %val_f32\n"
2912         "%f16dst  = OpAccessChain %f16ptr %ssboOUT %zero %x %zero\n"
2913         "OpStore %f16dst %val_f16\n"
2914         "\n"
2915         "%v2f32src  = OpAccessChain %v2f32ptr %ssboIN %zero %x %c_i32_1\n"
2916         "%val_v2f32 = OpLoad %v2f32 %v2f32src\n"
2917         "%val_v2f16 = OpFConvert %v2f16 %val_v2f32\n"
2918         "%v2f16dst  = OpAccessChain %v2f16ptr %ssboOUT %zero %x %c_i32_1\n"
2919         "OpStore %v2f16dst %val_v2f16\n"
2920         "\n"
2921         "%v3f32src  = OpAccessChain %v3f32ptr %ssboIN %zero %x %c_i32_2\n"
2922         "%val_v3f32 = OpLoad %v3f32 %v3f32src\n"
2923         "%val_v3f16 = OpFConvert %v3f16 %val_v3f32\n"
2924         "%v3f16dst  = OpAccessChain %v3f16ptr %ssboOUT %zero %x %c_i32_2\n"
2925         "OpStore %v3f16dst %val_v3f16\n"
2926         "\n"
2927         "%v4f32src  = OpAccessChain %v4f32ptr %ssboIN %zero %x %c_i32_3\n"
2928         "%val_v4f32 = OpLoad %v4f32 %v4f32src\n"
2929         "%val_v4f16 = OpFConvert %v4f16 %val_v4f32\n"
2930         "%v4f16dst  = OpAccessChain %v4f16ptr %ssboOUT %zero %x %c_i32_3\n"
2931         "OpStore %v4f16dst %val_v4f16\n"
2932         "\n"
2933 
2934         //struct {f16, v2f16[3]}
2935         "%Sf32src  = OpAccessChain %f32ptr %ssboIN %zero %x %c_i32_5 %y %zero\n"
2936         "%Sval_f32 = OpLoad %f32 %Sf32src\n"
2937         "%Sval_f16 = OpFConvert %f16 %Sval_f32\n"
2938         "%Sf16dst2  = OpAccessChain %f16ptr %ssboOUT %zero %x %c_i32_5 %y %zero\n"
2939         "OpStore %Sf16dst2 %Sval_f16\n"
2940         "\n"
2941         "%Sv2f32src0   = OpAccessChain %v2f32ptr %ssboIN %zero %x %c_i32_5 %y %c_i32_1 %zero\n"
2942         "%Sv2f32_0     = OpLoad %v2f32 %Sv2f32src0\n"
2943         "%Sv2f16_0     = OpFConvert %v2f16 %Sv2f32_0\n"
2944         "%Sv2f16dst_0  = OpAccessChain %v2f16ptr %ssboOUT %zero %x %c_i32_5 %y %c_i32_1 %zero\n"
2945         "OpStore %Sv2f16dst_0 %Sv2f16_0\n"
2946         "\n"
2947         "%Sv2f32src1  = OpAccessChain %v2f32ptr %ssboIN %zero %x %c_i32_5 %y %c_i32_1 %c_i32_1\n"
2948         "%Sv2f32_1 = OpLoad %v2f32 %Sv2f32src1\n"
2949         "%Sv2f16_1 = OpFConvert %v2f16 %Sv2f32_1\n"
2950         "%Sv2f16dst_1  = OpAccessChain %v2f16ptr %ssboOUT %zero %x %c_i32_5 %y %c_i32_1 %c_i32_1\n"
2951         "OpStore %Sv2f16dst_1 %Sv2f16_1\n"
2952         "\n"
2953         "%Sv2f32src2  = OpAccessChain %v2f32ptr %ssboIN %zero %x %c_i32_5 %y %c_i32_1 %c_i32_2\n"
2954         "%Sv2f32_2 = OpLoad %v2f32 %Sv2f32src2\n"
2955         "%Sv2f16_2 = OpFConvert %v2f16 %Sv2f32_2\n"
2956         "%Sv2f16dst_2  = OpAccessChain %v2f16ptr %ssboOUT %zero %x %c_i32_5 %y %c_i32_1 %c_i32_2\n"
2957         "OpStore %Sv2f16dst_2 %Sv2f16_2\n"
2958         "\n"
2959 
2960         "%v2f32src2  = OpAccessChain %v2f32ptr %ssboIN %zero %x %c_i32_6 %y\n"
2961         "%val2_v2f32 = OpLoad %v2f32 %v2f32src2\n"
2962         "%val2_v2f16 = OpFConvert %v2f16 %val2_v2f32\n"
2963         "%v2f16dst2  = OpAccessChain %v2f16ptr %ssboOUT %zero %x %c_i32_6 %y\n"
2964         "OpStore %v2f16dst2 %val2_v2f16\n"
2965         "\n"
2966         "%f32src2  = OpAccessChain %f32ptr %ssboIN %zero %x %c_i32_7\n"
2967         "%val2_f32 = OpLoad %f32 %f32src2\n"
2968         "%val2_f16 = OpFConvert %f16 %val2_f32\n"
2969         "%f16dst2  = OpAccessChain %f16ptr %ssboOUT %zero %x %c_i32_7\n"
2970         "OpStore %f16dst2 %val2_f16\n"
2971         "\n"
2972         "%v3f32src2  = OpAccessChain %v3f32ptr %ssboIN %zero %x %c_i32_8 %y\n"
2973         "%val2_v3f32 = OpLoad %v3f32 %v3f32src2\n"
2974         "%val2_v3f16 = OpFConvert %v3f16 %val2_v3f32\n"
2975         "%v3f16dst2  = OpAccessChain %v3f16ptr %ssboOUT %zero %x %c_i32_8 %y\n"
2976         "OpStore %v3f16dst2 %val2_v3f16\n"
2977         "\n"
2978 
2979         //Array with 3 elements
2980         "%LessThan3 = OpSLessThan %bool %y %c_i32_3\n"
2981         "OpSelectionMerge %BlockIf None\n"
2982         "OpBranchConditional %LessThan3 %LabelIf %BlockIf\n"
2983         "  %LabelIf = OpLabel\n"
2984         "  %f32src3  = OpAccessChain %f32ptr %ssboIN %zero %x %c_i32_4 %y\n"
2985         "  %val3_f32 = OpLoad %f32 %f32src3\n"
2986         "  %val3_f16 = OpFConvert %f16 %val3_f32\n"
2987         "  %f16dst3  = OpAccessChain %f16ptr %ssboOUT %zero %x %c_i32_4 %y\n"
2988         "  OpStore %f16dst3 %val3_f16\n"
2989         "\n"
2990         "  %v4f32src2  = OpAccessChain %v4f32ptr %ssboIN %zero %x %c_i32_9 %y\n"
2991         "  %val2_v4f32 = OpLoad %v4f32 %v4f32src2\n"
2992         "  %val2_v4f16 = OpFConvert %v4f16 %val2_v4f32\n"
2993         "  %v4f16dst2  = OpAccessChain %v4f16ptr %ssboOUT %zero %x %c_i32_9 %y\n"
2994         "  OpStore %v4f16dst2 %val2_v4f16\n"
2995         "OpBranch %BlockIf\n"
2996         "%BlockIf = OpLabel\n"
2997 
2998         "   OpReturn\n"
2999         "   OpFunctionEnd\n");
3000 
3001     { // Floats
3002         vector<deFloat16> float16Data(getStructSize(SHADERTEMPLATE_STRIDE16BIT_STD430), 0u);
3003 
3004         for (uint32_t capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
3005         {
3006             ComputeShaderSpec spec;
3007             map<string, string> specs;
3008             string testName            = string(CAPABILITIES[capIdx].name);
3009             vector<float> float32DData = (VK_DESCRIPTOR_TYPE_STORAGE_BUFFER == CAPABILITIES[capIdx].dtype) ?
3010                                              data32bitStd430(rnd) :
3011                                              data32bitStd140(rnd);
3012 
3013             specs["capability"] = CAPABILITIES[capIdx].cap;
3014             specs["storage"]    = CAPABILITIES[capIdx].decor;
3015             specs["strideF16"]  = getStructShaderComponet(SHADERTEMPLATE_STRIDE16BIT_STD430);
3016             specs["strideF32"]  = getStructShaderComponet(
3017                 (VK_DESCRIPTOR_TYPE_STORAGE_BUFFER == CAPABILITIES[capIdx].dtype) ? SHADERTEMPLATE_STRIDE32BIT_STD430 :
3018                                                                                     SHADERTEMPLATE_STRIDE32BIT_STD140);
3019             specs["types"] = getStructShaderComponet(SHADERTEMPLATE_TYPES);
3020 
3021             spec.assembly      = shaderTemplate.specialize(specs);
3022             spec.numWorkGroups = IVec3(structData.structArraySize, structData.nestedArraySize, 1);
3023             spec.verifyIO      = (VK_DESCRIPTOR_TYPE_STORAGE_BUFFER == CAPABILITIES[capIdx].dtype) ?
3024                                      computeCheckStruct<float, deFloat16, SHADERTEMPLATE_STRIDE32BIT_STD430,
3025                                                    SHADERTEMPLATE_STRIDE16BIT_STD430> :
3026                                      computeCheckStruct<float, deFloat16, SHADERTEMPLATE_STRIDE32BIT_STD140,
3027                                                    SHADERTEMPLATE_STRIDE16BIT_STD430>;
3028 
3029             spec.inputs.push_back(Resource(BufferSp(new Float32Buffer(float32DData)), CAPABILITIES[capIdx].dtype));
3030             spec.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16Data))));
3031             spec.extensions.push_back("VK_KHR_16bit_storage");
3032             spec.requestedVulkanFeatures = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
3033 
3034             group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), spec));
3035         }
3036     }
3037 }
3038 
addCompute16bitStructMixedTypesGroup(tcu::TestCaseGroup * group)3039 void addCompute16bitStructMixedTypesGroup(tcu::TestCaseGroup *group)
3040 {
3041     tcu::TestContext &testCtx = group->getTestContext();
3042     de::Random rnd(deStringHash(group->getName()));
3043     vector<int16_t> outData(getStructSize(SHADERTEMPLATE_STRIDEMIX_STD430), 0u);
3044 
3045     const StringTemplate shaderTemplate(
3046         "OpCapability Shader\n"
3047         "OpCapability StorageUniformBufferBlock16\n"
3048         "${capability}\n"
3049         "OpExtension \"SPV_KHR_storage_buffer_storage_class\"\n"
3050         "OpExtension \"SPV_KHR_16bit_storage\"\n"
3051         "OpMemoryModel Logical GLSL450\n"
3052         "OpEntryPoint GLCompute %main \"main\" %id\n"
3053         "OpExecutionMode %main LocalSize 1 1 1\n"
3054         "OpDecorate %id BuiltIn GlobalInvocationId\n"
3055         "${OutOffsets}"
3056         "${InOffsets}"
3057         "\n" //SSBO IN
3058         "OpMemberDecorate %SSBO_IN 0 Offset 0\n"
3059         "OpDecorate %ssboIN DescriptorSet 0\n"
3060         "OpDecorate %SSBO_IN ${storage}\n"
3061         "OpDecorate %SSBO_OUT BufferBlock\n"
3062         "OpDecorate %ssboIN Binding 0\n"
3063         "\n" //SSBO OUT
3064         "OpMemberDecorate %SSBO_OUT 0 Offset 0\n"
3065         "OpDecorate %ssboOUT DescriptorSet 0\n"
3066         "OpDecorate %ssboOUT Binding 1\n"
3067         "\n" //Types
3068         "%void  = OpTypeVoid\n"
3069         "%bool  = OpTypeBool\n"
3070         "%i16   = OpTypeInt 16 1\n"
3071         "%v2i16 = OpTypeVector %i16 2\n"
3072         "%v3i16 = OpTypeVector %i16 3\n"
3073         "%v4i16 = OpTypeVector %i16 4\n"
3074         "%i32   = OpTypeInt 32 1\n"
3075         "%v2i32 = OpTypeVector %i32 2\n"
3076         "%v3i32 = OpTypeVector %i32 3\n"
3077         "%v4i32 = OpTypeVector %i32 4\n"
3078         "%u32   = OpTypeInt 32 0\n"
3079         "%uvec3 = OpTypeVector %u32 3\n"
3080         "%f32   = OpTypeFloat 32\n"
3081         "%v4f32 = OpTypeVector %f32  4\n"
3082         "%voidf = OpTypeFunction %void\n"
3083         "\n" //Consta value
3084         "%zero     = OpConstant %i32 0\n"
3085         "%c_i32_1  = OpConstant %i32 1\n"
3086         "%c_i32_2  = OpConstant %i32 2\n"
3087         "%c_i32_3  = OpConstant %i32 3\n"
3088         "%c_i32_4  = OpConstant %i32 4\n"
3089         "%c_i32_5  = OpConstant %i32 5\n"
3090         "%c_i32_6  = OpConstant %i32 6\n"
3091         "%c_i32_7  = OpConstant %i32 7\n"
3092         "%c_i32_8  = OpConstant %i32 8\n"
3093         "%c_i32_9  = OpConstant %i32 9\n"
3094         "%c_i32_10 = OpConstant %i32 10\n"
3095         "%c_i32_11 = OpConstant %i32 11\n"
3096         "%c_u32_1  = OpConstant %u32 1\n"
3097         "%c_u32_7  = OpConstant %u32 7\n"
3098         "%c_u32_11 = OpConstant %u32 11\n"
3099         "\n" //Arrays & Structs
3100         "%v2b16NestedArr11In  = OpTypeArray %v2i16 %c_u32_11\n"
3101         "%b32NestedArr11In    = OpTypeArray %i32 %c_u32_11\n"
3102         "%sb16Arr11In         = OpTypeArray %i16 %c_u32_11\n"
3103         "%sb32Arr11In         = OpTypeArray %i32 %c_u32_11\n"
3104         "%sNestedIn           = OpTypeStruct %i16 %i32 %v2b16NestedArr11In %b32NestedArr11In\n"
3105         "%sNestedArr11In      = OpTypeArray %sNestedIn %c_u32_11\n"
3106         "%structIn            = OpTypeStruct %i16 %i32 %v2i16 %v2i32 %v3i16 %v3i32 %v4i16 %v4i32 %sNestedArr11In "
3107         "%sb16Arr11In %sb32Arr11In\n"
3108         "%structArr7In        = OpTypeArray %structIn %c_u32_7\n"
3109         "%v2b16NestedArr11Out = OpTypeArray %v2i16 %c_u32_11\n"
3110         "%b32NestedArr11Out   = OpTypeArray %i32 %c_u32_11\n"
3111         "%sb16Arr11Out        = OpTypeArray %i16 %c_u32_11\n"
3112         "%sb32Arr11Out        = OpTypeArray %i32 %c_u32_11\n"
3113         "%sNestedOut          = OpTypeStruct %i16 %i32 %v2b16NestedArr11Out %b32NestedArr11Out\n"
3114         "%sNestedArr11Out     = OpTypeArray %sNestedOut %c_u32_11\n"
3115         "%structOut           = OpTypeStruct %i16 %i32 %v2i16 %v2i32 %v3i16 %v3i32 %v4i16 %v4i32 %sNestedArr11Out "
3116         "%sb16Arr11Out %sb32Arr11Out\n"
3117         "%structArr7Out       = OpTypeArray %structOut %c_u32_7\n"
3118         "\n" //Pointers
3119         "%i16outPtr   = OpTypePointer Uniform %i16\n"
3120         "%v2i16outPtr = OpTypePointer Uniform %v2i16\n"
3121         "%v3i16outPtr = OpTypePointer Uniform %v3i16\n"
3122         "%v4i16outPtr = OpTypePointer Uniform %v4i16\n"
3123         "%i32outPtr   = OpTypePointer Uniform %i32\n"
3124         "%v2i32outPtr = OpTypePointer Uniform %v2i32\n"
3125         "%v3i32outPtr = OpTypePointer Uniform %v3i32\n"
3126         "%v4i32outPtr = OpTypePointer Uniform %v4i32\n"
3127         "%fp_i32      = OpTypePointer Function %i32\n"
3128         "%uvec3ptr    = OpTypePointer Input %uvec3\n"
3129         "\n" //SSBO IN
3130         "%SSBO_IN    = OpTypeStruct %structArr7In\n"
3131         "%up_SSBOIN  = OpTypePointer Uniform %SSBO_IN\n"
3132         "%ssboIN     = OpVariable %up_SSBOIN Uniform\n"
3133         "\n" //SSBO OUT
3134         "%SSBO_OUT   = OpTypeStruct %structArr7Out\n"
3135         "%up_SSBOOUT = OpTypePointer Uniform %SSBO_OUT\n"
3136         "%ssboOUT    = OpVariable %up_SSBOOUT Uniform\n"
3137         "\n" //MAIN
3138         "%id      = OpVariable %uvec3ptr Input\n"
3139         "%main    = OpFunction %void None %voidf\n"
3140         "%label   = OpLabel\n"
3141         "%ndxArrz = OpVariable %fp_i32  Function\n"
3142         "%idval   = OpLoad %uvec3 %id\n"
3143         "%x       = OpCompositeExtract %u32 %idval 0\n"
3144         "%y       = OpCompositeExtract %u32 %idval 1\n"
3145         "\n" //strutOut.b16 = strutIn.b16
3146         "%inP1  = OpAccessChain %i16${inPtr} %ssboIN %zero %x %zero\n"
3147         "%inV1  = OpLoad %i16 %inP1\n"
3148         "%outP1 = OpAccessChain %i16outPtr %ssboOUT %zero %x %zero\n"
3149         "OpStore %outP1 %inV1\n"
3150         "\n" //strutOut.b32 = strutIn.b32
3151         "%inP2  = OpAccessChain %i32${inPtr} %ssboIN %zero %x %c_i32_1\n"
3152         "%inV2  = OpLoad %i32 %inP2\n"
3153         "%outP2 = OpAccessChain %i32outPtr %ssboOUT %zero %x %c_i32_1\n"
3154         "OpStore %outP2 %inV2\n"
3155         "\n" //strutOut.v2b16 = strutIn.v2b16
3156         "%inP3  = OpAccessChain %v2i16${inPtr} %ssboIN %zero %x %c_i32_2\n"
3157         "%inV3  = OpLoad %v2i16 %inP3\n"
3158         "%outP3 = OpAccessChain %v2i16outPtr %ssboOUT %zero %x %c_i32_2\n"
3159         "OpStore %outP3 %inV3\n"
3160         "\n" //strutOut.v2b32 = strutIn.v2b32
3161         "%inP4  = OpAccessChain %v2i32${inPtr} %ssboIN %zero %x %c_i32_3\n"
3162         "%inV4  = OpLoad %v2i32 %inP4\n"
3163         "%outP4 = OpAccessChain %v2i32outPtr %ssboOUT %zero %x %c_i32_3\n"
3164         "OpStore %outP4 %inV4\n"
3165         "\n" //strutOut.v3b16 = strutIn.v3b16
3166         "%inP5  = OpAccessChain %v3i16${inPtr} %ssboIN %zero %x %c_i32_4\n"
3167         "%inV5  = OpLoad %v3i16 %inP5\n"
3168         "%outP5 = OpAccessChain %v3i16outPtr %ssboOUT %zero %x %c_i32_4\n"
3169         "OpStore %outP5 %inV5\n"
3170         "\n" //strutOut.v3b32 = strutIn.v3b32
3171         "%inP6  = OpAccessChain %v3i32${inPtr} %ssboIN %zero %x %c_i32_5\n"
3172         "%inV6  = OpLoad %v3i32 %inP6\n"
3173         "%outP6 = OpAccessChain %v3i32outPtr %ssboOUT %zero %x %c_i32_5\n"
3174         "OpStore %outP6 %inV6\n"
3175         "\n" //strutOut.v4b16 = strutIn.v4b16
3176         "%inP7  = OpAccessChain %v4i16${inPtr} %ssboIN %zero %x %c_i32_6\n"
3177         "%inV7  = OpLoad %v4i16 %inP7\n"
3178         "%outP7 = OpAccessChain %v4i16outPtr %ssboOUT %zero %x %c_i32_6\n"
3179         "OpStore %outP7 %inV7\n"
3180         "\n" //strutOut.v4b32 = strutIn.v4b32
3181         "%inP8  = OpAccessChain %v4i32${inPtr} %ssboIN %zero %x %c_i32_7\n"
3182         "%inV8  = OpLoad %v4i32 %inP8\n"
3183         "%outP8 = OpAccessChain %v4i32outPtr %ssboOUT %zero %x %c_i32_7\n"
3184         "OpStore %outP8 %inV8\n"
3185         "\n" //strutOut.b16[y] = strutIn.b16[y]
3186         "%inP9  = OpAccessChain %i16${inPtr} %ssboIN %zero %x %c_i32_9 %y\n"
3187         "%inV9  = OpLoad %i16 %inP9\n"
3188         "%outP9 = OpAccessChain %i16outPtr %ssboOUT %zero %x %c_i32_9 %y\n"
3189         "OpStore %outP9 %inV9\n"
3190         "\n" //strutOut.b32[y] = strutIn.b32[y]
3191         "%inP10  = OpAccessChain %i32${inPtr} %ssboIN %zero %x %c_i32_10 %y\n"
3192         "%inV10  = OpLoad %i32 %inP10\n"
3193         "%outP10 = OpAccessChain %i32outPtr %ssboOUT %zero %x %c_i32_10 %y\n"
3194         "OpStore %outP10 %inV10\n"
3195         "\n" //strutOut.strutNestedOut[y].b16 = strutIn.strutNestedIn[y].b16
3196         "%inP11 = OpAccessChain %i16${inPtr} %ssboIN %zero %x %c_i32_8 %y %zero\n"
3197         "%inV11 = OpLoad %i16 %inP11\n"
3198         "%outP11 = OpAccessChain %i16outPtr %ssboOUT %zero %x %c_i32_8 %y %zero\n"
3199         "OpStore %outP11 %inV11\n"
3200         "\n" //strutOut.strutNestedOut[y].b32 = strutIn.strutNestedIn[y].b32
3201         "%inP12 = OpAccessChain %i32${inPtr} %ssboIN %zero %x %c_i32_8 %y %c_i32_1\n"
3202         "%inV12 = OpLoad %i32 %inP12\n"
3203         "%outP12 = OpAccessChain %i32outPtr %ssboOUT %zero %x %c_i32_8 %y %c_i32_1\n"
3204         "OpStore %outP12 %inV12\n"
3205         "\n"
3206         "${zBeginLoop}"
3207         "\n" //strutOut.strutNestedOut[y].v2b16[valNdx] = strutIn.strutNestedIn[y].v2b16[valNdx]
3208         "%inP13  = OpAccessChain %v2i16${inPtr} %ssboIN %zero %x %c_i32_8 %y %c_i32_2 %Valz\n"
3209         "%inV13  = OpLoad %v2i16 %inP13\n"
3210         "%outP13 = OpAccessChain %v2i16outPtr %ssboOUT %zero %x %c_i32_8 %y %c_i32_2 %Valz\n"
3211         "OpStore %outP13 %inV13\n"
3212         "\n" //strutOut.strutNestedOut[y].b32[valNdx] = strutIn.strutNestedIn[y].b32[valNdx]
3213         "%inP14  = OpAccessChain %i32${inPtr} %ssboIN %zero %x %c_i32_8 %y %c_i32_3 %Valz\n"
3214         "%inV14  = OpLoad %i32 %inP14\n"
3215         "%outP14 = OpAccessChain %i32outPtr %ssboOUT %zero %x %c_i32_8 %y %c_i32_3 %Valz\n"
3216         "OpStore %outP14 %inV14\n"
3217         "\n${zEndLoop}\n"
3218         "OpBranch %exitLabel\n"
3219         "%exitLabel = OpLabel\n"
3220         "OpReturn\n"
3221         "OpFunctionEnd\n");
3222 
3223     for (uint32_t capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
3224     { // int
3225         const bool isUniform   = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER == CAPABILITIES[capIdx].dtype;
3226         vector<int16_t> inData = isUniform ? dataMixStd140(rnd) : dataMixStd430(rnd);
3227         ComputeShaderSpec spec;
3228         map<string, string> specsOffset;
3229         map<string, string> specsLoop;
3230         map<string, string> specs;
3231         string testName = string(CAPABILITIES[capIdx].name);
3232 
3233         specsLoop["exeCount"] = "c_i32_11";
3234         specsLoop["loopName"] = "z";
3235         specs["zBeginLoop"]   = beginLoop(specsLoop);
3236         specs["zEndLoop"]     = endLoop(specsLoop);
3237         specs["capability"]   = isUniform ? "OpCapability " + string(CAPABILITIES[capIdx].cap) : " ";
3238         specs["inPtr"]        = "outPtr";
3239         specs["storage"]      = isUniform ? "Block" : "BufferBlock";
3240         specsOffset["InOut"]  = "In";
3241         specs["InOffsets"]    = StringTemplate(isUniform ? getStructShaderComponet(SHADERTEMPLATE_STRIDEMIX_STD140) :
3242                                                            getStructShaderComponet(SHADERTEMPLATE_STRIDEMIX_STD430))
3243                                  .specialize(specsOffset);
3244         specsOffset["InOut"] = "Out";
3245         specs["OutOffsets"] =
3246             StringTemplate(getStructShaderComponet(SHADERTEMPLATE_STRIDEMIX_STD430)).specialize(specsOffset);
3247 
3248         spec.assembly      = shaderTemplate.specialize(specs);
3249         spec.numWorkGroups = IVec3(structData.structArraySize, structData.nestedArraySize, 1);
3250         spec.verifyIO =
3251             isUniform ?
3252                 computeCheckStruct<int16_t, int16_t, SHADERTEMPLATE_STRIDEMIX_STD140, SHADERTEMPLATE_STRIDEMIX_STD430> :
3253                 computeCheckStruct<int16_t, int16_t, SHADERTEMPLATE_STRIDEMIX_STD430, SHADERTEMPLATE_STRIDEMIX_STD430>;
3254         spec.inputs.push_back(Resource(BufferSp(new Int16Buffer(inData)), CAPABILITIES[capIdx].dtype));
3255         spec.outputs.push_back(Resource(BufferSp(new Int16Buffer(outData))));
3256         spec.extensions.push_back("VK_KHR_16bit_storage");
3257         spec.requestedVulkanFeatures = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
3258 
3259         group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), spec));
3260     }
3261 }
3262 
addGraphics16BitStorageUniformFloat32To16Group(tcu::TestCaseGroup * testGroup)3263 void addGraphics16BitStorageUniformFloat32To16Group(tcu::TestCaseGroup *testGroup)
3264 {
3265     de::Random rnd(deStringHash(testGroup->getName()));
3266     map<string, string> fragments;
3267     vector<string> extensions;
3268     const uint32_t numDataPoints = 256;
3269     RGBA defaultColors[4];
3270     const vector<float> float32Data = getFloat32s(rnd, numDataPoints);
3271     vector<float> float32DataPadded;
3272     vector<deFloat16> float16UnusedData(numDataPoints, 0);
3273     const StringTemplate capabilities("OpCapability ${cap}\n");
3274 
3275     for (size_t dataIdx = 0; dataIdx < float32Data.size(); ++dataIdx)
3276     {
3277         float32DataPadded.push_back(float32Data[dataIdx]);
3278         float32DataPadded.push_back(0.0f);
3279         float32DataPadded.push_back(0.0f);
3280         float32DataPadded.push_back(0.0f);
3281     }
3282 
3283     extensions.push_back("VK_KHR_16bit_storage");
3284     fragments["extension"] = "OpExtension \"SPV_KHR_16bit_storage\"";
3285 
3286     struct RndMode
3287     {
3288         const char *name;
3289         const char *decor;
3290         VerifyIOFunc f;
3291     };
3292 
3293     getDefaultColors(defaultColors);
3294 
3295     { // scalar cases
3296         fragments["pre_main"] = "      %f16 = OpTypeFloat 16\n"
3297                                 "%c_i32_256 = OpConstant %i32 256\n"
3298                                 "   %up_f32 = OpTypePointer Uniform %f32\n"
3299                                 "   %up_f16 = OpTypePointer Uniform %f16\n"
3300                                 "   %ra_f32 = OpTypeArray %f32 %c_i32_256\n"
3301                                 "   %ra_f16 = OpTypeArray %f16 %c_i32_256\n"
3302                                 "   %SSBO32 = OpTypeStruct %ra_f32\n"
3303                                 "   %SSBO16 = OpTypeStruct %ra_f16\n"
3304                                 "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
3305                                 "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
3306                                 "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
3307                                 "   %ssbo16 = OpVariable %up_SSBO16 Uniform\n";
3308 
3309         const StringTemplate decoration("OpDecorate %ra_f32 ArrayStride ${arraystride}\n"
3310                                         "OpDecorate %ra_f16 ArrayStride 2\n"
3311                                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
3312                                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
3313                                         "OpDecorate %SSBO32 ${indecor}\n"
3314                                         "OpDecorate %SSBO16 BufferBlock\n"
3315                                         "OpDecorate %ssbo32 DescriptorSet 0\n"
3316                                         "OpDecorate %ssbo16 DescriptorSet 0\n"
3317                                         "OpDecorate %ssbo32 Binding 0\n"
3318                                         "OpDecorate %ssbo16 Binding 1\n"
3319                                         "${rounddecor}\n");
3320 
3321         fragments["testfun"] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
3322                                "    %param = OpFunctionParameter %v4f32\n"
3323 
3324                                "%entry = OpLabel\n"
3325                                "    %i = OpVariable %fp_i32 Function\n"
3326                                "         OpStore %i %c_i32_0\n"
3327                                "         OpBranch %loop\n"
3328 
3329                                " %loop = OpLabel\n"
3330                                "   %15 = OpLoad %i32 %i\n"
3331                                "   %lt = OpSLessThan %bool %15 %c_i32_256\n"
3332                                "         OpLoopMerge %merge %inc None\n"
3333                                "         OpBranchConditional %lt %write %merge\n"
3334 
3335                                "%write = OpLabel\n"
3336                                "   %30 = OpLoad %i32 %i\n"
3337                                "  %src = OpAccessChain %up_f32 %ssbo32 %c_i32_0 %30\n"
3338                                "%val32 = OpLoad %f32 %src\n"
3339                                "%val16 = OpFConvert %f16 %val32\n"
3340                                "  %dst = OpAccessChain %up_f16 %ssbo16 %c_i32_0 %30\n"
3341                                "         OpStore %dst %val16\n"
3342                                "         OpBranch %inc\n"
3343 
3344                                "  %inc = OpLabel\n"
3345                                "   %37 = OpLoad %i32 %i\n"
3346                                "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
3347                                "         OpStore %i %39\n"
3348                                "         OpBranch %loop\n"
3349 
3350                                "%merge = OpLabel\n"
3351                                "         OpReturnValue %param\n"
3352 
3353                                "OpFunctionEnd\n";
3354 
3355         const RndMode rndModes[] = {
3356             {"rtz", "OpDecorate %val16  FPRoundingMode RTZ", graphicsCheck16BitFloats<ROUNDINGMODE_RTZ>},
3357             {"rte", "OpDecorate %val16  FPRoundingMode RTE", graphicsCheck16BitFloats<ROUNDINGMODE_RTE>},
3358             {"unspecified_rnd_mode", "",
3359              graphicsCheck16BitFloats<RoundingModeFlags(ROUNDINGMODE_RTE | ROUNDINGMODE_RTZ)>},
3360         };
3361 
3362         const uint32_t arrayStrides[] = {4, 16};
3363 
3364         for (uint32_t capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
3365             for (uint32_t rndModeIdx = 0; rndModeIdx < DE_LENGTH_OF_ARRAY(rndModes); ++rndModeIdx)
3366             {
3367                 map<string, string> specs;
3368                 string testName = string(CAPABILITIES[capIdx].name) + "_scalar_float_" + rndModes[rndModeIdx].name;
3369                 GraphicsResources resources;
3370                 VulkanFeatures features;
3371 
3372                 resources.inputs.push_back(
3373                     Resource(BufferSp(new Float32Buffer(arrayStrides[capIdx] == 4 ? float32Data : float32DataPadded)),
3374                              VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
3375                 // We use a custom verifyIO to check the result via computing directly from inputs; the contents in outputs do not matter.
3376                 resources.outputs.push_back(
3377                     Resource(BufferSp(new Float16Buffer(float16UnusedData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
3378 
3379                 specs["cap"]         = CAPABILITIES[capIdx].cap;
3380                 specs["indecor"]     = CAPABILITIES[capIdx].decor;
3381                 specs["arraystride"] = de::toString(arrayStrides[capIdx]);
3382                 specs["rounddecor"]  = rndModes[rndModeIdx].decor;
3383 
3384                 fragments["capability"] = capabilities.specialize(specs);
3385                 fragments["decoration"] = decoration.specialize(specs);
3386 
3387                 resources.inputs.back().setDescriptorType(CAPABILITIES[capIdx].dtype);
3388                 resources.verifyIO = rndModes[rndModeIdx].f;
3389 
3390                 features = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
3391                 features.coreFeatures.vertexPipelineStoresAndAtomics = true;
3392                 features.coreFeatures.fragmentStoresAndAtomics       = true;
3393 
3394                 createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions,
3395                                         testGroup, features);
3396             }
3397     }
3398 
3399     // Non-scalar cases can use the same resources.
3400     GraphicsResources resources;
3401     resources.inputs.push_back(Resource(BufferSp(new Float32Buffer(float32Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
3402     // We use a custom verifyIO to check the result via computing directly from inputs; the contents in outputs do not matter.
3403     resources.outputs.push_back(
3404         Resource(BufferSp(new Float16Buffer(float16UnusedData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
3405 
3406     { // vector cases
3407         fragments["pre_main"] = "      %f16 = OpTypeFloat 16\n"
3408                                 " %c_i32_64 = OpConstant %i32 64\n"
3409                                 "     %v4f16 = OpTypeVector %f16 4\n"
3410                                 " %up_v4f32 = OpTypePointer Uniform %v4f32\n"
3411                                 " %up_v4f16 = OpTypePointer Uniform %v4f16\n"
3412                                 " %ra_v4f32 = OpTypeArray %v4f32 %c_i32_64\n"
3413                                 " %ra_v4f16 = OpTypeArray %v4f16 %c_i32_64\n"
3414                                 "   %SSBO32 = OpTypeStruct %ra_v4f32\n"
3415                                 "   %SSBO16 = OpTypeStruct %ra_v4f16\n"
3416                                 "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
3417                                 "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
3418                                 "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
3419                                 "   %ssbo16 = OpVariable %up_SSBO16 Uniform\n";
3420 
3421         const StringTemplate decoration("OpDecorate %ra_v4f32 ArrayStride 16\n"
3422                                         "OpDecorate %ra_v4f16 ArrayStride 8\n"
3423                                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
3424                                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
3425                                         "OpDecorate %SSBO32 ${indecor}\n"
3426                                         "OpDecorate %SSBO16 BufferBlock\n"
3427                                         "OpDecorate %ssbo32 DescriptorSet 0\n"
3428                                         "OpDecorate %ssbo16 DescriptorSet 0\n"
3429                                         "OpDecorate %ssbo32 Binding 0\n"
3430                                         "OpDecorate %ssbo16 Binding 1\n"
3431                                         "${rounddecor}\n");
3432 
3433         // ssbo16[] <- convert ssbo32[] to 16bit float
3434         fragments["testfun"] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
3435                                "    %param = OpFunctionParameter %v4f32\n"
3436 
3437                                "%entry = OpLabel\n"
3438                                "    %i = OpVariable %fp_i32 Function\n"
3439                                "         OpStore %i %c_i32_0\n"
3440                                "         OpBranch %loop\n"
3441 
3442                                " %loop = OpLabel\n"
3443                                "   %15 = OpLoad %i32 %i\n"
3444                                "   %lt = OpSLessThan %bool %15 %c_i32_64\n"
3445                                "         OpLoopMerge %merge %inc None\n"
3446                                "         OpBranchConditional %lt %write %merge\n"
3447 
3448                                "%write = OpLabel\n"
3449                                "   %30 = OpLoad %i32 %i\n"
3450                                "  %src = OpAccessChain %up_v4f32 %ssbo32 %c_i32_0 %30\n"
3451                                "%val32 = OpLoad %v4f32 %src\n"
3452                                "%val16 = OpFConvert %v4f16 %val32\n"
3453                                "  %dst = OpAccessChain %up_v4f16 %ssbo16 %c_i32_0 %30\n"
3454                                "         OpStore %dst %val16\n"
3455                                "         OpBranch %inc\n"
3456 
3457                                "  %inc = OpLabel\n"
3458                                "   %37 = OpLoad %i32 %i\n"
3459                                "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
3460                                "         OpStore %i %39\n"
3461                                "         OpBranch %loop\n"
3462 
3463                                "%merge = OpLabel\n"
3464                                "         OpReturnValue %param\n"
3465 
3466                                "OpFunctionEnd\n";
3467 
3468         const RndMode rndModes[] = {
3469             {"rtz", "OpDecorate %val16  FPRoundingMode RTZ", graphicsCheck16BitFloats<ROUNDINGMODE_RTZ>},
3470             {"rte", "OpDecorate %val16  FPRoundingMode RTE", graphicsCheck16BitFloats<ROUNDINGMODE_RTE>},
3471             {"unspecified_rnd_mode", "",
3472              graphicsCheck16BitFloats<RoundingModeFlags(ROUNDINGMODE_RTE | ROUNDINGMODE_RTZ)>},
3473         };
3474 
3475         for (uint32_t capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
3476             for (uint32_t rndModeIdx = 0; rndModeIdx < DE_LENGTH_OF_ARRAY(rndModes); ++rndModeIdx)
3477             {
3478                 map<string, string> specs;
3479                 VulkanFeatures features;
3480                 string testName = string(CAPABILITIES[capIdx].name) + "_vector_float_" + rndModes[rndModeIdx].name;
3481 
3482                 specs["cap"]        = CAPABILITIES[capIdx].cap;
3483                 specs["indecor"]    = CAPABILITIES[capIdx].decor;
3484                 specs["rounddecor"] = rndModes[rndModeIdx].decor;
3485 
3486                 fragments["capability"] = capabilities.specialize(specs);
3487                 fragments["decoration"] = decoration.specialize(specs);
3488 
3489                 resources.inputs.back().setDescriptorType(CAPABILITIES[capIdx].dtype);
3490                 resources.verifyIO = rndModes[rndModeIdx].f;
3491 
3492                 features = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
3493                 features.coreFeatures.vertexPipelineStoresAndAtomics = true;
3494                 features.coreFeatures.fragmentStoresAndAtomics       = true;
3495 
3496                 createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions,
3497                                         testGroup, features);
3498             }
3499     }
3500 
3501     { // matrix cases
3502         fragments["pre_main"] = "       %f16 = OpTypeFloat 16\n"
3503                                 "  %c_i32_16 = OpConstant %i32 16\n"
3504                                 "     %v4f16 = OpTypeVector %f16 4\n"
3505                                 "   %m4x4f32 = OpTypeMatrix %v4f32 4\n"
3506                                 "   %m4x4f16 = OpTypeMatrix %v4f16 4\n"
3507                                 "  %up_v4f32 = OpTypePointer Uniform %v4f32\n"
3508                                 "  %up_v4f16 = OpTypePointer Uniform %v4f16\n"
3509                                 "%a16m4x4f32 = OpTypeArray %m4x4f32 %c_i32_16\n"
3510                                 "%a16m4x4f16 = OpTypeArray %m4x4f16 %c_i32_16\n"
3511                                 "    %SSBO32 = OpTypeStruct %a16m4x4f32\n"
3512                                 "    %SSBO16 = OpTypeStruct %a16m4x4f16\n"
3513                                 " %up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
3514                                 " %up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
3515                                 "    %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
3516                                 "    %ssbo16 = OpVariable %up_SSBO16 Uniform\n";
3517 
3518         const StringTemplate decoration("OpDecorate %a16m4x4f32 ArrayStride 64\n"
3519                                         "OpDecorate %a16m4x4f16 ArrayStride 32\n"
3520                                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
3521                                         "OpMemberDecorate %SSBO32 0 ColMajor\n"
3522                                         "OpMemberDecorate %SSBO32 0 MatrixStride 16\n"
3523                                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
3524                                         "OpMemberDecorate %SSBO16 0 ColMajor\n"
3525                                         "OpMemberDecorate %SSBO16 0 MatrixStride 8\n"
3526                                         "OpDecorate %SSBO32 ${indecor}\n"
3527                                         "OpDecorate %SSBO16 BufferBlock\n"
3528                                         "OpDecorate %ssbo32 DescriptorSet 0\n"
3529                                         "OpDecorate %ssbo16 DescriptorSet 0\n"
3530                                         "OpDecorate %ssbo32 Binding 0\n"
3531                                         "OpDecorate %ssbo16 Binding 1\n"
3532                                         "${rounddecor}\n");
3533 
3534         fragments["testfun"] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
3535                                "    %param = OpFunctionParameter %v4f32\n"
3536 
3537                                "%entry = OpLabel\n"
3538                                "    %i = OpVariable %fp_i32 Function\n"
3539                                "         OpStore %i %c_i32_0\n"
3540                                "         OpBranch %loop\n"
3541 
3542                                " %loop = OpLabel\n"
3543                                "   %15 = OpLoad %i32 %i\n"
3544                                "   %lt = OpSLessThan %bool %15 %c_i32_16\n"
3545                                "         OpLoopMerge %merge %inc None\n"
3546                                "         OpBranchConditional %lt %write %merge\n"
3547 
3548                                "  %write = OpLabel\n"
3549                                "     %30 = OpLoad %i32 %i\n"
3550                                "  %src_0 = OpAccessChain %up_v4f32 %ssbo32 %c_i32_0 %30 %c_i32_0\n"
3551                                "  %src_1 = OpAccessChain %up_v4f32 %ssbo32 %c_i32_0 %30 %c_i32_1\n"
3552                                "  %src_2 = OpAccessChain %up_v4f32 %ssbo32 %c_i32_0 %30 %c_i32_2\n"
3553                                "  %src_3 = OpAccessChain %up_v4f32 %ssbo32 %c_i32_0 %30 %c_i32_3\n"
3554                                "%val32_0 = OpLoad %v4f32 %src_0\n"
3555                                "%val32_1 = OpLoad %v4f32 %src_1\n"
3556                                "%val32_2 = OpLoad %v4f32 %src_2\n"
3557                                "%val32_3 = OpLoad %v4f32 %src_3\n"
3558                                "%val16_0 = OpFConvert %v4f16 %val32_0\n"
3559                                "%val16_1 = OpFConvert %v4f16 %val32_1\n"
3560                                "%val16_2 = OpFConvert %v4f16 %val32_2\n"
3561                                "%val16_3 = OpFConvert %v4f16 %val32_3\n"
3562                                "  %dst_0 = OpAccessChain %up_v4f16 %ssbo16 %c_i32_0 %30 %c_i32_0\n"
3563                                "  %dst_1 = OpAccessChain %up_v4f16 %ssbo16 %c_i32_0 %30 %c_i32_1\n"
3564                                "  %dst_2 = OpAccessChain %up_v4f16 %ssbo16 %c_i32_0 %30 %c_i32_2\n"
3565                                "  %dst_3 = OpAccessChain %up_v4f16 %ssbo16 %c_i32_0 %30 %c_i32_3\n"
3566                                "           OpStore %dst_0 %val16_0\n"
3567                                "           OpStore %dst_1 %val16_1\n"
3568                                "           OpStore %dst_2 %val16_2\n"
3569                                "           OpStore %dst_3 %val16_3\n"
3570                                "           OpBranch %inc\n"
3571 
3572                                "  %inc = OpLabel\n"
3573                                "   %37 = OpLoad %i32 %i\n"
3574                                "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
3575                                "         OpStore %i %39\n"
3576                                "         OpBranch %loop\n"
3577 
3578                                "%merge = OpLabel\n"
3579                                "         OpReturnValue %param\n"
3580 
3581                                "OpFunctionEnd\n";
3582 
3583         const RndMode rndModes[] = {
3584             {"rte",
3585              "OpDecorate %val16_0  FPRoundingMode RTE\nOpDecorate %val16_1  FPRoundingMode RTE\nOpDecorate %val16_2  "
3586              "FPRoundingMode RTE\nOpDecorate %val16_3  FPRoundingMode RTE",
3587              graphicsCheck16BitFloats<ROUNDINGMODE_RTE>},
3588             {"rtz",
3589              "OpDecorate %val16_0  FPRoundingMode RTZ\nOpDecorate %val16_1  FPRoundingMode RTZ\nOpDecorate %val16_2  "
3590              "FPRoundingMode RTZ\nOpDecorate %val16_3  FPRoundingMode RTZ",
3591              graphicsCheck16BitFloats<ROUNDINGMODE_RTZ>},
3592             {"unspecified_rnd_mode", "",
3593              graphicsCheck16BitFloats<RoundingModeFlags(ROUNDINGMODE_RTE | ROUNDINGMODE_RTZ)>},
3594         };
3595 
3596         for (uint32_t capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
3597             for (uint32_t rndModeIdx = 0; rndModeIdx < DE_LENGTH_OF_ARRAY(rndModes); ++rndModeIdx)
3598             {
3599                 map<string, string> specs;
3600                 VulkanFeatures features;
3601                 string testName = string(CAPABILITIES[capIdx].name) + "_matrix_float_" + rndModes[rndModeIdx].name;
3602 
3603                 specs["cap"]        = CAPABILITIES[capIdx].cap;
3604                 specs["indecor"]    = CAPABILITIES[capIdx].decor;
3605                 specs["rounddecor"] = rndModes[rndModeIdx].decor;
3606 
3607                 fragments["capability"] = capabilities.specialize(specs);
3608                 fragments["decoration"] = decoration.specialize(specs);
3609 
3610                 resources.inputs.back().setDescriptorType(CAPABILITIES[capIdx].dtype);
3611                 resources.verifyIO = rndModes[rndModeIdx].f;
3612 
3613                 features = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
3614                 features.coreFeatures.vertexPipelineStoresAndAtomics = true;
3615                 features.coreFeatures.fragmentStoresAndAtomics       = true;
3616 
3617                 createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions,
3618                                         testGroup, features);
3619             }
3620     }
3621 }
3622 
addGraphics16BitStorageInputOutputFloat32To16Group(tcu::TestCaseGroup * testGroup)3623 void addGraphics16BitStorageInputOutputFloat32To16Group(tcu::TestCaseGroup *testGroup)
3624 {
3625     de::Random rnd(deStringHash(testGroup->getName()));
3626     RGBA defaultColors[4];
3627     vector<string> extensions;
3628     map<string, string> fragments = passthruFragments();
3629     const uint32_t numDataPoints  = 64;
3630     // Special values like inf/nan/denormal may not be preserved when float control features are not provided,
3631     // thus values generating special float16 values must be excluded in input data here.
3632     vector<float> float32Data = getFloat32s(rnd, numDataPoints, false);
3633 
3634     extensions.push_back("VK_KHR_16bit_storage");
3635 
3636     fragments["capability"] = "OpCapability StorageInputOutput16\n";
3637     fragments["extension"]  = "OpExtension \"SPV_KHR_16bit_storage\"\n";
3638 
3639     getDefaultColors(defaultColors);
3640 
3641     struct RndMode
3642     {
3643         const char *name;
3644         const char *decor;
3645         const char *decor_tessc;
3646         RoundingModeFlags flags;
3647     };
3648 
3649     const RndMode rndModes[] = {
3650         {"rtz", "OpDecorate %ret0  FPRoundingMode RTZ\n",
3651          "OpDecorate %ret1  FPRoundingMode RTZ\n"
3652          "OpDecorate %ret2  FPRoundingMode RTZ\n",
3653          ROUNDINGMODE_RTZ},
3654         {"rte", "OpDecorate %ret0  FPRoundingMode RTE\n",
3655          "OpDecorate %ret1  FPRoundingMode RTE\n"
3656          "OpDecorate %ret2  FPRoundingMode RTE\n",
3657          ROUNDINGMODE_RTE},
3658         {"unspecified_rnd_mode", "", "", RoundingModeFlags(ROUNDINGMODE_RTE | ROUNDINGMODE_RTZ)},
3659     };
3660 
3661     struct Case
3662     {
3663         const char *name;
3664         const char *interfaceOpCall;
3665         const char *interfaceOpFunc;
3666         const char *postInterfaceOp;
3667         const char *postInterfaceOpGeom;
3668         const char *postInterfaceOpTessc;
3669         const char *preMain;
3670         const char *inputType;
3671         const char *outputType;
3672         uint32_t numPerCase;
3673         uint32_t numElements;
3674     };
3675 
3676     const Case cases[] = {{
3677                               // Scalar cases
3678                               "scalar",
3679                               "OpFConvert %f16",
3680                               "",
3681 
3682                               "             %ret0 = OpFConvert %f16 %IF_input_val\n"
3683                               "                OpStore %IF_output %ret0\n",
3684 
3685                               "             %ret0 = OpFConvert %f16 %IF_input_val0\n"
3686                               "                OpStore %IF_output %ret0\n",
3687 
3688                               "             %ret0 = OpFConvert %f16 %IF_input_val0\n"
3689                               "                OpStore %IF_output_ptr0 %ret0\n"
3690                               "             %ret1 = OpFConvert %f16 %IF_input_val1\n"
3691                               "                OpStore %IF_output_ptr1 %ret1\n"
3692                               "             %ret2 = OpFConvert %f16 %IF_input_val2\n"
3693                               "                OpStore %IF_output_ptr2 %ret2\n",
3694 
3695                               "             %f16 = OpTypeFloat 16\n"
3696                               "          %op_f16 = OpTypePointer Output %f16\n"
3697                               "           %a3f16 = OpTypeArray %f16 %c_i32_3\n"
3698                               "        %op_a3f16 = OpTypePointer Output %a3f16\n"
3699                               "%f16_f32_function = OpTypeFunction %f16 %f32\n"
3700                               "           %a3f32 = OpTypeArray %f32 %c_i32_3\n"
3701                               "        %ip_a3f32 = OpTypePointer Input %a3f32\n",
3702 
3703                               "f32",
3704                               "f16",
3705                               4,
3706                               1,
3707                           },
3708                           {
3709                               // Vector cases
3710                               "vector",
3711 
3712                               "OpFConvert %v2f16",
3713                               "",
3714 
3715                               "             %ret0 = OpFConvert %v2f16 %IF_input_val\n"
3716                               "                OpStore %IF_output %ret0\n",
3717 
3718                               "             %ret0 = OpFConvert %v2f16 %IF_input_val0\n"
3719                               "                OpStore %IF_output %ret0\n",
3720 
3721                               "             %ret0 = OpFConvert %v2f16 %IF_input_val0\n"
3722                               "                OpStore %IF_output_ptr0 %ret0\n"
3723                               "             %ret1 = OpFConvert %v2f16 %IF_input_val1\n"
3724                               "                OpStore %IF_output_ptr1 %ret1\n"
3725                               "             %ret2 = OpFConvert %v2f16 %IF_input_val2\n"
3726                               "                OpStore %IF_output_ptr2 %ret2\n",
3727 
3728                               "                 %f16 = OpTypeFloat 16\n"
3729                               "               %v2f16 = OpTypeVector %f16 2\n"
3730                               "            %op_v2f16 = OpTypePointer Output %v2f16\n"
3731                               "             %a3v2f16 = OpTypeArray %v2f16 %c_i32_3\n"
3732                               "          %op_a3v2f16 = OpTypePointer Output %a3v2f16\n"
3733                               "%v2f16_v2f32_function = OpTypeFunction %v2f16 %v2f32\n"
3734                               "             %a3v2f32 = OpTypeArray %v2f32 %c_i32_3\n"
3735                               "          %ip_a3v2f32 = OpTypePointer Input %a3v2f32\n",
3736 
3737                               "v2f32",
3738                               "v2f16",
3739                               2 * 4,
3740                               2,
3741                           }};
3742 
3743     VulkanFeatures requiredFeatures;
3744     requiredFeatures.ext16BitStorage.storageInputOutput16 = true;
3745 
3746     for (uint32_t caseIdx = 0; caseIdx < DE_LENGTH_OF_ARRAY(cases); ++caseIdx)
3747         for (uint32_t rndModeIdx = 0; rndModeIdx < DE_LENGTH_OF_ARRAY(rndModes); ++rndModeIdx)
3748         {
3749             fragments["interface_op_call"]       = cases[caseIdx].interfaceOpCall;
3750             fragments["interface_op_func"]       = cases[caseIdx].interfaceOpFunc;
3751             fragments["post_interface_op_frag"]  = cases[caseIdx].postInterfaceOp;
3752             fragments["post_interface_op_vert"]  = cases[caseIdx].postInterfaceOp;
3753             fragments["post_interface_op_geom"]  = cases[caseIdx].postInterfaceOpGeom;
3754             fragments["post_interface_op_tesse"] = cases[caseIdx].postInterfaceOpGeom;
3755             fragments["post_interface_op_tessc"] = cases[caseIdx].postInterfaceOpTessc;
3756             fragments["pre_main"]                = cases[caseIdx].preMain;
3757             fragments["decoration"]              = rndModes[rndModeIdx].decor;
3758             fragments["decoration_tessc"]        = rndModes[rndModeIdx].decor_tessc;
3759 
3760             fragments["input_type"]  = cases[caseIdx].inputType;
3761             fragments["output_type"] = cases[caseIdx].outputType;
3762 
3763             GraphicsInterfaces interfaces;
3764             const uint32_t numPerCase = cases[caseIdx].numPerCase;
3765             vector<float> subInputs(numPerCase);
3766             vector<deFloat16> subOutputs(numPerCase);
3767 
3768             // The pipeline need this to call compare16BitFloat() when checking the result.
3769             interfaces.setRoundingMode(rndModes[rndModeIdx].flags);
3770 
3771             for (uint32_t caseNdx = 0; caseNdx < numDataPoints / numPerCase; ++caseNdx)
3772             {
3773                 string testName =
3774                     string(cases[caseIdx].name) + numberToString(caseNdx) + "_" + rndModes[rndModeIdx].name;
3775 
3776                 for (uint32_t numNdx = 0; numNdx < numPerCase; ++numNdx)
3777                 {
3778                     subInputs[numNdx] = float32Data[caseNdx * numPerCase + numNdx];
3779                     // We derive the expected result from inputs directly in the graphics pipeline.
3780                     subOutputs[numNdx] = 0;
3781                 }
3782                 interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_FLOAT32),
3783                                                          BufferSp(new Float32Buffer(subInputs))),
3784                                           std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_FLOAT16),
3785                                                          BufferSp(new Float16Buffer(subOutputs))));
3786                 createTestsForAllStages(testName, defaultColors, defaultColors, fragments, interfaces, extensions,
3787                                         testGroup, requiredFeatures);
3788             }
3789         }
3790 }
3791 
addGraphics16BitStorageInputOutputFloat16To32Group(tcu::TestCaseGroup * testGroup)3792 void addGraphics16BitStorageInputOutputFloat16To32Group(tcu::TestCaseGroup *testGroup)
3793 {
3794     de::Random rnd(deStringHash(testGroup->getName()));
3795     RGBA defaultColors[4];
3796     vector<string> extensions;
3797     map<string, string> fragments = passthruFragments();
3798     const uint32_t numDataPoints  = 64;
3799     vector<deFloat16> float16Data(getFloat16s(rnd, numDataPoints));
3800     vector<float> float32Data;
3801 
3802     float32Data.reserve(numDataPoints);
3803     for (uint32_t numIdx = 0; numIdx < numDataPoints; ++numIdx)
3804         float32Data.push_back(deFloat16To32(float16Data[numIdx]));
3805 
3806     extensions.push_back("VK_KHR_16bit_storage");
3807 
3808     fragments["capability"] = "OpCapability StorageInputOutput16\n";
3809     fragments["extension"]  = "OpExtension \"SPV_KHR_16bit_storage\"\n";
3810 
3811     getDefaultColors(defaultColors);
3812 
3813     struct Case
3814     {
3815         const char *name;
3816         const char *interfaceOpCall;
3817         const char *interfaceOpFunc;
3818         const char *preMain;
3819         const char *inputType;
3820         const char *outputType;
3821         uint32_t numPerCase;
3822         uint32_t numElements;
3823     };
3824 
3825     Case cases[] = {{
3826                         // Scalar cases
3827                         "scalar",
3828 
3829                         "OpFConvert %f32",
3830                         "",
3831 
3832                         "             %f16 = OpTypeFloat 16\n"
3833                         "          %ip_f16 = OpTypePointer Input %f16\n"
3834                         "           %a3f16 = OpTypeArray %f16 %c_i32_3\n"
3835                         "        %ip_a3f16 = OpTypePointer Input %a3f16\n"
3836                         "%f32_f16_function = OpTypeFunction %f32 %f16\n"
3837                         "           %a3f32 = OpTypeArray %f32 %c_i32_3\n"
3838                         "        %op_a3f32 = OpTypePointer Output %a3f32\n",
3839 
3840                         "f16",
3841                         "f32",
3842                         4,
3843                         1,
3844                     },
3845                     {
3846                         // Vector cases
3847                         "vector",
3848 
3849                         "OpFConvert %v2f32",
3850                         "",
3851 
3852                         "                 %f16 = OpTypeFloat 16\n"
3853                         "                %v2f16 = OpTypeVector %f16 2\n"
3854                         "            %ip_v2f16 = OpTypePointer Input %v2f16\n"
3855                         "             %a3v2f16 = OpTypeArray %v2f16 %c_i32_3\n"
3856                         "          %ip_a3v2f16 = OpTypePointer Input %a3v2f16\n"
3857                         "%v2f32_v2f16_function = OpTypeFunction %v2f32 %v2f16\n"
3858                         "             %a3v2f32 = OpTypeArray %v2f32 %c_i32_3\n"
3859                         "          %op_a3v2f32 = OpTypePointer Output %a3v2f32\n",
3860 
3861                         "v2f16",
3862                         "v2f32",
3863                         2 * 4,
3864                         2,
3865                     }};
3866 
3867     VulkanFeatures requiredFeatures;
3868     requiredFeatures.ext16BitStorage.storageInputOutput16 = true;
3869 
3870     for (uint32_t caseIdx = 0; caseIdx < DE_LENGTH_OF_ARRAY(cases); ++caseIdx)
3871     {
3872         fragments["interface_op_call"] = cases[caseIdx].interfaceOpCall;
3873         fragments["interface_op_func"] = cases[caseIdx].interfaceOpFunc;
3874         fragments["pre_main"]          = cases[caseIdx].preMain;
3875 
3876         fragments["input_type"]  = cases[caseIdx].inputType;
3877         fragments["output_type"] = cases[caseIdx].outputType;
3878 
3879         GraphicsInterfaces interfaces;
3880         const uint32_t numPerCase = cases[caseIdx].numPerCase;
3881         vector<deFloat16> subInputs(numPerCase);
3882         vector<float> subOutputs(numPerCase);
3883 
3884         for (uint32_t caseNdx = 0; caseNdx < numDataPoints / numPerCase; ++caseNdx)
3885         {
3886             string testName = string(cases[caseIdx].name) + numberToString(caseNdx);
3887 
3888             for (uint32_t numNdx = 0; numNdx < numPerCase; ++numNdx)
3889             {
3890                 subInputs[numNdx]  = float16Data[caseNdx * numPerCase + numNdx];
3891                 subOutputs[numNdx] = float32Data[caseNdx * numPerCase + numNdx];
3892             }
3893             interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_FLOAT16),
3894                                                      BufferSp(new Float16Buffer(subInputs))),
3895                                       std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_FLOAT32),
3896                                                      BufferSp(new Float32Buffer(subOutputs))));
3897             createTestsForAllStages(testName, defaultColors, defaultColors, fragments, interfaces, extensions,
3898                                     testGroup, requiredFeatures);
3899         }
3900     }
3901 }
3902 
addGraphics16BitStorageInputOutputFloat16To16Group(tcu::TestCaseGroup * testGroup)3903 void addGraphics16BitStorageInputOutputFloat16To16Group(tcu::TestCaseGroup *testGroup)
3904 {
3905     de::Random rnd(deStringHash(testGroup->getName()));
3906     RGBA defaultColors[4];
3907     vector<string> extensions;
3908     map<string, string> fragments = passthruFragments();
3909     const uint32_t numDataPoints  = 64;
3910     // Special values like inf/nan/denormal may not be preserved when float control features are not provided,
3911     // thus those values must be excluded in the input data here.
3912     vector<deFloat16> float16Data(getFloat16s(rnd, numDataPoints, false));
3913     VulkanFeatures requiredFeatures;
3914 
3915     requiredFeatures.ext16BitStorage.storageInputOutput16 = true;
3916     extensions.push_back("VK_KHR_16bit_storage");
3917 
3918     fragments["capability"] = "OpCapability StorageInputOutput16\n";
3919     fragments["extension"]  = "OpExtension \"SPV_KHR_16bit_storage\"\n";
3920 
3921     getDefaultColors(defaultColors);
3922 
3923     struct Case
3924     {
3925         const char *name;
3926         const char *interfaceOpCall;
3927         const char *interfaceOpFunc;
3928         const char *preMain;
3929         const char *inputType;
3930         const char *outputType;
3931         uint32_t numPerCase;
3932         uint32_t numElements;
3933     };
3934 
3935     Case cases[] = {{
3936                         // Scalar cases
3937                         "scalar",
3938 
3939                         "OpCopyObject %f16",
3940                         "",
3941 
3942                         "             %f16 = OpTypeFloat 16\n"
3943                         "          %ip_f16 = OpTypePointer Input %f16\n"
3944                         "           %a3f16 = OpTypeArray %f16 %c_i32_3\n"
3945                         "        %ip_a3f16 = OpTypePointer Input %a3f16\n"
3946                         "%f16_f16_function = OpTypeFunction %f16 %f16\n"
3947                         "          %op_f16 = OpTypePointer Output %f16\n"
3948                         "        %op_a3f16 = OpTypePointer Output %a3f16\n",
3949 
3950                         "f16",
3951                         "f16",
3952                         4,
3953                         1,
3954                     },
3955                     {
3956                         // Vector cases
3957                         "vector",
3958 
3959                         "OpCopyObject %v2f16",
3960                         "",
3961 
3962                         "                 %f16 = OpTypeFloat 16\n"
3963                         "               %v2f16 = OpTypeVector %f16 2\n"
3964                         "            %ip_v2f16 = OpTypePointer Input %v2f16\n"
3965                         "             %a3v2f16 = OpTypeArray %v2f16 %c_i32_3\n"
3966                         "          %ip_a3v2f16 = OpTypePointer Input %a3v2f16\n"
3967                         "%v2f16_v2f16_function = OpTypeFunction %v2f16 %v2f16\n"
3968                         "            %op_v2f16 = OpTypePointer Output %v2f16\n"
3969                         "          %op_a3v2f16 = OpTypePointer Output %a3v2f16\n",
3970 
3971                         "v2f16",
3972                         "v2f16",
3973                         2 * 4,
3974                         2,
3975                     }};
3976 
3977     for (uint32_t caseIdx = 0; caseIdx < DE_LENGTH_OF_ARRAY(cases); ++caseIdx)
3978     {
3979         fragments["interface_op_call"] = cases[caseIdx].interfaceOpCall;
3980         fragments["interface_op_func"] = cases[caseIdx].interfaceOpFunc;
3981         fragments["pre_main"]          = cases[caseIdx].preMain;
3982 
3983         fragments["input_type"]  = cases[caseIdx].inputType;
3984         fragments["output_type"] = cases[caseIdx].outputType;
3985 
3986         GraphicsInterfaces interfaces;
3987         const uint32_t numPerCase = cases[caseIdx].numPerCase;
3988         vector<deFloat16> subInputsOutputs(numPerCase);
3989 
3990         for (uint32_t caseNdx = 0; caseNdx < numDataPoints / numPerCase; ++caseNdx)
3991         {
3992             string testName = string(cases[caseIdx].name) + numberToString(caseNdx);
3993 
3994             for (uint32_t numNdx = 0; numNdx < numPerCase; ++numNdx)
3995                 subInputsOutputs[numNdx] = float16Data[caseNdx * numPerCase + numNdx];
3996 
3997             interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_FLOAT16),
3998                                                      BufferSp(new Float16Buffer(subInputsOutputs))),
3999                                       std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_FLOAT16),
4000                                                      BufferSp(new Float16Buffer(subInputsOutputs))));
4001 
4002             createTestsForAllStages(testName, defaultColors, defaultColors, fragments, interfaces, extensions,
4003                                     testGroup, requiredFeatures);
4004         }
4005     }
4006 }
4007 
addShaderCode16BitStorageInputOutput16To16x2(vk::SourceCollections & dst,TestDefinition def)4008 void addShaderCode16BitStorageInputOutput16To16x2(vk::SourceCollections &dst, TestDefinition def)
4009 {
4010     SpirvVersion targetSpirvVersion = def.instanceContext.resources.spirvVersion;
4011     const uint32_t vulkanVersion    = dst.usedVulkanVersion;
4012     map<string, string> spec;
4013 
4014     switch (def.dataType)
4015     {
4016     case DATATYPE_FLOAT:
4017         spec["type"]    = "f";
4018         spec["convert"] = "OpFConvert";
4019         spec["scale"]   = "%x = OpCopyObject %f32 %dataIn0_converted\n%y = OpCopyObject %f32 %dataIn1_converted\n";
4020         spec["colorConstruct"] = "OpCompositeConstruct %v4f32 %x %y %c_f32_1 %c_f32_1";
4021         spec["interpolation0"] = spec["interpolation1"] = "";
4022         break;
4023 
4024     case DATATYPE_VEC2:
4025         spec["type"]    = "v2f";
4026         spec["convert"] = "OpFConvert";
4027         spec["scale"] = "%xy = OpCopyObject %v2f32 %dataIn0_converted\n%zw = OpCopyObject %v2f32 %dataIn1_converted\n";
4028         spec["colorConstruct"] = "OpCompositeConstruct %v4f32 %xy %zw";
4029         spec["interpolation0"] = spec["interpolation1"] = "";
4030         break;
4031 
4032     case DATATYPE_INT:
4033         spec["type"]    = "i";
4034         spec["convert"] = "OpSConvert";
4035         spec["scale"] =
4036             "%x_unscaled = OpConvertSToF %f32 %dataIn0_converted\n%x = OpFDiv %f32 %x_unscaled %scale_f32\n%y_unscaled "
4037             "= OpConvertSToF %f32 %dataIn1_converted\n%y = OpFDiv %f32 %y_unscaled %scale_f32\n";
4038         spec["colorConstruct"] = "OpCompositeConstruct %v4f32 %x %y %c_f32_1 %c_f32_1";
4039         spec["interpolation0"] = "OpDecorate %dataIn0 Flat";
4040         spec["interpolation1"] = "OpDecorate %dataIn1 Flat";
4041         break;
4042 
4043     case DATATYPE_UINT:
4044         spec["type"]    = "u";
4045         spec["convert"] = "OpUConvert";
4046         spec["scale"] =
4047             "%x_unscaled = OpConvertUToF %f32 %dataIn0_converted\n%x = OpFDiv %f32 %x_unscaled %scale_f32\n%y_unscaled "
4048             "= OpConvertUToF %f32 %dataIn1_converted\n%y = OpFDiv %f32 %y_unscaled %scale_f32\n";
4049         spec["colorConstruct"] = "OpCompositeConstruct %v4f32 %x %y %c_f32_1 %c_f32_1";
4050         spec["interpolation0"] = "OpDecorate %dataIn0 Flat";
4051         spec["interpolation1"] = "OpDecorate %dataIn1 Flat";
4052         break;
4053 
4054     case DATATYPE_IVEC2:
4055         spec["type"]    = "v2i";
4056         spec["convert"] = "OpSConvert";
4057         spec["scale"]   = "%xy_unscaled = OpConvertSToF %v2f32 %dataIn0_converted\n%xy = OpFDiv %v2f32 %xy_unscaled "
4058                           "%scale_v2f32\n%zw_unscaled = OpConvertSToF %v2f32 %dataIn1_converted\n%zw = OpFDiv %v2f32 "
4059                           "%zw_unscaled %scale_v2f32\n";
4060         spec["colorConstruct"] = "OpCompositeConstruct %v4f32 %xy %zw";
4061         spec["interpolation0"] = "OpDecorate %dataIn0 Flat";
4062         spec["interpolation1"] = "OpDecorate %dataIn1 Flat";
4063         break;
4064 
4065     case DATATYPE_UVEC2:
4066         spec["type"]    = "v2u";
4067         spec["convert"] = "OpUConvert";
4068         spec["scale"]   = "%xy_unscaled = OpConvertUToF %v2f32 %dataIn0_converted\n%xy = OpFDiv %v2f32 %xy_unscaled "
4069                           "%scale_v2f32\n%zw_unscaled = OpConvertUToF %v2f32 %dataIn1_converted\n%zw = OpFDiv %v2f32 "
4070                           "%zw_unscaled %scale_v2f32\n";
4071         spec["colorConstruct"] = "OpCompositeConstruct %v4f32 %xy %zw";
4072         spec["interpolation0"] = "OpDecorate %dataIn0 Flat";
4073         spec["interpolation1"] = "OpDecorate %dataIn1 Flat";
4074         break;
4075 
4076     default:
4077         DE_FATAL("Unexpected data type");
4078         break;
4079     }
4080 
4081     // Read input data from binding 1, location 2. Should have value(s) of 0.5 in 16bit float or 32767 in 16bit int.
4082     // Store the value to two outputs (dataOut0 and 1).
4083     StringTemplate vertexShader("                             OpCapability Shader\n"
4084                                 "                             OpCapability StorageInputOutput16\n"
4085                                 "                             OpExtension \"SPV_KHR_16bit_storage\"\n"
4086                                 "                        %1 = OpExtInstImport \"GLSL.std.450\"\n"
4087                                 "                             OpMemoryModel Logical GLSL450\n"
4088                                 "                             OpEntryPoint Vertex %main \"main\" %_ %position "
4089                                 "%vtxColor %dataIn %color %dataOut0 %dataOut1\n"
4090                                 "                             OpSource GLSL 430\n"
4091                                 "                             OpMemberDecorate %gl_PerVertex 0 BuiltIn Position\n"
4092                                 "                             OpMemberDecorate %gl_PerVertex 1 BuiltIn PointSize\n"
4093                                 "                             OpMemberDecorate %gl_PerVertex 2 BuiltIn ClipDistance\n"
4094                                 "                             OpDecorate %gl_PerVertex Block\n"
4095                                 "                             OpDecorate %position Location 0\n"
4096                                 "                             OpDecorate %vtxColor Location 1\n"
4097                                 "                             OpDecorate %dataIn Location 2\n"
4098                                 "                             OpDecorate %color Location 1\n"
4099                                 "                             OpDecorate %dataOut0 Location 2\n"
4100                                 "                             OpDecorate %dataOut1 Location 3\n"
4101                                 "                     %void = OpTypeVoid\n"
4102                                 "                %void_func = OpTypeFunction %void\n"
4103                                 "                      %f32 = OpTypeFloat 32\n"
4104                                 "                      %f16 = OpTypeFloat 16\n"
4105                                 "                      %i32 = OpTypeInt 32 1\n"
4106                                 "                      %i16 = OpTypeInt 16 1\n"
4107                                 "                      %u32 = OpTypeInt 32 0\n"
4108                                 "                      %u16 = OpTypeInt 16 0\n"
4109                                 "                    %v4f32 = OpTypeVector %f32 4\n"
4110                                 "                    %v2f32 = OpTypeVector %f32 2\n"
4111                                 "                    %v2f16 = OpTypeVector %f16 2\n"
4112                                 "                    %v2i32 = OpTypeVector %i32 2\n"
4113                                 "                    %v2i16 = OpTypeVector %i16 2\n"
4114                                 "                    %v2u32 = OpTypeVector %u32 2\n"
4115                                 "                    %v2u16 = OpTypeVector %u16 2\n"
4116                                 "                    %u32_0 = OpConstant %u32 0\n"
4117                                 "                    %u32_1 = OpConstant %u32 1\n"
4118                                 "           %_arr_f32_u32_1 = OpTypeArray %f32 %u32_1\n"
4119                                 "             %gl_PerVertex = OpTypeStruct %v4f32 %f32 %_arr_f32_u32_1\n"
4120                                 " %_ptr_Output_gl_PerVertex = OpTypePointer Output %gl_PerVertex\n"
4121                                 "        %_ptr_Output_v4f32 = OpTypePointer Output %v4f32\n"
4122                                 "    %_ptr_Output_${type}16 = OpTypePointer Output %${type}16\n"
4123                                 "     %_ptr_Input_${type}16 = OpTypePointer Input %${type}16\n"
4124                                 "         %_ptr_Input_v4f32 = OpTypePointer Input %v4f32\n"
4125                                 "                        %_ = OpVariable %_ptr_Output_gl_PerVertex Output\n"
4126                                 "                   %dataIn = OpVariable %_ptr_Input_${type}16 Input\n"
4127                                 "                 %position = OpVariable %_ptr_Input_v4f32 Input\n"
4128                                 "                    %color = OpVariable %_ptr_Input_v4f32 Input\n"
4129                                 "                 %vtxColor = OpVariable %_ptr_Output_v4f32 Output\n"
4130                                 "                 %dataOut0 = OpVariable %_ptr_Output_${type}16 Output\n"
4131                                 "                 %dataOut1 = OpVariable %_ptr_Output_${type}16 Output\n"
4132                                 "                     %main = OpFunction %void None %void_func\n"
4133                                 "                    %entry = OpLabel\n"
4134                                 "                  %posData = OpLoad %v4f32 %position\n"
4135                                 "             %posOutputPtr = OpAccessChain %_ptr_Output_v4f32 %_ %u32_0\n"
4136                                 "                             OpStore %posOutputPtr %posData\n"
4137                                 "                %colorData = OpLoad %v4f32 %color\n"
4138                                 "                             OpStore %vtxColor %colorData\n"
4139                                 "                        %d = OpLoad %${type}16 %dataIn\n"
4140                                 "                             OpStore %dataOut0 %d\n"
4141                                 "                             OpStore %dataOut1 %d\n"
4142                                 "                             OpReturn\n"
4143                                 "                             OpFunctionEnd\n");
4144 
    // Scalar:
    // Read two 16-bit values from the vertex shader. Convert them to 32 bits and store them as a
    // fragment color of (val0, val1, 1.0, 1.0). Val0 and val1 should both equal 0.5.
    // Vector:
    // Read two 16-bit vec2s from the vertex shader. Convert them to 32 bits and store them as a
    // fragment color of (val0.x, val0.y, val1.x, val1.y). Val0 and val1 should both equal (0.5, 0.5).
4151     StringTemplate fragmentShader("                             OpCapability Shader\n"
4152                                   "                             OpCapability StorageInputOutput16\n"
4153                                   "                             OpExtension \"SPV_KHR_16bit_storage\"\n"
4154                                   "                        %1 = OpExtInstImport \"GLSL.std.450\"\n"
4155                                   "                             OpMemoryModel Logical GLSL450\n"
4156                                   "                             OpEntryPoint Fragment %main \"main\" %fragColor "
4157                                   "%dataOut %vtxColor %dataIn0 %dataIn1\n"
4158                                   "                             OpExecutionMode %main OriginUpperLeft\n"
4159                                   "                             OpSource GLSL 430\n"
4160                                   "                             OpDecorate %vtxColor Location 1\n"
4161                                   "                             OpDecorate %dataIn0 Location 2\n"
4162                                   "                             OpDecorate %dataIn1 Location 3\n"
4163                                   "                             ${interpolation0}\n"
4164                                   "                             ${interpolation1}\n"
4165                                   "                             OpDecorate %fragColor Location 0\n"
4166                                   "                             OpDecorate %dataOut Location 1\n"
4167                                   "                     %void = OpTypeVoid\n"
4168                                   "                %void_func = OpTypeFunction %void\n"
4169                                   "                      %f32 = OpTypeFloat 32\n"
4170                                   "                      %f16 = OpTypeFloat 16\n"
4171                                   "                      %i32 = OpTypeInt 32 1\n"
4172                                   "                      %i16 = OpTypeInt 16 1\n"
4173                                   "                      %u32 = OpTypeInt 32 0\n"
4174                                   "                      %u16 = OpTypeInt 16 0\n"
4175                                   "                    %v2f32 = OpTypeVector %f32 2\n"
4176                                   "                    %v2f16 = OpTypeVector %f16 2\n"
4177                                   "                    %v4f32 = OpTypeVector %f32 4\n"
4178                                   "                    %v2i32 = OpTypeVector %i32 2\n"
4179                                   "                    %v2i16 = OpTypeVector %i16 2\n"
4180                                   "                    %v2u32 = OpTypeVector %u32 2\n"
4181                                   "                    %v2u16 = OpTypeVector %u16 2\n"
4182                                   "        %_ptr_Output_v4f32 = OpTypePointer Output %v4f32\n"
4183                                   "    %_ptr_Output_${type}16 = OpTypePointer Output %${type}16\n"
4184                                   "                %fragColor = OpVariable %_ptr_Output_v4f32 Output\n"
4185                                   "                  %dataOut = OpVariable %_ptr_Output_${type}16 Output\n"
4186                                   "     %_ptr_Input_${type}16 = OpTypePointer Input %${type}16\n"
4187                                   "         %_ptr_Input_v4f32 = OpTypePointer Input %v4f32\n"
4188                                   "                 %vtxColor = OpVariable %_ptr_Input_v4f32 Input\n"
4189                                   "                  %dataIn0 = OpVariable %_ptr_Input_${type}16 Input\n"
4190                                   "                  %dataIn1 = OpVariable %_ptr_Input_${type}16 Input\n"
4191                                   "                  %c_f32_1 = OpConstant %f32 1\n"
4192                                   "                %scale_f32 = OpConstant %f32 65534.0\n"
4193                                   "              %scale_v2f32 = OpConstantComposite %v2f32 %scale_f32 %scale_f32\n"
4194                                   "                     %main = OpFunction %void None %void_func\n"
4195                                   "                    %entry = OpLabel\n"
4196                                   "              %dataIn0_val = OpLoad %${type}16 %dataIn0\n"
4197                                   "              %dataIn1_val = OpLoad %${type}16 %dataIn1\n"
4198                                   "        %dataIn0_converted = ${convert} %${type}32 %dataIn0_val\n"
4199                                   "        %dataIn1_converted = ${convert} %${type}32 %dataIn1_val\n"
4200                                   "${scale}"
4201                                   "                    %color = ${colorConstruct}\n"
4202                                   "                             OpStore %fragColor %color\n"
4203                                   "                             OpStore %dataOut %dataIn0_val\n"
4204                                   "                             OpReturn\n"
4205                                   "                             OpFunctionEnd\n");
4206 
4207     dst.spirvAsmSources.add("vert", DE_NULL)
4208         << vertexShader.specialize(spec) << SpirVAsmBuildOptions(vulkanVersion, targetSpirvVersion);
4209     dst.spirvAsmSources.add("frag", DE_NULL)
4210         << fragmentShader.specialize(spec) << SpirVAsmBuildOptions(vulkanVersion, targetSpirvVersion);
4211 }
4212 
runAndVerifyDefaultPipeline(Context & context,TestDefinition testDef)4213 TestStatus runAndVerifyDefaultPipeline(Context &context, TestDefinition testDef)
4214 {
4215     return runAndVerifyDefaultPipeline(context, testDef.instanceContext);
4216 }
4217 
addGraphics16BitStorageInputOutputFloat16To16x2Group(tcu::TestCaseGroup * testGroup)4218 void addGraphics16BitStorageInputOutputFloat16To16x2Group(tcu::TestCaseGroup *testGroup)
4219 {
4220     RGBA defaultColors[4];
4221     SpecConstants noSpecConstants;
4222     PushConstants noPushConstants;
4223     vector<string> extensions;
4224     map<string, string> noFragments;
4225     GraphicsResources noResources;
4226     StageToSpecConstantMap specConstantMap;
4227     VulkanFeatures requiredFeatures;
4228 
4229     const ShaderElement pipelineStages[] = {
4230         ShaderElement("vert", "main", VK_SHADER_STAGE_VERTEX_BIT),
4231         ShaderElement("frag", "main", VK_SHADER_STAGE_FRAGMENT_BIT),
4232     };
4233 
4234     specConstantMap[VK_SHADER_STAGE_VERTEX_BIT]   = noSpecConstants;
4235     specConstantMap[VK_SHADER_STAGE_FRAGMENT_BIT] = noSpecConstants;
4236 
4237     getDefaultColors(defaultColors);
4238 
4239     extensions.push_back("VK_KHR_16bit_storage");
4240     requiredFeatures.ext16BitStorage.storageInputOutput16 = true;
4241 
4242     const struct
4243     {
4244         string name;
4245         uint32_t numElements;
4246         TestDefDataType dataType;
4247         NumberType numberType;
4248         bool isVector;
4249     } cases[] = {
4250         {"scalar", 1, DATATYPE_FLOAT, NUMBERTYPE_FLOAT16, false},
4251         {"vec2", 2, DATATYPE_VEC2, NUMBERTYPE_FLOAT16, true},
4252     };
4253 
4254     for (uint32_t caseIdx = 0; caseIdx < DE_LENGTH_OF_ARRAY(cases); ++caseIdx)
4255     {
4256         const RGBA outColor(128u, 128u, cases[caseIdx].isVector ? 128u : 255u, cases[caseIdx].isVector ? 128u : 255u);
4257         RGBA outputColors[4] = {outColor, outColor, outColor, outColor};
4258         vector<deFloat16> float16Data(4 * cases[caseIdx].numElements, deFloat32To16(0.5f));
4259         GraphicsInterfaces interfaces;
4260 
4261         interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, cases[caseIdx].numberType),
4262                                                  BufferSp(new Float16Buffer(float16Data))),
4263                                   std::make_pair(IFDataType(cases[caseIdx].numElements, cases[caseIdx].numberType),
4264                                                  BufferSp(new Float16Buffer(float16Data))));
4265 
4266         const InstanceContext &instanceContext = createInstanceContext(
4267             pipelineStages, defaultColors, outputColors, noFragments, specConstantMap, noPushConstants, noResources,
4268             interfaces, extensions, requiredFeatures, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
4269             QP_TEST_RESULT_FAIL, string());
4270 
4271         TestDefinition testDef = {instanceContext, cases[caseIdx].dataType};
4272 
4273         addFunctionCaseWithPrograms<TestDefinition>(testGroup, cases[caseIdx].name,
4274                                                     addShaderCode16BitStorageInputOutput16To16x2,
4275                                                     runAndVerifyDefaultPipeline, testDef);
4276     }
4277 }
4278 
addGraphics16BitStorageInputOutputInt16To16x2Group(tcu::TestCaseGroup * testGroup)4279 void addGraphics16BitStorageInputOutputInt16To16x2Group(tcu::TestCaseGroup *testGroup)
4280 {
4281     map<string, string> fragments;
4282     RGBA defaultColors[4];
4283     SpecConstants noSpecConstants;
4284     PushConstants noPushConstants;
4285     vector<string> extensions;
4286     GraphicsResources noResources;
4287     StageToSpecConstantMap specConstantMap;
4288     VulkanFeatures requiredFeatures;
4289 
4290     const ShaderElement pipelineStages[] = {
4291         ShaderElement("vert", "main", VK_SHADER_STAGE_VERTEX_BIT),
4292         ShaderElement("frag", "main", VK_SHADER_STAGE_FRAGMENT_BIT),
4293     };
4294 
4295     specConstantMap[VK_SHADER_STAGE_VERTEX_BIT]   = noSpecConstants;
4296     specConstantMap[VK_SHADER_STAGE_FRAGMENT_BIT] = noSpecConstants;
4297 
4298     getDefaultColors(defaultColors);
4299 
4300     extensions.push_back("VK_KHR_16bit_storage");
4301     requiredFeatures.ext16BitStorage.storageInputOutput16 = true;
4302     requiredFeatures.coreFeatures.shaderInt16             = true;
4303 
4304     const struct
4305     {
4306         string name;
4307         uint32_t numElements;
4308         TestDefDataType dataType;
4309         NumberType numberType;
4310         bool isVector;
4311     } cases[] = {{"scalar_int", 1, DATATYPE_INT, NUMBERTYPE_INT16, false},
4312                  {"scalar_uint", 1, DATATYPE_UINT, NUMBERTYPE_UINT16, false},
4313                  {"ivec2", 2, DATATYPE_IVEC2, NUMBERTYPE_INT16, true},
4314                  {"uvec2", 2, DATATYPE_UVEC2, NUMBERTYPE_UINT16, true}};
4315 
4316     for (uint32_t caseIdx = 0; caseIdx < DE_LENGTH_OF_ARRAY(cases); ++caseIdx)
4317     {
4318         const RGBA outColor(128u, 128u, cases[caseIdx].isVector ? 128u : 255u, cases[caseIdx].isVector ? 128u : 255u);
4319         RGBA outputColors[4] = {outColor, outColor, outColor, outColor};
4320         vector<int16_t> int16Data(4 * cases[caseIdx].numElements, 32767);
4321         GraphicsInterfaces interfaces;
4322 
4323         interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, cases[caseIdx].numberType),
4324                                                  BufferSp(new Int16Buffer(int16Data))),
4325                                   std::make_pair(IFDataType(cases[caseIdx].numElements, cases[caseIdx].numberType),
4326                                                  BufferSp(new Int16Buffer(int16Data))));
4327 
4328         const InstanceContext &instanceContext = createInstanceContext(
4329             pipelineStages, defaultColors, outputColors, fragments, specConstantMap, noPushConstants, noResources,
4330             interfaces, extensions, requiredFeatures, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
4331             QP_TEST_RESULT_FAIL, string());
4332 
4333         TestDefinition testDef = {instanceContext, cases[caseIdx].dataType};
4334 
4335         addFunctionCaseWithPrograms<TestDefinition>(testGroup, cases[caseIdx].name,
4336                                                     addShaderCode16BitStorageInputOutput16To16x2,
4337                                                     runAndVerifyDefaultPipeline, testDef);
4338     }
4339 }
4340 
addGraphics16BitStorageInputOutputInt32To16Group(tcu::TestCaseGroup * testGroup)4341 void addGraphics16BitStorageInputOutputInt32To16Group(tcu::TestCaseGroup *testGroup)
4342 {
4343     de::Random rnd(deStringHash(testGroup->getName()));
4344     RGBA defaultColors[4];
4345     vector<string> extensions;
4346     map<string, string> fragments = passthruFragments();
4347     const uint32_t numDataPoints  = 64;
4348     // inputs and outputs are declared to be vectors of signed integers.
4349     // However, depending on the test, they may be interpreted as unsiged
4350     // integers. That won't be a problem as long as we passed the bits
4351     // in faithfully to the pipeline.
4352     vector<int32_t> inputs = getInt32s(rnd, numDataPoints);
4353     vector<int16_t> outputs;
4354 
4355     outputs.reserve(inputs.size());
4356     for (uint32_t numNdx = 0; numNdx < inputs.size(); ++numNdx)
4357         outputs.push_back(static_cast<int16_t>(0xffff & inputs[numNdx]));
4358 
4359     extensions.push_back("VK_KHR_16bit_storage");
4360 
4361     fragments["capability"] = "OpCapability StorageInputOutput16\n";
4362     fragments["extension"]  = "OpExtension \"SPV_KHR_16bit_storage\"\n";
4363 
4364     getDefaultColors(defaultColors);
4365 
4366     const StringTemplate scalarInterfaceOpCall("${convert} %${type16}");
4367 
4368     const StringTemplate scalarInterfaceOpFunc("");
4369 
4370     const StringTemplate scalarPreMain("             %${type16} = OpTypeInt 16 ${signed}\n"
4371                                        "          %op_${type16} = OpTypePointer Output %${type16}\n"
4372                                        "           %a3${type16} = OpTypeArray %${type16} %c_i32_3\n"
4373                                        "        %op_a3${type16} = OpTypePointer Output %a3${type16}\n"
4374                                        "%${type16}_${type32}_function = OpTypeFunction %${type16} %${type32}\n"
4375                                        "           %a3${type32} = OpTypeArray %${type32} %c_i32_3\n"
4376                                        "        %ip_a3${type32} = OpTypePointer Input %a3${type32}\n");
4377 
4378     const StringTemplate vecInterfaceOpCall("${convert} %${type16}");
4379 
4380     const StringTemplate vecInterfaceOpFunc("");
4381 
4382     const StringTemplate vecPreMain("                    %i16 = OpTypeInt 16 1\n"
4383                                     "                    %u16 = OpTypeInt 16 0\n"
4384                                     "                 %v4i16 = OpTypeVector %i16 4\n"
4385                                     "                 %v4u16 = OpTypeVector %u16 4\n"
4386                                     "          %op_${type16} = OpTypePointer Output %${type16}\n"
4387                                     "           %a3${type16} = OpTypeArray %${type16} %c_i32_3\n"
4388                                     "        %op_a3${type16} = OpTypePointer Output %a3${type16}\n"
4389                                     "%${type16}_${type32}_function = OpTypeFunction %${type16} %${type32}\n"
4390                                     "           %a3${type32} = OpTypeArray %${type32} %c_i32_3\n"
4391                                     "        %ip_a3${type32} = OpTypePointer Input %a3${type32}\n");
4392 
4393     struct Case
4394     {
4395         const char *name;
4396         const StringTemplate &interfaceOpCall;
4397         const StringTemplate &interfaceOpFunc;
4398         const StringTemplate &preMain;
4399         const char *type32;
4400         const char *type16;
4401         const char *sign;
4402         const char *opcode;
4403         uint32_t numPerCase;
4404         uint32_t numElements;
4405     };
4406 
4407     Case cases[] = {
4408         {"scalar_sint", scalarInterfaceOpCall, scalarInterfaceOpFunc, scalarPreMain, "i32", "i16", "1", "OpSConvert", 4,
4409          1},
4410         {"scalar_uint", scalarInterfaceOpCall, scalarInterfaceOpFunc, scalarPreMain, "u32", "u16", "0", "OpUConvert", 4,
4411          1},
4412         {"vector_sint", vecInterfaceOpCall, vecInterfaceOpFunc, vecPreMain, "v4i32", "v4i16", "1", "OpSConvert", 4 * 4,
4413          4},
4414         {"vector_uint", vecInterfaceOpCall, vecInterfaceOpFunc, vecPreMain, "v4u32", "v4u16", "0", "OpUConvert", 4 * 4,
4415          4},
4416     };
4417 
4418     VulkanFeatures requiredFeatures;
4419     requiredFeatures.coreFeatures.shaderInt16             = true;
4420     requiredFeatures.ext16BitStorage.storageInputOutput16 = true;
4421 
4422     for (uint32_t caseIdx = 0; caseIdx < DE_LENGTH_OF_ARRAY(cases); ++caseIdx)
4423     {
4424         map<string, string> specs;
4425 
4426         specs["type32"]  = cases[caseIdx].type32;
4427         specs["type16"]  = cases[caseIdx].type16;
4428         specs["signed"]  = cases[caseIdx].sign;
4429         specs["convert"] = cases[caseIdx].opcode;
4430 
4431         fragments["pre_main"]          = cases[caseIdx].preMain.specialize(specs);
4432         fragments["interface_op_call"] = cases[caseIdx].interfaceOpCall.specialize(specs);
4433         fragments["interface_op_func"] = cases[caseIdx].interfaceOpFunc.specialize(specs);
4434         fragments["input_type"]        = cases[caseIdx].type32;
4435         fragments["output_type"]       = cases[caseIdx].type16;
4436 
4437         GraphicsInterfaces interfaces;
4438         const uint32_t numPerCase = cases[caseIdx].numPerCase;
4439         vector<int32_t> subInputs(numPerCase);
4440         vector<int16_t> subOutputs(numPerCase);
4441 
4442         for (uint32_t caseNdx = 0; caseNdx < numDataPoints / numPerCase; ++caseNdx)
4443         {
4444             string testName = string(cases[caseIdx].name) + numberToString(caseNdx);
4445 
4446             for (uint32_t numNdx = 0; numNdx < numPerCase; ++numNdx)
4447             {
4448                 subInputs[numNdx]  = inputs[caseNdx * numPerCase + numNdx];
4449                 subOutputs[numNdx] = outputs[caseNdx * numPerCase + numNdx];
4450             }
4451             if (strcmp(cases[caseIdx].sign, "1") == 0)
4452             {
4453                 interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_INT32),
4454                                                          BufferSp(new Int32Buffer(subInputs))),
4455                                           std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_INT16),
4456                                                          BufferSp(new Int16Buffer(subOutputs))));
4457             }
4458             else
4459             {
4460                 interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_UINT32),
4461                                                          BufferSp(new Int32Buffer(subInputs))),
4462                                           std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_UINT16),
4463                                                          BufferSp(new Int16Buffer(subOutputs))));
4464             }
4465             createTestsForAllStages(testName, defaultColors, defaultColors, fragments, interfaces, extensions,
4466                                     testGroup, requiredFeatures);
4467         }
4468     }
4469 }
4470 
addGraphics16BitStorageInputOutputInt16To32Group(tcu::TestCaseGroup * testGroup)4471 void addGraphics16BitStorageInputOutputInt16To32Group(tcu::TestCaseGroup *testGroup)
4472 {
4473     de::Random rnd(deStringHash(testGroup->getName()));
4474     RGBA defaultColors[4];
4475     vector<string> extensions;
4476     map<string, string> fragments = passthruFragments();
4477     const uint32_t numDataPoints  = 64;
4478     // inputs and outputs are declared to be vectors of signed integers.
4479     // However, depending on the test, they may be interpreted as unsiged
4480     // integers. That won't be a problem as long as we passed the bits
4481     // in faithfully to the pipeline.
4482     vector<int16_t> inputs = getInt16s(rnd, numDataPoints);
4483     vector<int32_t> sOutputs;
4484     vector<int32_t> uOutputs;
4485     const uint16_t signBitMask    = 0x8000;
4486     const uint32_t signExtendMask = 0xffff0000;
4487 
4488     sOutputs.reserve(inputs.size());
4489     uOutputs.reserve(inputs.size());
4490 
4491     for (uint32_t numNdx = 0; numNdx < inputs.size(); ++numNdx)
4492     {
4493         uOutputs.push_back(static_cast<uint16_t>(inputs[numNdx]));
4494         if (inputs[numNdx] & signBitMask)
4495             sOutputs.push_back(static_cast<int32_t>(inputs[numNdx] | signExtendMask));
4496         else
4497             sOutputs.push_back(static_cast<int32_t>(inputs[numNdx]));
4498     }
4499 
4500     extensions.push_back("VK_KHR_16bit_storage");
4501 
4502     fragments["capability"] = "OpCapability StorageInputOutput16\n";
4503     fragments["extension"]  = "OpExtension \"SPV_KHR_16bit_storage\"\n";
4504 
4505     getDefaultColors(defaultColors);
4506 
4507     const StringTemplate scalarIfOpCall("${convert} %${type32}");
4508 
4509     const StringTemplate scalarIfOpFunc("");
4510 
4511     const StringTemplate scalarPreMain("             %${type16} = OpTypeInt 16 ${signed}\n"
4512                                        "          %ip_${type16} = OpTypePointer Input %${type16}\n"
4513                                        "           %a3${type16} = OpTypeArray %${type16} %c_i32_3\n"
4514                                        "        %ip_a3${type16} = OpTypePointer Input %a3${type16}\n"
4515                                        "%${type32}_${type16}_function = OpTypeFunction %${type32} %${type16}\n"
4516                                        "           %a3${type32} = OpTypeArray %${type32} %c_i32_3\n"
4517                                        "        %op_a3${type32} = OpTypePointer Output %a3${type32}\n");
4518 
4519     const StringTemplate vecIfOpCall("${convert} %${type32}");
4520 
4521     const StringTemplate vecIfOpFunc("");
4522 
4523     const StringTemplate vecPreMain("                    %i16 = OpTypeInt 16 1\n"
4524                                     "                    %u16 = OpTypeInt 16 0\n"
4525                                     "                 %v4i16 = OpTypeVector %i16 4\n"
4526                                     "                 %v4u16 = OpTypeVector %u16 4\n"
4527                                     "          %ip_${type16} = OpTypePointer Input %${type16}\n"
4528                                     "           %a3${type16} = OpTypeArray %${type16} %c_i32_3\n"
4529                                     "        %ip_a3${type16} = OpTypePointer Input %a3${type16}\n"
4530                                     "%${type32}_${type16}_function = OpTypeFunction %${type32} %${type16}\n"
4531                                     "           %a3${type32} = OpTypeArray %${type32} %c_i32_3\n"
4532                                     "        %op_a3${type32} = OpTypePointer Output %a3${type32}\n");
4533 
4534     struct Case
4535     {
4536         const char *name;
4537         const StringTemplate &interfaceOpCall;
4538         const StringTemplate &interfaceOpFunc;
4539         const StringTemplate &preMain;
4540         const char *type32;
4541         const char *type16;
4542         const char *sign;
4543         const char *opcode;
4544         uint32_t numPerCase;
4545         uint32_t numElements;
4546     };
4547 
4548     Case cases[] = {
4549         {"scalar_sint", scalarIfOpCall, scalarIfOpFunc, scalarPreMain, "i32", "i16", "1", "OpSConvert", 4, 1},
4550         {"scalar_uint", scalarIfOpCall, scalarIfOpFunc, scalarPreMain, "u32", "u16", "0", "OpUConvert", 4, 1},
4551         {"vector_sint", vecIfOpCall, vecIfOpFunc, vecPreMain, "v4i32", "v4i16", "1", "OpSConvert", 4 * 4, 4},
4552         {"vector_uint", vecIfOpCall, vecIfOpFunc, vecPreMain, "v4u32", "v4u16", "0", "OpUConvert", 4 * 4, 4},
4553     };
4554 
4555     VulkanFeatures requiredFeatures;
4556     requiredFeatures.coreFeatures.shaderInt16             = true;
4557     requiredFeatures.ext16BitStorage.storageInputOutput16 = true;
4558 
4559     for (uint32_t caseIdx = 0; caseIdx < DE_LENGTH_OF_ARRAY(cases); ++caseIdx)
4560     {
4561         map<string, string> specs;
4562 
4563         specs["type32"]  = cases[caseIdx].type32;
4564         specs["type16"]  = cases[caseIdx].type16;
4565         specs["signed"]  = cases[caseIdx].sign;
4566         specs["convert"] = cases[caseIdx].opcode;
4567 
4568         fragments["pre_main"]          = cases[caseIdx].preMain.specialize(specs);
4569         fragments["interface_op_call"] = cases[caseIdx].interfaceOpCall.specialize(specs);
4570         fragments["interface_op_func"] = cases[caseIdx].interfaceOpFunc.specialize(specs);
4571         fragments["input_type"]        = cases[caseIdx].type16;
4572         fragments["output_type"]       = cases[caseIdx].type32;
4573 
4574         GraphicsInterfaces interfaces;
4575         const uint32_t numPerCase = cases[caseIdx].numPerCase;
4576         vector<int16_t> subInputs(numPerCase);
4577         vector<int32_t> subOutputs(numPerCase);
4578 
4579         for (uint32_t caseNdx = 0; caseNdx < numDataPoints / numPerCase; ++caseNdx)
4580         {
4581             string testName = string(cases[caseIdx].name) + numberToString(caseNdx);
4582 
4583             for (uint32_t numNdx = 0; numNdx < numPerCase; ++numNdx)
4584             {
4585                 subInputs[numNdx] = inputs[caseNdx * numPerCase + numNdx];
4586                 if (cases[caseIdx].sign[0] == '1')
4587                     subOutputs[numNdx] = sOutputs[caseNdx * numPerCase + numNdx];
4588                 else
4589                     subOutputs[numNdx] = uOutputs[caseNdx * numPerCase + numNdx];
4590             }
4591             if (strcmp(cases[caseIdx].sign, "1") == 0)
4592             {
4593                 interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_INT16),
4594                                                          BufferSp(new Int16Buffer(subInputs))),
4595                                           std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_INT32),
4596                                                          BufferSp(new Int32Buffer(subOutputs))));
4597             }
4598             else
4599             {
4600                 interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_UINT16),
4601                                                          BufferSp(new Int16Buffer(subInputs))),
4602                                           std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_UINT32),
4603                                                          BufferSp(new Int32Buffer(subOutputs))));
4604             }
4605             createTestsForAllStages(testName, defaultColors, defaultColors, fragments, interfaces, extensions,
4606                                     testGroup, requiredFeatures);
4607         }
4608     }
4609 }
4610 
addGraphics16BitStorageInputOutputInt16To16Group(tcu::TestCaseGroup * testGroup)4611 void addGraphics16BitStorageInputOutputInt16To16Group(tcu::TestCaseGroup *testGroup)
4612 {
4613     de::Random rnd(deStringHash(testGroup->getName()));
4614     RGBA defaultColors[4];
4615     vector<string> extensions;
4616     map<string, string> fragments = passthruFragments();
4617     const uint32_t numDataPoints  = 64;
    // Inputs and outputs are declared to be vectors of signed integers.
    // However, depending on the test, they may be interpreted as unsigned
    // integers. That won't be a problem as long as we pass the bits
    // through to the pipeline faithfully.
4622     vector<int16_t> inputs = getInt16s(rnd, numDataPoints);
4623     VulkanFeatures requiredFeatures;
4624 
4625     requiredFeatures.ext16BitStorage.storageInputOutput16 = true;
4626     extensions.push_back("VK_KHR_16bit_storage");
4627 
4628     fragments["capability"] = "OpCapability StorageInputOutput16\n";
4629     fragments["extension"]  = "OpExtension \"SPV_KHR_16bit_storage\"\n";
4630 
4631     getDefaultColors(defaultColors);
4632 
4633     const StringTemplate scalarIfOpCall("OpCopyObject %${type16}");
4634 
4635     const StringTemplate scalarIfOpFunc("");
4636 
4637     const StringTemplate scalarPreMain("             %${type16} = OpTypeInt 16 ${signed}\n"
4638                                        "          %ip_${type16} = OpTypePointer Input %${type16}\n"
4639                                        "           %a3${type16} = OpTypeArray %${type16} %c_i32_3\n"
4640                                        "        %ip_a3${type16} = OpTypePointer Input %a3${type16}\n"
4641                                        "%${type16}_${type16}_function = OpTypeFunction %${type16} %${type16}\n"
4642                                        "          %op_${type16} = OpTypePointer Output %${type16}\n"
4643                                        "        %op_a3${type16} = OpTypePointer Output %a3${type16}\n");
4644 
4645     const StringTemplate vecIfOpCall("OpCopyObject %${type16}");
4646 
4647     const StringTemplate vecIfOpFunc("");
4648 
4649     const StringTemplate vecPreMain("                   %i16 = OpTypeInt 16 1\n"
4650                                     "                   %u16 = OpTypeInt 16 0\n"
4651                                     "                 %v4i16 = OpTypeVector %i16 4\n"
4652                                     "                 %v4u16 = OpTypeVector %u16 4\n"
4653                                     "          %ip_${type16} = OpTypePointer Input %${type16}\n"
4654                                     "           %a3${type16} = OpTypeArray %${type16} %c_i32_3\n"
4655                                     "        %ip_a3${type16} = OpTypePointer Input %a3${type16}\n"
4656                                     "%${type16}_${type16}_function = OpTypeFunction %${type16} %${type16}\n"
4657                                     "          %op_${type16} = OpTypePointer Output %${type16}\n"
4658                                     "        %op_a3${type16} = OpTypePointer Output %a3${type16}\n");
4659 
4660     struct Case
4661     {
4662         const char *name;
4663         const StringTemplate &interfaceOpCall;
4664         const StringTemplate &interfaceOpFunc;
4665         const StringTemplate &preMain;
4666         const char *type16;
4667         const char *sign;
4668         uint32_t numPerCase;
4669         uint32_t numElements;
4670     };
4671 
4672     Case cases[] = {
4673         {"scalar_sint", scalarIfOpCall, scalarIfOpFunc, scalarPreMain, "i16", "1", 4, 1},
4674         {"scalar_uint", scalarIfOpCall, scalarIfOpFunc, scalarPreMain, "u16", "0", 4, 1},
4675         {"vector_sint", vecIfOpCall, vecIfOpFunc, vecPreMain, "v4i16", "1", 4 * 4, 4},
4676         {"vector_uint", vecIfOpCall, vecIfOpFunc, vecPreMain, "v4u16", "0", 4 * 4, 4},
4677     };
4678 
4679     for (uint32_t caseIdx = 0; caseIdx < DE_LENGTH_OF_ARRAY(cases); ++caseIdx)
4680     {
4681         map<string, string> specs;
4682 
4683         specs["type16"] = cases[caseIdx].type16;
4684         specs["signed"] = cases[caseIdx].sign;
4685 
4686         fragments["pre_main"]          = cases[caseIdx].preMain.specialize(specs);
4687         fragments["interface_op_call"] = cases[caseIdx].interfaceOpCall.specialize(specs);
4688         fragments["interface_op_func"] = cases[caseIdx].interfaceOpFunc.specialize(specs);
4689         fragments["input_type"]        = cases[caseIdx].type16;
4690         fragments["output_type"]       = cases[caseIdx].type16;
4691 
4692         GraphicsInterfaces interfaces;
4693         const uint32_t numPerCase = cases[caseIdx].numPerCase;
4694         vector<int16_t> subInputsOutputs(numPerCase);
4695         const NumberType numberType = strcmp(cases[caseIdx].sign, "1") == 0 ? NUMBERTYPE_INT16 : NUMBERTYPE_UINT16;
4696 
4697         for (uint32_t caseNdx = 0; caseNdx < numDataPoints / numPerCase; ++caseNdx)
4698         {
4699             string testName = string(cases[caseIdx].name) + numberToString(caseNdx);
4700 
4701             for (uint32_t numNdx = 0; numNdx < numPerCase; ++numNdx)
4702                 subInputsOutputs[numNdx] = inputs[caseNdx * numPerCase + numNdx];
4703 
4704             interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, numberType),
4705                                                      BufferSp(new Int16Buffer(subInputsOutputs))),
4706                                       std::make_pair(IFDataType(cases[caseIdx].numElements, numberType),
4707                                                      BufferSp(new Int16Buffer(subInputsOutputs))));
4708 
4709             createTestsForAllStages(testName, defaultColors, defaultColors, fragments, interfaces, extensions,
4710                                     testGroup, requiredFeatures);
4711         }
4712     }
4713 }
4714 
addGraphics16BitStoragePushConstantFloat16To32Group(tcu::TestCaseGroup * testGroup)4715 void addGraphics16BitStoragePushConstantFloat16To32Group(tcu::TestCaseGroup *testGroup)
4716 {
4717     de::Random rnd(deStringHash(testGroup->getName()));
4718     map<string, string> fragments;
4719     RGBA defaultColors[4];
4720     vector<string> extensions;
4721     GraphicsResources resources;
4722     PushConstants pcs;
4723     const uint32_t numDataPoints = 64;
4724     vector<deFloat16> float16Data(getFloat16s(rnd, numDataPoints));
4725     vector<float> float32Data;
4726     VulkanFeatures requiredFeatures;
4727 
4728     struct ConstantIndex
4729     {
4730         bool useConstantIndex;
4731         uint32_t constantIndex;
4732     };
4733 
4734     ConstantIndex constantIndices[] = {{false, 0}, {true, 4}, {true, 5}, {true, 6}};
4735 
4736     float32Data.reserve(numDataPoints);
4737     for (uint32_t numIdx = 0; numIdx < numDataPoints; ++numIdx)
4738         float32Data.push_back(deFloat16To32(float16Data[numIdx]));
4739 
4740     extensions.push_back("VK_KHR_16bit_storage");
4741 
4742     requiredFeatures.coreFeatures.vertexPipelineStoresAndAtomics = true;
4743     requiredFeatures.coreFeatures.fragmentStoresAndAtomics       = true;
4744     requiredFeatures.ext16BitStorage.storagePushConstant16       = true;
4745 
4746     fragments["capability"] = "OpCapability StoragePushConstant16\n";
4747     fragments["extension"]  = "OpExtension \"SPV_KHR_16bit_storage\"";
4748 
4749     pcs.setPushConstant(BufferSp(new Float16Buffer(float16Data)));
4750     resources.verifyIO = check32BitFloats;
4751 
4752     getDefaultColors(defaultColors);
4753 
4754     const StringTemplate testFun("%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
4755                                  "    %param = OpFunctionParameter %v4f32\n"
4756 
4757                                  "%entry = OpLabel\n"
4758                                  "    %i = OpVariable %fp_i32 Function\n"
4759                                  "         OpStore %i %c_i32_0\n"
4760                                  "         OpBranch %loop\n"
4761 
4762                                  " %loop = OpLabel\n"
4763                                  "   %15 = OpLoad %i32 %i\n"
4764                                  "   %lt = OpSLessThan %bool %15 ${count}\n"
4765                                  "         OpLoopMerge %merge %inc None\n"
4766                                  "         OpBranchConditional %lt %write %merge\n"
4767 
4768                                  "%write = OpLabel\n"
4769                                  "   %30 = OpLoad %i32 %i\n"
4770                                  "  %src = OpAccessChain ${pp_type16} %pc16 %c_i32_0 %${arrayindex} ${index0:opt}\n"
4771                                  "%val16 = OpLoad ${f_type16} %src\n"
4772                                  "%val32 = OpFConvert ${f_type32} %val16\n"
4773                                  "  %dst = OpAccessChain ${up_type32} %ssbo32 %c_i32_0 %30 ${index0:opt}\n"
4774                                  "         OpStore %dst %val32\n"
4775 
4776                                  "${store:opt}\n"
4777 
4778                                  "         OpBranch %inc\n"
4779 
4780                                  "  %inc = OpLabel\n"
4781                                  "   %37 = OpLoad %i32 %i\n"
4782                                  "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
4783                                  "         OpStore %i %39\n"
4784                                  "         OpBranch %loop\n"
4785 
4786                                  "%merge = OpLabel\n"
4787                                  "         OpReturnValue %param\n"
4788 
4789                                  "OpFunctionEnd\n");
4790 
4791     { // Scalar cases
4792         const StringTemplate preMain("      %f16 = OpTypeFloat 16\n"
4793                                      " %c_i32_64 = OpConstant %i32 64\n"
4794                                      " %c_i32_ci = OpConstant %i32 ${constarrayidx}\n"
4795                                      "  %a64f16 = OpTypeArray %f16 %c_i32_64\n"
4796                                      "  %a64f32 = OpTypeArray %f32 %c_i32_64\n"
4797                                      "   %pp_f16 = OpTypePointer PushConstant %f16\n"
4798                                      "   %up_f32 = OpTypePointer Uniform %f32\n"
4799                                      "   %SSBO32 = OpTypeStruct %a64f32\n"
4800                                      "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
4801                                      "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
4802                                      "     %PC16 = OpTypeStruct %a64f16\n"
4803                                      "  %pp_PC16 = OpTypePointer PushConstant %PC16\n"
4804                                      "     %pc16 = OpVariable %pp_PC16 PushConstant\n");
4805 
4806         fragments["decoration"] = "OpDecorate %a64f16 ArrayStride 2\n"
4807                                   "OpDecorate %a64f32 ArrayStride 4\n"
4808                                   "OpDecorate %SSBO32 BufferBlock\n"
4809                                   "OpMemberDecorate %SSBO32 0 Offset 0\n"
4810                                   "OpDecorate %PC16 Block\n"
4811                                   "OpMemberDecorate %PC16 0 Offset 0\n"
4812                                   "OpDecorate %ssbo32 DescriptorSet 0\n"
4813                                   "OpDecorate %ssbo32 Binding 0\n";
4814 
4815         map<string, string> specs;
4816 
4817         specs["count"]     = "%c_i32_64";
4818         specs["pp_type16"] = "%pp_f16";
4819         specs["f_type16"]  = "%f16";
4820         specs["f_type32"]  = "%f32";
4821         specs["up_type32"] = "%up_f32";
4822 
4823         for (uint32_t constIndexIdx = 0; constIndexIdx < DE_LENGTH_OF_ARRAY(constantIndices); ++constIndexIdx)
4824         {
4825             bool useConstIdx  = constantIndices[constIndexIdx].useConstantIndex;
4826             uint32_t constIdx = constantIndices[constIndexIdx].constantIndex;
4827             string testName   = "scalar";
4828             vector<float> float32ConstIdxData;
4829 
4830             if (useConstIdx)
4831             {
4832                 float32ConstIdxData.reserve(numDataPoints);
4833 
4834                 for (uint32_t numIdx = 0; numIdx < numDataPoints; ++numIdx)
4835                     float32ConstIdxData.push_back(float32Data[constIdx]);
4836             }
4837 
4838             specs["constarrayidx"] = de::toString(constIdx);
4839             if (useConstIdx)
4840                 specs["arrayindex"] = "c_i32_ci";
4841             else
4842                 specs["arrayindex"] = "30";
4843 
4844             resources.outputs.clear();
4845             resources.outputs.push_back(
4846                 Resource(BufferSp(new Float32Buffer(useConstIdx ? float32ConstIdxData : float32Data)),
4847                          VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
4848 
4849             fragments["pre_main"] = preMain.specialize(specs);
4850             fragments["testfun"]  = testFun.specialize(specs);
4851 
4852             if (useConstIdx)
4853                 testName += string("_const_idx_") + de::toString(constIdx);
4854 
4855             createTestsForAllStages(testName.c_str(), defaultColors, defaultColors, fragments, pcs, resources,
4856                                     extensions, testGroup, requiredFeatures);
4857         }
4858     }
4859 
4860     { // Vector cases
4861         const StringTemplate preMain("      %f16 = OpTypeFloat 16\n"
4862                                      "    %v4f16 = OpTypeVector %f16 4\n"
4863                                      " %c_i32_16 = OpConstant %i32 16\n"
4864                                      " %c_i32_ci = OpConstant %i32 ${constarrayidx}\n"
4865                                      " %a16v4f16 = OpTypeArray %v4f16 %c_i32_16\n"
4866                                      " %a16v4f32 = OpTypeArray %v4f32 %c_i32_16\n"
4867                                      " %pp_v4f16 = OpTypePointer PushConstant %v4f16\n"
4868                                      " %up_v4f32 = OpTypePointer Uniform %v4f32\n"
4869                                      "   %SSBO32 = OpTypeStruct %a16v4f32\n"
4870                                      "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
4871                                      "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
4872                                      "     %PC16 = OpTypeStruct %a16v4f16\n"
4873                                      "  %pp_PC16 = OpTypePointer PushConstant %PC16\n"
4874                                      "     %pc16 = OpVariable %pp_PC16 PushConstant\n");
4875 
4876         fragments["decoration"] = "OpDecorate %a16v4f16 ArrayStride 8\n"
4877                                   "OpDecorate %a16v4f32 ArrayStride 16\n"
4878                                   "OpDecorate %SSBO32 BufferBlock\n"
4879                                   "OpMemberDecorate %SSBO32 0 Offset 0\n"
4880                                   "OpDecorate %PC16 Block\n"
4881                                   "OpMemberDecorate %PC16 0 Offset 0\n"
4882                                   "OpDecorate %ssbo32 DescriptorSet 0\n"
4883                                   "OpDecorate %ssbo32 Binding 0\n";
4884 
4885         map<string, string> specs;
4886 
4887         specs["count"]     = "%c_i32_16";
4888         specs["pp_type16"] = "%pp_v4f16";
4889         specs["f_type16"]  = "%v4f16";
4890         specs["f_type32"]  = "%v4f32";
4891         specs["up_type32"] = "%up_v4f32";
4892 
4893         for (uint32_t constIndexIdx = 0; constIndexIdx < DE_LENGTH_OF_ARRAY(constantIndices); ++constIndexIdx)
4894         {
4895             bool useConstIdx  = constantIndices[constIndexIdx].useConstantIndex;
4896             uint32_t constIdx = constantIndices[constIndexIdx].constantIndex;
4897             string testName   = "vector";
4898             vector<float> float32ConstIdxData;
4899 
4900             if (useConstIdx)
4901             {
4902                 float32ConstIdxData.reserve(numDataPoints);
4903 
4904                 for (uint32_t numIdx = 0; numIdx < numDataPoints; ++numIdx)
4905                     float32ConstIdxData.push_back(float32Data[constIdx * 4 + numIdx % 4]);
4906             }
4907 
4908             specs["constarrayidx"] = de::toString(constIdx);
4909             if (useConstIdx)
4910                 specs["arrayindex"] = "c_i32_ci";
4911             else
4912                 specs["arrayindex"] = "30";
4913 
4914             resources.outputs.clear();
4915             resources.outputs.push_back(
4916                 Resource(BufferSp(new Float32Buffer(useConstIdx ? float32ConstIdxData : float32Data)),
4917                          VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
4918 
4919             fragments["pre_main"] = preMain.specialize(specs);
4920             fragments["testfun"]  = testFun.specialize(specs);
4921 
4922             if (useConstIdx)
4923                 testName += string("_const_idx_") + de::toString(constIdx);
4924 
4925             createTestsForAllStages(testName.c_str(), defaultColors, defaultColors, fragments, pcs, resources,
4926                                     extensions, testGroup, requiredFeatures);
4927         }
4928     }
4929 
4930     { // Matrix cases
4931         const StringTemplate preMain("   %c_i32_8 = OpConstant %i32 8\n"
4932                                      "  %c_i32_ci = OpConstant %i32 ${constarrayidx}\n"
4933                                      "      %f16  = OpTypeFloat 16\n"
4934                                      "    %v4f16  = OpTypeVector %f16 4\n"
4935                                      "  %m2v4f16  = OpTypeMatrix %v4f16 2\n"
4936                                      "  %m2v4f32  = OpTypeMatrix %v4f32 2\n"
4937                                      " %a8m2v4f16 = OpTypeArray %m2v4f16 %c_i32_8\n"
4938                                      " %a8m2v4f32 = OpTypeArray %m2v4f32 %c_i32_8\n"
4939                                      " %pp_v4f16  = OpTypePointer PushConstant %v4f16\n"
4940                                      " %up_v4f32  = OpTypePointer Uniform %v4f32\n"
4941                                      "   %SSBO32  = OpTypeStruct %a8m2v4f32\n"
4942                                      "%up_SSBO32  = OpTypePointer Uniform %SSBO32\n"
4943                                      "   %ssbo32  = OpVariable %up_SSBO32 Uniform\n"
4944                                      "     %PC16  = OpTypeStruct %a8m2v4f16\n"
4945                                      "  %pp_PC16  = OpTypePointer PushConstant %PC16\n"
4946                                      "     %pc16  = OpVariable %pp_PC16 PushConstant\n");
4947 
4948         fragments["decoration"] = "OpDecorate %a8m2v4f16 ArrayStride 16\n"
4949                                   "OpDecorate %a8m2v4f32 ArrayStride 32\n"
4950                                   "OpDecorate %SSBO32 BufferBlock\n"
4951                                   "OpMemberDecorate %SSBO32 0 Offset 0\n"
4952                                   "OpMemberDecorate %SSBO32 0 ColMajor\n"
4953                                   "OpMemberDecorate %SSBO32 0 MatrixStride 16\n"
4954                                   "OpDecorate %PC16 Block\n"
4955                                   "OpMemberDecorate %PC16 0 Offset 0\n"
4956                                   "OpMemberDecorate %PC16 0 ColMajor\n"
4957                                   "OpMemberDecorate %PC16 0 MatrixStride 8\n"
4958                                   "OpDecorate %ssbo32 DescriptorSet 0\n"
4959                                   "OpDecorate %ssbo32 Binding 0\n";
4960 
4961         map<string, string> specs;
4962 
4963         specs["count"]     = "%c_i32_8";
4964         specs["pp_type16"] = "%pp_v4f16";
4965         specs["up_type32"] = "%up_v4f32";
4966         specs["f_type16"]  = "%v4f16";
4967         specs["f_type32"]  = "%v4f32";
4968         specs["index0"]    = "%c_i32_0";
4969 
4970         for (uint32_t constIndexIdx = 0; constIndexIdx < DE_LENGTH_OF_ARRAY(constantIndices); ++constIndexIdx)
4971         {
4972             bool useConstIdx  = constantIndices[constIndexIdx].useConstantIndex;
4973             uint32_t constIdx = constantIndices[constIndexIdx].constantIndex;
4974             string testName   = "matrix";
4975             vector<float> float32ConstIdxData;
4976             const StringTemplate store("  %src_1 = OpAccessChain %pp_v4f16 %pc16 %c_i32_0 %${arrayindex} %c_i32_1\n"
4977                                        "%val16_1 = OpLoad %v4f16 %src_1\n"
4978                                        "%val32_1 = OpFConvert %v4f32 %val16_1\n"
4979                                        "  %dst_1 = OpAccessChain %up_v4f32 %ssbo32 %c_i32_0 %30 %c_i32_1\n"
4980                                        "           OpStore %dst_1 %val32_1\n");
4981 
4982             if (useConstIdx)
4983             {
4984                 float32ConstIdxData.reserve(numDataPoints);
4985 
4986                 for (uint32_t numIdx = 0; numIdx < numDataPoints; ++numIdx)
4987                     float32ConstIdxData.push_back(float32Data[constIdx * 8 + numIdx % 8]);
4988             }
4989 
4990             specs["constarrayidx"] = de::toString(constIdx);
4991             if (useConstIdx)
4992                 specs["arrayindex"] = "c_i32_ci";
4993             else
4994                 specs["arrayindex"] = "30";
4995 
4996             specs["store"] = store.specialize(specs);
4997 
4998             resources.outputs.clear();
4999             resources.outputs.push_back(
5000                 Resource(BufferSp(new Float32Buffer(useConstIdx ? float32ConstIdxData : float32Data)),
5001                          VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
5002 
5003             fragments["pre_main"] = preMain.specialize(specs);
5004             fragments["testfun"]  = testFun.specialize(specs);
5005 
5006             if (useConstIdx)
5007                 testName += string("_const_idx_") + de::toString(constIdx);
5008 
5009             createTestsForAllStages(testName.c_str(), defaultColors, defaultColors, fragments, pcs, resources,
5010                                     extensions, testGroup, requiredFeatures);
5011         }
5012     }
5013 }
5014 
addGraphics16BitStoragePushConstantInt16To32Group(tcu::TestCaseGroup * testGroup)5015 void addGraphics16BitStoragePushConstantInt16To32Group(tcu::TestCaseGroup *testGroup)
5016 {
5017     de::Random rnd(deStringHash(testGroup->getName()));
5018     map<string, string> fragments;
5019     RGBA defaultColors[4];
5020     const uint32_t numDataPoints = 64;
5021     vector<int16_t> inputs       = getInt16s(rnd, numDataPoints);
5022     vector<int32_t> sOutputs;
5023     vector<int32_t> uOutputs;
5024     PushConstants pcs;
5025     GraphicsResources resources;
5026     vector<string> extensions;
5027     const uint16_t signBitMask    = 0x8000;
5028     const uint32_t signExtendMask = 0xffff0000;
5029     VulkanFeatures requiredFeatures;
5030 
5031     struct ConstantIndex
5032     {
5033         bool useConstantIndex;
5034         uint32_t constantIndex;
5035     };
5036 
5037     ConstantIndex constantIndices[] = {{false, 0}, {true, 4}, {true, 5}, {true, 6}};
5038 
5039     sOutputs.reserve(inputs.size());
5040     uOutputs.reserve(inputs.size());
5041 
5042     for (uint32_t numNdx = 0; numNdx < inputs.size(); ++numNdx)
5043     {
5044         uOutputs.push_back(static_cast<uint16_t>(inputs[numNdx]));
5045         if (inputs[numNdx] & signBitMask)
5046             sOutputs.push_back(static_cast<int32_t>(inputs[numNdx] | signExtendMask));
5047         else
5048             sOutputs.push_back(static_cast<int32_t>(inputs[numNdx]));
5049     }
5050 
5051     extensions.push_back("VK_KHR_16bit_storage");
5052 
5053     requiredFeatures.coreFeatures.vertexPipelineStoresAndAtomics = true;
5054     requiredFeatures.coreFeatures.fragmentStoresAndAtomics       = true;
5055     requiredFeatures.ext16BitStorage.storagePushConstant16       = true;
5056 
5057     fragments["capability"] = "OpCapability StoragePushConstant16\n";
5058     fragments["extension"]  = "OpExtension \"SPV_KHR_16bit_storage\"";
5059 
5060     pcs.setPushConstant(BufferSp(new Int16Buffer(inputs)));
5061 
5062     getDefaultColors(defaultColors);
5063 
5064     const StringTemplate testFun("%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
5065                                  "    %param = OpFunctionParameter %v4f32\n"
5066 
5067                                  "%entry = OpLabel\n"
5068                                  "    %i = OpVariable %fp_i32 Function\n"
5069                                  "         OpStore %i %c_i32_0\n"
5070                                  "         OpBranch %loop\n"
5071 
5072                                  " %loop = OpLabel\n"
5073                                  "   %15 = OpLoad %i32 %i\n"
5074                                  "   %lt = OpSLessThan %bool %15 %c_i32_${count}\n"
5075                                  "         OpLoopMerge %merge %inc None\n"
5076                                  "         OpBranchConditional %lt %write %merge\n"
5077 
5078                                  "%write = OpLabel\n"
5079                                  "   %30 = OpLoad %i32 %i\n"
5080                                  "  %src = OpAccessChain %pp_${type16} %pc16 %c_i32_0 %${arrayindex}\n"
5081                                  "%val16 = OpLoad %${type16} %src\n"
5082                                  "%val32 = ${convert} %${type32} %val16\n"
5083                                  "  %dst = OpAccessChain %up_${type32} %ssbo32 %c_i32_0 %30\n"
5084                                  "         OpStore %dst %val32\n"
5085                                  "         OpBranch %inc\n"
5086 
5087                                  "  %inc = OpLabel\n"
5088                                  "   %37 = OpLoad %i32 %i\n"
5089                                  "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
5090                                  "         OpStore %i %39\n"
5091                                  "         OpBranch %loop\n"
5092 
5093                                  "%merge = OpLabel\n"
5094                                  "         OpReturnValue %param\n"
5095 
5096                                  "OpFunctionEnd\n");
5097 
5098     { // Scalar cases
5099         const StringTemplate preMain(
5100             "         %${type16} = OpTypeInt 16 ${signed}\n"
5101             "    %c_i32_${count} = OpConstant %i32 ${count}\n" // Should be the same as numDataPoints
5102             "          %c_i32_ci = OpConstant %i32 ${constarrayidx}\n"
5103             "%a${count}${type16} = OpTypeArray %${type16} %c_i32_${count}\n"
5104             "%a${count}${type32} = OpTypeArray %${type32} %c_i32_${count}\n"
5105             "      %pp_${type16} = OpTypePointer PushConstant %${type16}\n"
5106             "      %up_${type32} = OpTypePointer Uniform      %${type32}\n"
5107             "            %SSBO32 = OpTypeStruct %a${count}${type32}\n"
5108             "         %up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
5109             "            %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
5110             "              %PC16 = OpTypeStruct %a${count}${type16}\n"
5111             "           %pp_PC16 = OpTypePointer PushConstant %PC16\n"
5112             "              %pc16 = OpVariable %pp_PC16 PushConstant\n");
5113 
5114         const StringTemplate decoration("OpDecorate %a${count}${type16} ArrayStride 2\n"
5115                                         "OpDecorate %a${count}${type32} ArrayStride 4\n"
5116                                         "OpDecorate %SSBO32 BufferBlock\n"
5117                                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
5118                                         "OpDecorate %PC16 Block\n"
5119                                         "OpMemberDecorate %PC16 0 Offset 0\n"
5120                                         "OpDecorate %ssbo32 DescriptorSet 0\n"
5121                                         "OpDecorate %ssbo32 Binding 0\n");
5122 
5123         { // signed int
5124             map<string, string> specs;
5125 
5126             specs["type16"]  = "i16";
5127             specs["type32"]  = "i32";
5128             specs["signed"]  = "1";
5129             specs["count"]   = "64";
5130             specs["convert"] = "OpSConvert";
5131 
5132             for (uint32_t constIndexIdx = 0; constIndexIdx < DE_LENGTH_OF_ARRAY(constantIndices); ++constIndexIdx)
5133             {
5134                 bool useConstIdx  = constantIndices[constIndexIdx].useConstantIndex;
5135                 uint32_t constIdx = constantIndices[constIndexIdx].constantIndex;
5136                 string testName   = "sint_scalar";
5137                 vector<int32_t> constIdxData;
5138 
5139                 if (useConstIdx)
5140                 {
5141                     constIdxData.reserve(numDataPoints);
5142 
5143                     for (uint32_t numIdx = 0; numIdx < numDataPoints; ++numIdx)
5144                         constIdxData.push_back(sOutputs[constIdx]);
5145                 }
5146 
5147                 specs["constarrayidx"] = de::toString(constIdx);
5148                 if (useConstIdx)
5149                     specs["arrayindex"] = "c_i32_ci";
5150                 else
5151                     specs["arrayindex"] = "30";
5152 
5153                 if (useConstIdx)
5154                     testName += string("_const_idx_") + de::toString(constIdx);
5155 
5156                 resources.outputs.clear();
5157                 resources.outputs.push_back(Resource(BufferSp(new Int32Buffer(useConstIdx ? constIdxData : sOutputs)),
5158                                                      VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
5159 
5160                 fragments["testfun"]    = testFun.specialize(specs);
5161                 fragments["pre_main"]   = preMain.specialize(specs);
5162                 fragments["decoration"] = decoration.specialize(specs);
5163 
5164                 createTestsForAllStages(testName.c_str(), defaultColors, defaultColors, fragments, pcs, resources,
5165                                         extensions, testGroup, requiredFeatures);
5166             }
5167         }
5168         { // unsigned int
5169             map<string, string> specs;
5170 
5171             specs["type16"]  = "u16";
5172             specs["type32"]  = "u32";
5173             specs["signed"]  = "0";
5174             specs["count"]   = "64";
5175             specs["convert"] = "OpUConvert";
5176 
5177             for (uint32_t constIndexIdx = 0; constIndexIdx < DE_LENGTH_OF_ARRAY(constantIndices); ++constIndexIdx)
5178             {
5179                 bool useConstIdx  = constantIndices[constIndexIdx].useConstantIndex;
5180                 uint32_t constIdx = constantIndices[constIndexIdx].constantIndex;
5181                 string testName   = "uint_scalar";
5182                 vector<int32_t> constIdxData;
5183 
5184                 if (useConstIdx)
5185                 {
5186                     constIdxData.reserve(numDataPoints);
5187 
5188                     for (uint32_t numIdx = 0; numIdx < numDataPoints; ++numIdx)
5189                         constIdxData.push_back(uOutputs[constIdx]);
5190                 }
5191 
5192                 specs["constarrayidx"] = de::toString(constIdx);
5193                 if (useConstIdx)
5194                     specs["arrayindex"] = "c_i32_ci";
5195                 else
5196                     specs["arrayindex"] = "30";
5197 
5198                 if (useConstIdx)
5199                     testName += string("_const_idx_") + de::toString(constIdx);
5200 
5201                 resources.outputs.clear();
5202                 resources.outputs.push_back(Resource(BufferSp(new Int32Buffer(useConstIdx ? constIdxData : uOutputs)),
5203                                                      VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
5204 
5205                 fragments["testfun"]    = testFun.specialize(specs);
5206                 fragments["pre_main"]   = preMain.specialize(specs);
5207                 fragments["decoration"] = decoration.specialize(specs);
5208 
5209                 createTestsForAllStages(testName.c_str(), defaultColors, defaultColors, fragments, pcs, resources,
5210                                         extensions, testGroup, requiredFeatures);
5211             }
5212         }
5213     }
5214 
5215     { // Vector cases
5216         const StringTemplate preMain("    %${base_type16} = OpTypeInt 16 ${signed}\n"
5217                                      "         %${type16} = OpTypeVector %${base_type16} 2\n"
5218                                      "    %c_i32_${count} = OpConstant %i32 ${count}\n"
5219                                      "          %c_i32_ci = OpConstant %i32 ${constarrayidx}\n"
5220                                      "%a${count}${type16} = OpTypeArray %${type16} %c_i32_${count}\n"
5221                                      "%a${count}${type32} = OpTypeArray %${type32} %c_i32_${count}\n"
5222                                      "      %pp_${type16} = OpTypePointer PushConstant %${type16}\n"
5223                                      "      %up_${type32} = OpTypePointer Uniform      %${type32}\n"
5224                                      "            %SSBO32 = OpTypeStruct %a${count}${type32}\n"
5225                                      "         %up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
5226                                      "            %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
5227                                      "              %PC16 = OpTypeStruct %a${count}${type16}\n"
5228                                      "           %pp_PC16 = OpTypePointer PushConstant %PC16\n"
5229                                      "              %pc16 = OpVariable %pp_PC16 PushConstant\n");
5230 
5231         const StringTemplate decoration("OpDecorate %a${count}${type16} ArrayStride 4\n"
5232                                         "OpDecorate %a${count}${type32} ArrayStride 8\n"
5233                                         "OpDecorate %SSBO32 BufferBlock\n"
5234                                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
5235                                         "OpDecorate %PC16 Block\n"
5236                                         "OpMemberDecorate %PC16 0 Offset 0\n"
5237                                         "OpDecorate %ssbo32 DescriptorSet 0\n"
5238                                         "OpDecorate %ssbo32 Binding 0\n");
5239 
5240         { // signed int
5241             map<string, string> specs;
5242 
5243             specs["base_type16"] = "i16";
5244             specs["type16"]      = "v2i16";
5245             specs["type32"]      = "v2i32";
5246             specs["signed"]      = "1";
5247             specs["count"]       = "32";
5248             specs["convert"]     = "OpSConvert";
5249 
5250             for (uint32_t constIndexIdx = 0; constIndexIdx < DE_LENGTH_OF_ARRAY(constantIndices); ++constIndexIdx)
5251             {
5252                 bool useConstIdx  = constantIndices[constIndexIdx].useConstantIndex;
5253                 uint32_t constIdx = constantIndices[constIndexIdx].constantIndex;
5254                 string testName   = "sint_vector";
5255                 vector<int32_t> constIdxData;
5256 
5257                 if (useConstIdx)
5258                 {
5259                     constIdxData.reserve(numDataPoints);
5260 
5261                     for (uint32_t numIdx = 0; numIdx < numDataPoints; ++numIdx)
5262                         constIdxData.push_back(sOutputs[constIdx * 2 + numIdx % 2]);
5263                 }
5264 
5265                 specs["constarrayidx"] = de::toString(constIdx);
5266                 if (useConstIdx)
5267                     specs["arrayindex"] = "c_i32_ci";
5268                 else
5269                     specs["arrayindex"] = "30";
5270 
5271                 if (useConstIdx)
5272                     testName += string("_const_idx_") + de::toString(constIdx);
5273 
5274                 resources.outputs.clear();
5275                 resources.outputs.push_back(Resource(BufferSp(new Int32Buffer(useConstIdx ? constIdxData : sOutputs)),
5276                                                      VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
5277 
5278                 fragments["testfun"]    = testFun.specialize(specs);
5279                 fragments["pre_main"]   = preMain.specialize(specs);
5280                 fragments["decoration"] = decoration.specialize(specs);
5281 
5282                 createTestsForAllStages(testName.c_str(), defaultColors, defaultColors, fragments, pcs, resources,
5283                                         extensions, testGroup, requiredFeatures);
5284             }
5285         }
5286         { // unsigned int
5287             map<string, string> specs;
5288 
5289             specs["base_type16"] = "u16";
5290             specs["type16"]      = "v2u16";
5291             specs["type32"]      = "v2u32";
5292             specs["signed"]      = "0";
5293             specs["count"]       = "32";
5294             specs["convert"]     = "OpUConvert";
5295 
5296             for (uint32_t constIndexIdx = 0; constIndexIdx < DE_LENGTH_OF_ARRAY(constantIndices); ++constIndexIdx)
5297             {
5298                 bool useConstIdx  = constantIndices[constIndexIdx].useConstantIndex;
5299                 uint32_t constIdx = constantIndices[constIndexIdx].constantIndex;
5300                 string testName   = "uint_vector";
5301                 vector<int32_t> constIdxData;
5302 
5303                 if (useConstIdx)
5304                 {
5305                     constIdxData.reserve(numDataPoints);
5306 
5307                     for (uint32_t numIdx = 0; numIdx < numDataPoints; ++numIdx)
5308                         constIdxData.push_back(uOutputs[constIdx * 2 + numIdx % 2]);
5309                 }
5310 
5311                 specs["constarrayidx"] = de::toString(constIdx);
5312                 if (useConstIdx)
5313                     specs["arrayindex"] = "c_i32_ci";
5314                 else
5315                     specs["arrayindex"] = "30";
5316 
5317                 if (useConstIdx)
5318                     testName += string("_const_idx_") + de::toString(constIdx);
5319 
5320                 resources.outputs.clear();
5321                 resources.outputs.push_back(Resource(BufferSp(new Int32Buffer(useConstIdx ? constIdxData : uOutputs)),
5322                                                      VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
5323 
5324                 fragments["testfun"]    = testFun.specialize(specs);
5325                 fragments["pre_main"]   = preMain.specialize(specs);
5326                 fragments["decoration"] = decoration.specialize(specs);
5327 
5328                 createTestsForAllStages(testName.c_str(), defaultColors, defaultColors, fragments, pcs, resources,
5329                                         extensions, testGroup, requiredFeatures);
5330             }
5331         }
5332     }
5333 }
5334 
addGraphics16BitStorageUniformInt16To32Group(tcu::TestCaseGroup * testGroup)5335 void addGraphics16BitStorageUniformInt16To32Group(tcu::TestCaseGroup *testGroup)
5336 {
5337     de::Random rnd(deStringHash(testGroup->getName()));
5338     map<string, string> fragments;
5339     const uint32_t numDataPoints = 256;
5340     RGBA defaultColors[4];
5341     vector<int16_t> inputs = getInt16s(rnd, numDataPoints);
5342     vector<int32_t> sOutputs;
5343     vector<int32_t> uOutputs;
5344     vector<string> extensions;
5345     const uint16_t signBitMask    = 0x8000;
5346     const uint32_t signExtendMask = 0xffff0000;
5347     const StringTemplate capabilities("OpCapability ${cap}\n");
5348 
5349     sOutputs.reserve(inputs.size());
5350     uOutputs.reserve(inputs.size());
5351 
5352     for (uint32_t numNdx = 0; numNdx < inputs.size(); ++numNdx)
5353     {
5354         uOutputs.push_back(static_cast<uint16_t>(inputs[numNdx]));
5355         if (inputs[numNdx] & signBitMask)
5356             sOutputs.push_back(static_cast<int32_t>(inputs[numNdx] | signExtendMask));
5357         else
5358             sOutputs.push_back(static_cast<int32_t>(inputs[numNdx]));
5359     }
5360 
5361     extensions.push_back("VK_KHR_16bit_storage");
5362     fragments["extension"] = "OpExtension \"SPV_KHR_16bit_storage\"";
5363 
5364     getDefaultColors(defaultColors);
5365 
5366     struct IntegerFacts
5367     {
5368         const char *name;
5369         const char *type32;
5370         const char *type16;
5371         const char *opcode;
5372         bool isSigned;
5373     };
5374 
5375     const IntegerFacts intFacts[] = {
5376         {"sint", "%i32", "%i16", "OpSConvert", true},
5377         {"uint", "%u32", "%u16", "OpUConvert", false},
5378     };
5379 
5380     struct ConstantIndex
5381     {
5382         bool useConstantIndex;
5383         uint32_t constantIndex;
5384     };
5385 
5386     ConstantIndex constantIndices[] = {{false, 0}, {true, 4}, {true, 5}, {true, 6}};
5387 
5388     const StringTemplate scalarPreMain("${itype16} = OpTypeInt 16 ${signed}\n"
5389                                        "%c_i32_256 = OpConstant %i32 256\n"
5390                                        "%c_i32_ci  = OpConstant %i32 ${constarrayidx}\n"
5391                                        "   %up_i32 = OpTypePointer Uniform ${itype32}\n"
5392                                        "   %up_i16 = OpTypePointer Uniform ${itype16}\n"
5393                                        "   %ra_i32 = OpTypeArray ${itype32} %c_i32_256\n"
5394                                        "   %ra_i16 = OpTypeArray ${itype16} %c_i32_256\n"
5395                                        "   %SSBO32 = OpTypeStruct %ra_i32\n"
5396                                        "   %SSBO16 = OpTypeStruct %ra_i16\n"
5397                                        "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
5398                                        "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
5399                                        "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
5400                                        "   %ssbo16 = OpVariable %up_SSBO16 Uniform\n");
5401 
5402     const StringTemplate scalarDecoration("OpDecorate %ra_i32 ArrayStride 4\n"
5403                                           "OpDecorate %ra_i16 ArrayStride ${arraystride}\n"
5404                                           "OpMemberDecorate %SSBO32 0 Offset 0\n"
5405                                           "OpMemberDecorate %SSBO16 0 Offset 0\n"
5406                                           "OpDecorate %SSBO32 BufferBlock\n"
5407                                           "OpDecorate %SSBO16 ${indecor}\n"
5408                                           "OpDecorate %ssbo32 DescriptorSet 0\n"
5409                                           "OpDecorate %ssbo16 DescriptorSet 0\n"
5410                                           "OpDecorate %ssbo32 Binding 1\n"
5411                                           "OpDecorate %ssbo16 Binding 0\n");
5412 
5413     const StringTemplate scalarTestFunc("%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
5414                                         "    %param = OpFunctionParameter %v4f32\n"
5415 
5416                                         "%entry = OpLabel\n"
5417                                         "    %i = OpVariable %fp_i32 Function\n"
5418                                         "         OpStore %i %c_i32_0\n"
5419                                         "         OpBranch %loop\n"
5420 
5421                                         " %loop = OpLabel\n"
5422                                         "   %15 = OpLoad %i32 %i\n"
5423                                         "   %lt = OpSLessThan %bool %15 %c_i32_256\n"
5424                                         "         OpLoopMerge %merge %inc None\n"
5425                                         "         OpBranchConditional %lt %write %merge\n"
5426 
5427                                         "%write = OpLabel\n"
5428                                         "   %30 = OpLoad %i32 %i\n"
5429                                         "  %src = OpAccessChain %up_i16 %ssbo16 %c_i32_0 %${arrayindex}\n"
5430                                         "%val16 = OpLoad ${itype16} %src\n"
5431                                         "%val32 = ${convert} ${itype32} %val16\n"
5432                                         "  %dst = OpAccessChain %up_i32 %ssbo32 %c_i32_0 %30\n"
5433                                         "         OpStore %dst %val32\n"
5434                                         "         OpBranch %inc\n"
5435 
5436                                         "  %inc = OpLabel\n"
5437                                         "   %37 = OpLoad %i32 %i\n"
5438                                         "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
5439                                         "         OpStore %i %39\n"
5440                                         "         OpBranch %loop\n"
5441                                         "%merge = OpLabel\n"
5442                                         "         OpReturnValue %param\n"
5443 
5444                                         "OpFunctionEnd\n");
5445 
5446     const StringTemplate vecPreMain("${itype16} = OpTypeInt 16 ${signed}\n"
5447                                     "%c_i32_128 = OpConstant %i32 128\n"
5448                                     "%c_i32_ci  = OpConstant %i32 ${constarrayidx}\n"
5449                                     "%v2itype16 = OpTypeVector ${itype16} 2\n"
5450                                     " %up_v2i32 = OpTypePointer Uniform ${v2itype32}\n"
5451                                     " %up_v2i16 = OpTypePointer Uniform %v2itype16\n"
5452                                     " %ra_v2i32 = OpTypeArray ${v2itype32} %c_i32_128\n"
5453                                     " %ra_v2i16 = OpTypeArray %v2itype16 %c_i32_128\n"
5454                                     "   %SSBO32 = OpTypeStruct %ra_v2i32\n"
5455                                     "   %SSBO16 = OpTypeStruct %ra_v2i16\n"
5456                                     "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
5457                                     "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
5458                                     "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
5459                                     "   %ssbo16 = OpVariable %up_SSBO16 Uniform\n");
5460 
5461     const StringTemplate vecDecoration("OpDecorate %ra_v2i32 ArrayStride 8\n"
5462                                        "OpDecorate %ra_v2i16 ArrayStride ${arraystride}\n"
5463                                        "OpMemberDecorate %SSBO32 0 Offset 0\n"
5464                                        "OpMemberDecorate %SSBO16 0 Offset 0\n"
5465                                        "OpDecorate %SSBO32 BufferBlock\n"
5466                                        "OpDecorate %SSBO16 ${indecor}\n"
5467                                        "OpDecorate %ssbo32 DescriptorSet 0\n"
5468                                        "OpDecorate %ssbo16 DescriptorSet 0\n"
5469                                        "OpDecorate %ssbo32 Binding 1\n"
5470                                        "OpDecorate %ssbo16 Binding 0\n");
5471 
5472     const StringTemplate vecTestFunc("%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
5473                                      "    %param = OpFunctionParameter %v4f32\n"
5474 
5475                                      "%entry = OpLabel\n"
5476                                      "    %i = OpVariable %fp_i32 Function\n"
5477                                      "         OpStore %i %c_i32_0\n"
5478                                      "         OpBranch %loop\n"
5479 
5480                                      " %loop = OpLabel\n"
5481                                      "   %15 = OpLoad %i32 %i\n"
5482                                      "   %lt = OpSLessThan %bool %15 %c_i32_128\n"
5483                                      "         OpLoopMerge %merge %inc None\n"
5484                                      "         OpBranchConditional %lt %write %merge\n"
5485 
5486                                      "%write = OpLabel\n"
5487                                      "   %30 = OpLoad %i32 %i\n"
5488                                      "  %src = OpAccessChain %up_v2i16 %ssbo16 %c_i32_0 %${arrayindex}\n"
5489                                      "%val16 = OpLoad %v2itype16 %src\n"
5490                                      "%val32 = ${convert} ${v2itype32} %val16\n"
5491                                      "  %dst = OpAccessChain %up_v2i32 %ssbo32 %c_i32_0 %30\n"
5492                                      "         OpStore %dst %val32\n"
5493                                      "         OpBranch %inc\n"
5494 
5495                                      "  %inc = OpLabel\n"
5496                                      "   %37 = OpLoad %i32 %i\n"
5497                                      "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
5498                                      "         OpStore %i %39\n"
5499                                      "         OpBranch %loop\n"
5500                                      "%merge = OpLabel\n"
5501                                      "         OpReturnValue %param\n"
5502 
5503                                      "OpFunctionEnd\n");
5504 
5505     struct Category
5506     {
5507         const char *name;
5508         const StringTemplate &preMain;
5509         const StringTemplate &decoration;
5510         const StringTemplate &testFunction;
5511         const uint32_t numElements;
5512     };
5513 
5514     const Category categories[] = {
5515         {"scalar", scalarPreMain, scalarDecoration, scalarTestFunc, 1},
5516         {"vector", vecPreMain, vecDecoration, vecTestFunc, 2},
5517     };
5518 
5519     const uint32_t minArrayStride[] = {2, 16};
5520 
5521     for (uint32_t catIdx = 0; catIdx < DE_LENGTH_OF_ARRAY(categories); ++catIdx)
5522         for (uint32_t capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
5523             for (uint32_t factIdx = 0; factIdx < DE_LENGTH_OF_ARRAY(intFacts); ++factIdx)
5524                 for (uint32_t constIndexIdx = 0; constIndexIdx < DE_LENGTH_OF_ARRAY(constantIndices); ++constIndexIdx)
5525                 {
5526                     bool useConstIdx  = constantIndices[constIndexIdx].useConstantIndex;
5527                     uint32_t constIdx = constantIndices[constIndexIdx].constantIndex;
5528                     map<string, string> specs;
5529                     string name = string(CAPABILITIES[capIdx].name) + "_" + categories[catIdx].name + "_" +
5530                                   intFacts[factIdx].name;
5531                     const uint32_t numElements = categories[catIdx].numElements;
5532                     const uint32_t arrayStride = de::max(numElements * 2, minArrayStride[capIdx]);
5533 
5534                     specs["cap"]         = CAPABILITIES[capIdx].cap;
5535                     specs["indecor"]     = CAPABILITIES[capIdx].decor;
5536                     specs["arraystride"] = de::toString(arrayStride);
5537                     specs["itype32"]     = intFacts[factIdx].type32;
5538                     specs["v2itype32"]   = "%v2" + string(intFacts[factIdx].type32).substr(1);
5539                     specs["v3itype32"]   = "%v3" + string(intFacts[factIdx].type32).substr(1);
5540                     specs["itype16"]     = intFacts[factIdx].type16;
5541                     if (intFacts[factIdx].isSigned)
5542                         specs["signed"] = "1";
5543                     else
5544                         specs["signed"] = "0";
5545                     specs["convert"]       = intFacts[factIdx].opcode;
5546                     specs["constarrayidx"] = de::toString(constIdx);
5547                     if (useConstIdx)
5548                         specs["arrayindex"] = "c_i32_ci";
5549                     else
5550                         specs["arrayindex"] = "30";
5551 
5552                     fragments["pre_main"]   = categories[catIdx].preMain.specialize(specs);
5553                     fragments["testfun"]    = categories[catIdx].testFunction.specialize(specs);
5554                     fragments["capability"] = capabilities.specialize(specs);
5555                     fragments["decoration"] = categories[catIdx].decoration.specialize(specs);
5556 
5557                     GraphicsResources resources;
5558                     vector<int16_t> inputsPadded;
5559                     VulkanFeatures features;
5560 
5561                     for (size_t dataIdx = 0; dataIdx < inputs.size() / numElements; ++dataIdx)
5562                     {
5563                         for (uint32_t elementIdx = 0; elementIdx < numElements; ++elementIdx)
5564                             inputsPadded.push_back(inputs[dataIdx * numElements + elementIdx]);
5565                         for (uint32_t padIdx = 0; padIdx < arrayStride / 2 - numElements; ++padIdx)
5566                             inputsPadded.push_back(0);
5567                     }
5568 
5569                     resources.inputs.push_back(
5570                         Resource(BufferSp(new Int16Buffer(inputsPadded)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
5571 
5572                     vector<int32_t> constIdxOutputs;
5573                     if (useConstIdx)
5574                     {
5575                         name += string("_const_idx_") + de::toString(constIdx);
5576                         for (uint32_t i = 0; i < numDataPoints; i++)
5577                         {
5578                             uint32_t idx = constIdx * numElements + i % numElements;
5579                             constIdxOutputs.push_back(intFacts[factIdx].isSigned ? sOutputs[idx] : uOutputs[idx]);
5580                         }
5581                     }
5582 
5583                     resources.inputs.back().setDescriptorType(CAPABILITIES[capIdx].dtype);
5584                     resources.outputs.clear();
5585                     if (useConstIdx)
5586                         resources.outputs.push_back(
5587                             Resource(BufferSp(new Int32Buffer(constIdxOutputs)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
5588                     else if (intFacts[factIdx].isSigned)
5589                         resources.outputs.push_back(
5590                             Resource(BufferSp(new Int32Buffer(sOutputs)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
5591                     else
5592                         resources.outputs.push_back(
5593                             Resource(BufferSp(new Int32Buffer(uOutputs)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
5594 
5595                     features = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
5596                     features.coreFeatures.vertexPipelineStoresAndAtomics = true;
5597                     features.coreFeatures.fragmentStoresAndAtomics       = true;
5598 
5599                     createTestsForAllStages(name, defaultColors, defaultColors, fragments, resources, extensions,
5600                                             testGroup, features);
5601                 }
5602 }
5603 
addGraphics16BitStorageUniformFloat16To32Group(tcu::TestCaseGroup * testGroup)5604 void addGraphics16BitStorageUniformFloat16To32Group(tcu::TestCaseGroup *testGroup)
5605 {
5606     de::Random rnd(deStringHash(testGroup->getName()));
5607     map<string, string> fragments;
5608     vector<string> extensions;
5609     const uint32_t numDataPoints = 256;
5610     RGBA defaultColors[4];
5611     const StringTemplate capabilities("OpCapability ${cap}\n");
5612     vector<deFloat16> float16Data = getFloat16s(rnd, numDataPoints);
5613 
5614     struct ConstantIndex
5615     {
5616         bool useConstantIndex;
5617         uint32_t constantIndex;
5618     };
5619 
5620     ConstantIndex constantIndices[] = {{false, 0}, {true, 4}, {true, 5}, {true, 6}};
5621 
5622     extensions.push_back("VK_KHR_16bit_storage");
5623     fragments["extension"] = "OpExtension \"SPV_KHR_16bit_storage\"";
5624 
5625     getDefaultColors(defaultColors);
5626 
5627     { // scalar cases
5628         const StringTemplate preMain("      %f16 = OpTypeFloat 16\n"
5629                                      "%c_i32_256 = OpConstant %i32 256\n"
5630                                      " %c_i32_ci = OpConstant %i32 ${constarrayidx}\n"
5631                                      "   %up_f32 = OpTypePointer Uniform %f32\n"
5632                                      "   %up_f16 = OpTypePointer Uniform %f16\n"
5633                                      "   %ra_f32 = OpTypeArray %f32 %c_i32_256\n"
5634                                      "   %ra_f16 = OpTypeArray %f16 %c_i32_256\n"
5635                                      "   %SSBO32 = OpTypeStruct %ra_f32\n"
5636                                      "   %SSBO16 = OpTypeStruct %ra_f16\n"
5637                                      "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
5638                                      "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
5639                                      "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
5640                                      "   %ssbo16 = OpVariable %up_SSBO16 Uniform\n");
5641 
5642         const StringTemplate decoration("OpDecorate %ra_f32 ArrayStride 4\n"
5643                                         "OpDecorate %ra_f16 ArrayStride ${arraystride}\n"
5644                                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
5645                                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
5646                                         "OpDecorate %SSBO32 BufferBlock\n"
5647                                         "OpDecorate %SSBO16 ${indecor}\n"
5648                                         "OpDecorate %ssbo32 DescriptorSet 0\n"
5649                                         "OpDecorate %ssbo16 DescriptorSet 0\n"
5650                                         "OpDecorate %ssbo32 Binding 1\n"
5651                                         "OpDecorate %ssbo16 Binding 0\n");
5652 
5653         // ssbo32[] <- convert ssbo16[] to 32bit float
5654         const StringTemplate testFun("%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
5655                                      "    %param = OpFunctionParameter %v4f32\n"
5656 
5657                                      "%entry = OpLabel\n"
5658                                      "    %i = OpVariable %fp_i32 Function\n"
5659                                      "         OpStore %i %c_i32_0\n"
5660                                      "         OpBranch %loop\n"
5661 
5662                                      " %loop = OpLabel\n"
5663                                      "   %15 = OpLoad %i32 %i\n"
5664                                      "   %lt = OpSLessThan %bool %15 %c_i32_256\n"
5665                                      "         OpLoopMerge %merge %inc None\n"
5666                                      "         OpBranchConditional %lt %write %merge\n"
5667 
5668                                      "%write = OpLabel\n"
5669                                      "   %30 = OpLoad %i32 %i\n"
5670                                      "  %src = OpAccessChain %up_f16 %ssbo16 %c_i32_0 %${arrayindex}\n"
5671                                      "%val16 = OpLoad %f16 %src\n"
5672                                      "%val32 = OpFConvert %f32 %val16\n"
5673                                      "  %dst = OpAccessChain %up_f32 %ssbo32 %c_i32_0 %30\n"
5674                                      "         OpStore %dst %val32\n"
5675                                      "         OpBranch %inc\n"
5676 
5677                                      "  %inc = OpLabel\n"
5678                                      "   %37 = OpLoad %i32 %i\n"
5679                                      "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
5680                                      "         OpStore %i %39\n"
5681                                      "         OpBranch %loop\n"
5682 
5683                                      "%merge = OpLabel\n"
5684                                      "         OpReturnValue %param\n"
5685 
5686                                      "OpFunctionEnd\n");
5687 
5688         const uint32_t arrayStrides[] = {2, 16};
5689 
5690         for (uint32_t constIndexIdx = 0; constIndexIdx < DE_LENGTH_OF_ARRAY(constantIndices); ++constIndexIdx)
5691         {
5692             for (uint32_t capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
5693             {
5694                 GraphicsResources resources;
5695                 map<string, string> specs;
5696                 VulkanFeatures features;
5697                 string testName   = string(CAPABILITIES[capIdx].name) + "_scalar_float";
5698                 bool useConstIdx  = constantIndices[constIndexIdx].useConstantIndex;
5699                 uint32_t constIdx = constantIndices[constIndexIdx].constantIndex;
5700 
5701                 specs["cap"]           = CAPABILITIES[capIdx].cap;
5702                 specs["indecor"]       = CAPABILITIES[capIdx].decor;
5703                 specs["arraystride"]   = de::toString(arrayStrides[capIdx]);
5704                 specs["constarrayidx"] = de::toString(constIdx);
5705                 if (useConstIdx)
5706                     specs["arrayindex"] = "c_i32_ci";
5707                 else
5708                     specs["arrayindex"] = "30";
5709 
5710                 fragments["capability"] = capabilities.specialize(specs);
5711                 fragments["decoration"] = decoration.specialize(specs);
5712                 fragments["pre_main"]   = preMain.specialize(specs);
5713                 fragments["testfun"]    = testFun.specialize(specs);
5714 
5715                 vector<deFloat16> inputData;
5716                 for (size_t dataIdx = 0; dataIdx < float16Data.size(); ++dataIdx)
5717                 {
5718                     inputData.push_back(float16Data[dataIdx]);
5719                     for (uint32_t padIdx = 0; padIdx < arrayStrides[capIdx] / 2 - 1; ++padIdx)
5720                         inputData.push_back(deFloat16(0.0f));
5721                 }
5722 
5723                 vector<float> float32Data;
5724                 float32Data.reserve(numDataPoints);
5725                 for (uint32_t numIdx = 0; numIdx < numDataPoints; ++numIdx)
5726                     float32Data.push_back(deFloat16To32(float16Data[useConstIdx ? constIdx : numIdx]));
5727 
5728                 resources.inputs.push_back(
5729                     Resource(BufferSp(new Float16Buffer(inputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
5730                 resources.outputs.push_back(
5731                     Resource(BufferSp(new Float32Buffer(float32Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
5732                 resources.verifyIO = check32BitFloats;
5733                 resources.inputs.back().setDescriptorType(CAPABILITIES[capIdx].dtype);
5734 
5735                 features = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
5736                 features.coreFeatures.vertexPipelineStoresAndAtomics = true;
5737                 features.coreFeatures.fragmentStoresAndAtomics       = true;
5738 
5739                 if (useConstIdx)
5740                     testName += string("_const_idx_") + de::toString(constIdx);
5741 
5742                 createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions,
5743                                         testGroup, features);
5744             }
5745         }
5746     }
5747 
5748     { // vector cases
5749         const StringTemplate preMain("      %f16 = OpTypeFloat 16\n"
5750                                      "%c_i32_128 = OpConstant %i32 128\n"
5751                                      "%c_i32_ci  = OpConstant %i32 ${constarrayidx}\n"
5752                                      "     %v2f16 = OpTypeVector %f16 2\n"
5753                                      " %up_v2f32 = OpTypePointer Uniform %v2f32\n"
5754                                      " %up_v2f16 = OpTypePointer Uniform %v2f16\n"
5755                                      " %ra_v2f32 = OpTypeArray %v2f32 %c_i32_128\n"
5756                                      " %ra_v2f16 = OpTypeArray %v2f16 %c_i32_128\n"
5757                                      "   %SSBO32 = OpTypeStruct %ra_v2f32\n"
5758                                      "   %SSBO16 = OpTypeStruct %ra_v2f16\n"
5759                                      "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
5760                                      "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
5761                                      "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
5762                                      "   %ssbo16 = OpVariable %up_SSBO16 Uniform\n");
5763 
5764         const StringTemplate decoration("OpDecorate %ra_v2f32 ArrayStride 8\n"
5765                                         "OpDecorate %ra_v2f16 ArrayStride ${arraystride}\n"
5766                                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
5767                                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
5768                                         "OpDecorate %SSBO32 BufferBlock\n"
5769                                         "OpDecorate %SSBO16 ${indecor}\n"
5770                                         "OpDecorate %ssbo32 DescriptorSet 0\n"
5771                                         "OpDecorate %ssbo16 DescriptorSet 0\n"
5772                                         "OpDecorate %ssbo32 Binding 1\n"
5773                                         "OpDecorate %ssbo16 Binding 0\n");
5774 
5775         // ssbo32[] <- convert ssbo16[] to 32bit float
5776         const StringTemplate testFun("%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
5777                                      "    %param = OpFunctionParameter %v4f32\n"
5778 
5779                                      "%entry = OpLabel\n"
5780                                      "    %i = OpVariable %fp_i32 Function\n"
5781                                      "         OpStore %i %c_i32_0\n"
5782                                      "         OpBranch %loop\n"
5783 
5784                                      " %loop = OpLabel\n"
5785                                      "   %15 = OpLoad %i32 %i\n"
5786                                      "   %lt = OpSLessThan %bool %15 %c_i32_128\n"
5787                                      "         OpLoopMerge %merge %inc None\n"
5788                                      "         OpBranchConditional %lt %write %merge\n"
5789 
5790                                      "%write = OpLabel\n"
5791                                      "   %30 = OpLoad %i32 %i\n"
5792                                      "  %src = OpAccessChain %up_v2f16 %ssbo16 %c_i32_0 %${arrayindex}\n"
5793                                      "%val16 = OpLoad %v2f16 %src\n"
5794                                      "%val32 = OpFConvert %v2f32 %val16\n"
5795                                      "  %dst = OpAccessChain %up_v2f32 %ssbo32 %c_i32_0 %30\n"
5796                                      "         OpStore %dst %val32\n"
5797                                      "         OpBranch %inc\n"
5798 
5799                                      "  %inc = OpLabel\n"
5800                                      "   %37 = OpLoad %i32 %i\n"
5801                                      "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
5802                                      "         OpStore %i %39\n"
5803                                      "         OpBranch %loop\n"
5804 
5805                                      "%merge = OpLabel\n"
5806                                      "         OpReturnValue %param\n"
5807 
5808                                      "OpFunctionEnd\n");
5809 
5810         const uint32_t arrayStrides[] = {4, 16};
5811 
5812         for (uint32_t constIndexIdx = 0; constIndexIdx < DE_LENGTH_OF_ARRAY(constantIndices); ++constIndexIdx)
5813         {
5814             for (uint32_t capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
5815             {
5816                 GraphicsResources resources;
5817                 map<string, string> specs;
5818                 VulkanFeatures features;
5819                 string testName   = string(CAPABILITIES[capIdx].name) + "_vector_float";
5820                 bool useConstIdx  = constantIndices[constIndexIdx].useConstantIndex;
5821                 uint32_t constIdx = constantIndices[constIndexIdx].constantIndex;
5822 
5823                 specs["cap"]           = CAPABILITIES[capIdx].cap;
5824                 specs["indecor"]       = CAPABILITIES[capIdx].decor;
5825                 specs["arraystride"]   = de::toString(arrayStrides[capIdx]);
5826                 specs["constarrayidx"] = de::toString(constIdx);
5827                 if (useConstIdx)
5828                     specs["arrayindex"] = "c_i32_ci";
5829                 else
5830                     specs["arrayindex"] = "30";
5831 
5832                 fragments["capability"] = capabilities.specialize(specs);
5833                 fragments["decoration"] = decoration.specialize(specs);
5834                 fragments["pre_main"]   = preMain.specialize(specs);
5835                 fragments["testfun"]    = testFun.specialize(specs);
5836 
5837                 vector<deFloat16> inputData;
5838                 for (size_t dataIdx = 0; dataIdx < float16Data.size() / 2; ++dataIdx)
5839                 {
5840                     inputData.push_back(float16Data[dataIdx * 2]);
5841                     inputData.push_back(float16Data[dataIdx * 2 + 1]);
5842                     for (uint32_t padIdx = 0; padIdx < arrayStrides[capIdx] / 2 - 2; ++padIdx)
5843                         inputData.push_back(deFloat16(0.0f));
5844                 }
5845 
5846                 vector<float> float32Data;
5847                 float32Data.reserve(numDataPoints);
5848                 for (uint32_t numIdx = 0; numIdx < numDataPoints; ++numIdx)
5849                     float32Data.push_back(
5850                         deFloat16To32(float16Data[constantIndices[constIndexIdx].useConstantIndex ?
5851                                                       (constantIndices[constIndexIdx].constantIndex * 2 + numIdx % 2) :
5852                                                       numIdx]));
5853 
5854                 resources.inputs.push_back(
5855                     Resource(BufferSp(new Float16Buffer(inputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
5856                 resources.outputs.push_back(
5857                     Resource(BufferSp(new Float32Buffer(float32Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
5858                 resources.verifyIO = check32BitFloats;
5859                 resources.inputs.back().setDescriptorType(CAPABILITIES[capIdx].dtype);
5860 
5861                 features = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
5862                 features.coreFeatures.vertexPipelineStoresAndAtomics = true;
5863                 features.coreFeatures.fragmentStoresAndAtomics       = true;
5864 
5865                 if (constantIndices[constIndexIdx].useConstantIndex)
5866                     testName += string("_const_idx_") + de::toString(constantIndices[constIndexIdx].constantIndex);
5867 
5868                 createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions,
5869                                         testGroup, features);
5870             }
5871         }
5872     }
5873 
5874     { // matrix cases
5875         fragments["pre_main"] = " %c_i32_32 = OpConstant %i32 32\n"
5876                                 "      %f16 = OpTypeFloat 16\n"
5877                                 "    %v2f16 = OpTypeVector %f16 2\n"
5878                                 "  %m4x2f32 = OpTypeMatrix %v2f32 4\n"
5879                                 "  %m4x2f16 = OpTypeMatrix %v2f16 4\n"
5880                                 " %up_v2f32 = OpTypePointer Uniform %v2f32\n"
5881                                 " %up_v2f16 = OpTypePointer Uniform %v2f16\n"
5882                                 "%a8m4x2f32 = OpTypeArray %m4x2f32 %c_i32_32\n"
5883                                 "%a8m4x2f16 = OpTypeArray %m4x2f16 %c_i32_32\n"
5884                                 "   %SSBO32 = OpTypeStruct %a8m4x2f32\n"
5885                                 "   %SSBO16 = OpTypeStruct %a8m4x2f16\n"
5886                                 "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
5887                                 "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
5888                                 "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
5889                                 "   %ssbo16 = OpVariable %up_SSBO16 Uniform\n";
5890 
5891         const StringTemplate decoration("OpDecorate %a8m4x2f32 ArrayStride 32\n"
5892                                         "OpDecorate %a8m4x2f16 ArrayStride 16\n"
5893                                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
5894                                         "OpMemberDecorate %SSBO32 0 ColMajor\n"
5895                                         "OpMemberDecorate %SSBO32 0 MatrixStride 8\n"
5896                                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
5897                                         "OpMemberDecorate %SSBO16 0 ColMajor\n"
5898                                         "OpMemberDecorate %SSBO16 0 MatrixStride 4\n"
5899                                         "OpDecorate %SSBO32 BufferBlock\n"
5900                                         "OpDecorate %SSBO16 ${indecor}\n"
5901                                         "OpDecorate %ssbo32 DescriptorSet 0\n"
5902                                         "OpDecorate %ssbo16 DescriptorSet 0\n"
5903                                         "OpDecorate %ssbo32 Binding 1\n"
5904                                         "OpDecorate %ssbo16 Binding 0\n");
5905 
5906         fragments["testfun"] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
5907                                "    %param = OpFunctionParameter %v4f32\n"
5908 
5909                                "%entry = OpLabel\n"
5910                                "    %i = OpVariable %fp_i32 Function\n"
5911                                "         OpStore %i %c_i32_0\n"
5912                                "         OpBranch %loop\n"
5913 
5914                                " %loop = OpLabel\n"
5915                                "   %15 = OpLoad %i32 %i\n"
5916                                "   %lt = OpSLessThan %bool %15 %c_i32_32\n"
5917                                "         OpLoopMerge %merge %inc None\n"
5918                                "         OpBranchConditional %lt %write %merge\n"
5919 
5920                                "  %write = OpLabel\n"
5921                                "     %30 = OpLoad %i32 %i\n"
5922                                "  %src_0 = OpAccessChain %up_v2f16 %ssbo16 %c_i32_0 %30 %c_i32_0\n"
5923                                "  %src_1 = OpAccessChain %up_v2f16 %ssbo16 %c_i32_0 %30 %c_i32_1\n"
5924                                "  %src_2 = OpAccessChain %up_v2f16 %ssbo16 %c_i32_0 %30 %c_i32_2\n"
5925                                "  %src_3 = OpAccessChain %up_v2f16 %ssbo16 %c_i32_0 %30 %c_i32_3\n"
5926                                "%val16_0 = OpLoad %v2f16 %src_0\n"
5927                                "%val16_1 = OpLoad %v2f16 %src_1\n"
5928                                "%val16_2 = OpLoad %v2f16 %src_2\n"
5929                                "%val16_3 = OpLoad %v2f16 %src_3\n"
5930                                "%val32_0 = OpFConvert %v2f32 %val16_0\n"
5931                                "%val32_1 = OpFConvert %v2f32 %val16_1\n"
5932                                "%val32_2 = OpFConvert %v2f32 %val16_2\n"
5933                                "%val32_3 = OpFConvert %v2f32 %val16_3\n"
5934                                "  %dst_0 = OpAccessChain %up_v2f32 %ssbo32 %c_i32_0 %30 %c_i32_0\n"
5935                                "  %dst_1 = OpAccessChain %up_v2f32 %ssbo32 %c_i32_0 %30 %c_i32_1\n"
5936                                "  %dst_2 = OpAccessChain %up_v2f32 %ssbo32 %c_i32_0 %30 %c_i32_2\n"
5937                                "  %dst_3 = OpAccessChain %up_v2f32 %ssbo32 %c_i32_0 %30 %c_i32_3\n"
5938                                "           OpStore %dst_0 %val32_0\n"
5939                                "           OpStore %dst_1 %val32_1\n"
5940                                "           OpStore %dst_2 %val32_2\n"
5941                                "           OpStore %dst_3 %val32_3\n"
5942                                "           OpBranch %inc\n"
5943 
5944                                "  %inc = OpLabel\n"
5945                                "   %37 = OpLoad %i32 %i\n"
5946                                "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
5947                                "         OpStore %i %39\n"
5948                                "         OpBranch %loop\n"
5949 
5950                                "%merge = OpLabel\n"
5951                                "         OpReturnValue %param\n"
5952 
5953                                "OpFunctionEnd\n";
5954 
5955         for (uint32_t capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
5956         {
5957             GraphicsResources resources;
5958             map<string, string> specs;
5959             VulkanFeatures features;
5960             string testName = string(CAPABILITIES[capIdx].name) + "_matrix_float";
5961 
5962             specs["cap"]     = CAPABILITIES[capIdx].cap;
5963             specs["indecor"] = CAPABILITIES[capIdx].decor;
5964 
5965             fragments["capability"] = capabilities.specialize(specs);
5966             fragments["decoration"] = decoration.specialize(specs);
5967 
5968             vector<float> float32Data;
5969             float32Data.reserve(numDataPoints);
5970             for (uint32_t numIdx = 0; numIdx < numDataPoints; ++numIdx)
5971                 float32Data.push_back(deFloat16To32(float16Data[numIdx]));
5972 
5973             resources.inputs.push_back(
5974                 Resource(BufferSp(new Float16Buffer(float16Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
5975             resources.outputs.push_back(
5976                 Resource(BufferSp(new Float32Buffer(float32Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
5977             resources.verifyIO = check32BitFloats;
5978             resources.inputs.back().setDescriptorType(CAPABILITIES[capIdx].dtype);
5979 
5980             features                                             = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
5981             features.coreFeatures.vertexPipelineStoresAndAtomics = true;
5982             features.coreFeatures.fragmentStoresAndAtomics       = true;
5983 
5984             createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, testGroup,
5985                                     features);
5986         }
5987     }
5988 }
5989 
addGraphics16BitStorageUniformStructFloat16To32Group(tcu::TestCaseGroup * testGroup)5990 void addGraphics16BitStorageUniformStructFloat16To32Group(tcu::TestCaseGroup *testGroup)
5991 {
5992     de::Random rnd(deStringHash(testGroup->getName()));
5993     map<string, string> fragments;
5994     vector<string> extensions;
5995     RGBA defaultColors[4];
5996     const StringTemplate capabilities("OpCapability ${cap}\n");
5997     vector<float> float32Data(getStructSize(SHADERTEMPLATE_STRIDE32BIT_STD430), 0.0f);
5998 
5999     extensions.push_back("VK_KHR_16bit_storage");
6000     fragments["extension"] = "OpExtension \"SPV_KHR_16bit_storage\"";
6001 
6002     getDefaultColors(defaultColors);
6003 
6004     const StringTemplate preMain("\n"
6005                                  "${types}\n"
6006                                  "\n"
6007                                  "%zero = OpConstant %i32 0\n"
6008                                  "%c_i32_5 = OpConstant %i32 5\n"
6009                                  "%c_i32_6 = OpConstant %i32 6\n"
6010                                  "%c_i32_7 = OpConstant %i32 7\n"
6011                                  "%c_i32_8 = OpConstant %i32 8\n"
6012                                  "%c_i32_9 = OpConstant %i32 9\n"
6013                                  "%c_i32_11 = OpConstant %i32 11\n"
6014                                  "\n"
6015                                  "%c_u32_7 = OpConstant %u32 7\n"
6016                                  "%c_u32_11 = OpConstant %u32 11\n"
6017                                  "\n"
6018                                  "%f16arr3       = OpTypeArray %f16 %c_u32_3\n"
6019                                  "%v2f16arr3    = OpTypeArray %v2f16 %c_u32_3\n"
6020                                  "%v2f16arr11    = OpTypeArray %v2f16 %c_u32_11\n"
6021                                  "%v3f16arr11    = OpTypeArray %v3f16 %c_u32_11\n"
6022                                  "%v4f16arr3     = OpTypeArray %v4f16 %c_u32_3\n"
6023                                  "%struct16      = OpTypeStruct %f16 %v2f16arr3\n"
6024                                  "%struct16arr11 = OpTypeArray %struct16 %c_u32_11\n"
6025                                  "%f16Struct = OpTypeStruct %f16 %v2f16 %v3f16 %v4f16 %f16arr3 %struct16arr11 "
6026                                  "%v2f16arr11 %f16 %v3f16arr11 %v4f16arr3\n"
6027                                  "\n"
6028                                  "%f32arr3   = OpTypeArray %f32 %c_u32_3\n"
6029                                  "%v2f32arr3 = OpTypeArray %v2f32 %c_u32_3\n"
6030                                  "%v2f32arr11 = OpTypeArray %v2f32 %c_u32_11\n"
6031                                  "%v3f32arr11 = OpTypeArray %v3f32 %c_u32_11\n"
6032                                  "%v4f32arr3 = OpTypeArray %v4f32 %c_u32_3\n"
6033                                  "%struct32      = OpTypeStruct %f32 %v2f32arr3\n"
6034                                  "%struct32arr11 = OpTypeArray %struct32 %c_u32_11\n"
6035                                  "%f32Struct = OpTypeStruct %f32 %v2f32 %v3f32 %v4f32 %f32arr3 %struct32arr11 "
6036                                  "%v2f32arr11 %f32 %v3f32arr11 %v4f32arr3\n"
6037                                  "\n"
6038                                  "%f16StructArr7      = OpTypeArray %f16Struct %c_u32_7\n"
6039                                  "%f32StructArr7      = OpTypeArray %f32Struct %c_u32_7\n"
6040                                  "%SSBO_IN            = OpTypeStruct %f16StructArr7\n"
6041                                  "%SSBO_OUT           = OpTypeStruct %f32StructArr7\n"
6042                                  "%up_SSBOIN          = OpTypePointer Uniform %SSBO_IN\n"
6043                                  "%up_SSBOOUT         = OpTypePointer Uniform %SSBO_OUT\n"
6044                                  "%ssboIN             = OpVariable %up_SSBOIN Uniform\n"
6045                                  "%ssboOUT            = OpVariable %up_SSBOOUT Uniform\n"
6046                                  "\n");
6047 
6048     const StringTemplate decoration("${strideF16}"
6049                                     "\n"
6050                                     "${strideF32}"
6051                                     "\n"
6052                                     "OpMemberDecorate %SSBO_IN 0 Offset 0\n"
6053                                     "OpMemberDecorate %SSBO_OUT 0 Offset 0\n"
6054                                     "OpDecorate %SSBO_IN ${indecor}\n"
6055                                     "OpDecorate %SSBO_OUT BufferBlock\n"
6056                                     "OpDecorate %ssboIN DescriptorSet 0\n"
6057                                     "OpDecorate %ssboOUT DescriptorSet 0\n"
6058                                     "OpDecorate %ssboIN Binding 0\n"
6059                                     "OpDecorate %ssboOUT Binding 1\n"
6060                                     "\n");
6061 
6062     fragments["testfun"] =
6063         "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
6064         "    %param = OpFunctionParameter %v4f32\n"
6065         "%label     = OpLabel\n"
6066         "%loopNdx    = OpVariable %fp_i32 Function\n"
6067         "%insideLoopNdx = OpVariable %fp_i32 Function\n"
6068 
6069         "OpStore %loopNdx %zero\n"
6070         "OpBranch %loop\n"
6071         "%loop = OpLabel\n"
6072         "OpLoopMerge %merge %13 None\n"
6073         "OpBranch %14\n"
6074         "%14 = OpLabel\n"
6075         "%valLoopNdx = OpLoad %i32 %loopNdx\n"
6076         "%18 = OpSLessThan %bool %valLoopNdx %c_i32_7\n"
6077         "OpBranchConditional %18 %11 %merge\n"
6078         "%11 = OpLabel\n"
6079         "\n"
6080         "%f16src  = OpAccessChain %f16ptr %ssboIN %zero %valLoopNdx %zero\n"
6081         "%val_f16 = OpLoad %f16 %f16src\n"
6082         "%val_f32 = OpFConvert %f32 %val_f16\n"
6083         "%f32dst  = OpAccessChain %f32ptr %ssboOUT %zero %valLoopNdx %zero\n"
6084         "OpStore %f32dst %val_f32\n"
6085         "\n"
6086         "%v2f16src  = OpAccessChain %v2f16ptr %ssboIN %zero %valLoopNdx %c_i32_1\n"
6087         "%val_v2f16 = OpLoad %v2f16 %v2f16src\n"
6088         "%val_v2f32 = OpFConvert %v2f32 %val_v2f16\n"
6089         "%v2f32dst  = OpAccessChain %v2f32ptr %ssboOUT %zero %valLoopNdx %c_i32_1\n"
6090         "OpStore %v2f32dst %val_v2f32\n"
6091         "\n"
6092         "%v3f16src  = OpAccessChain %v3f16ptr %ssboIN %zero %valLoopNdx %c_i32_2\n"
6093         "%val_v3f16 = OpLoad %v3f16 %v3f16src\n"
6094         "%val_v3f32 = OpFConvert %v3f32 %val_v3f16\n"
6095         "%v3f32dst  = OpAccessChain %v3f32ptr %ssboOUT %zero %valLoopNdx %c_i32_2\n"
6096         "OpStore %v3f32dst %val_v3f32\n"
6097         "\n"
6098         "%v4f16src  = OpAccessChain %v4f16ptr %ssboIN %zero %valLoopNdx %c_i32_3\n"
6099         "%val_v4f16 = OpLoad %v4f16 %v4f16src\n"
6100         "%val_v4f32 = OpFConvert %v4f32 %val_v4f16\n"
6101         "%v4f32dst  = OpAccessChain %v4f32ptr %ssboOUT %zero %valLoopNdx %c_i32_3\n"
6102         "OpStore %v4f32dst %val_v4f32\n"
6103         "\n"
6104         "%f16src2  = OpAccessChain %f16ptr %ssboIN %zero %valLoopNdx %c_i32_7\n"
6105         "%val2_f16 = OpLoad %f16 %f16src2\n"
6106         "%val2_f32 = OpFConvert %f32 %val2_f16\n"
6107         "%f32dst2  = OpAccessChain %f32ptr %ssboOUT %zero %valLoopNdx %c_i32_7\n"
6108         "OpStore %f32dst2 %val2_f32\n"
6109         "\n"
6110         "OpStore %insideLoopNdx %zero\n"
6111         "OpBranch %loopInside\n"
6112         "%loopInside = OpLabel\n"
6113         "OpLoopMerge %92 %93 None\n"
6114         "OpBranch %94\n"
6115         "%94 = OpLabel\n"
6116         "%valInsideLoopNdx = OpLoad %i32 %insideLoopNdx\n"
6117         "%96 = OpSLessThan %bool %valInsideLoopNdx %c_i32_11\n"
6118         "OpBranchConditional %96 %91 %92\n"
6119         "\n"
6120         "%91 = OpLabel\n"
6121         "\n"
6122         "%v2f16src2  = OpAccessChain %v2f16ptr %ssboIN %zero %valLoopNdx %c_i32_6 %valInsideLoopNdx\n"
6123         "%val2_v2f16 = OpLoad %v2f16 %v2f16src2\n"
6124         "%val2_v2f32 = OpFConvert %v2f32 %val2_v2f16\n"
6125         "%v2f32dst2  = OpAccessChain %v2f32ptr %ssboOUT %zero %valLoopNdx %c_i32_6 %valInsideLoopNdx\n"
6126         "OpStore %v2f32dst2 %val2_v2f32\n"
6127         "\n"
6128         "%v3f16src2  = OpAccessChain %v3f16ptr %ssboIN %zero %valLoopNdx %c_i32_8 %valInsideLoopNdx\n"
6129         "%val2_v3f16 = OpLoad %v3f16 %v3f16src2\n"
6130         "%val2_v3f32 = OpFConvert %v3f32 %val2_v3f16\n"
6131         "%v3f32dst2  = OpAccessChain %v3f32ptr %ssboOUT %zero %valLoopNdx %c_i32_8 %valInsideLoopNdx\n"
6132         "OpStore %v3f32dst2 %val2_v3f32\n"
6133         "\n"
6134         //struct {f16, v2f16[3]}
6135         "%Sf16src  = OpAccessChain %f16ptr %ssboIN %zero %valLoopNdx %c_i32_5 %valInsideLoopNdx %zero\n"
6136         "%Sval_f16 = OpLoad %f16 %Sf16src\n"
6137         "%Sval_f32 = OpFConvert %f32 %Sval_f16\n"
6138         "%Sf32dst2  = OpAccessChain %f32ptr %ssboOUT %zero %valLoopNdx %c_i32_5 %valInsideLoopNdx %zero\n"
6139         "OpStore %Sf32dst2 %Sval_f32\n"
6140         "\n"
6141         "%Sv2f16src0   = OpAccessChain %v2f16ptr %ssboIN %zero %valLoopNdx %c_i32_5 %valInsideLoopNdx %c_i32_1 %zero\n"
6142         "%Sv2f16_0     = OpLoad %v2f16 %Sv2f16src0\n"
6143         "%Sv2f32_0     = OpFConvert %v2f32 %Sv2f16_0\n"
6144         "%Sv2f32dst_0  = OpAccessChain %v2f32ptr %ssboOUT %zero %valLoopNdx %c_i32_5 %valInsideLoopNdx %c_i32_1 %zero\n"
6145         "OpStore %Sv2f32dst_0 %Sv2f32_0\n"
6146         "\n"
6147         "%Sv2f16src1  = OpAccessChain %v2f16ptr %ssboIN %zero %valLoopNdx %c_i32_5 %valInsideLoopNdx %c_i32_1 "
6148         "%c_i32_1\n"
6149         "%Sv2f16_1 = OpLoad %v2f16 %Sv2f16src1\n"
6150         "%Sv2f32_1 = OpFConvert %v2f32 %Sv2f16_1\n"
6151         "%Sv2f32dst_1  = OpAccessChain %v2f32ptr %ssboOUT %zero %valLoopNdx %c_i32_5 %valInsideLoopNdx %c_i32_1 "
6152         "%c_i32_1\n"
6153         "OpStore %Sv2f32dst_1 %Sv2f32_1\n"
6154         "\n"
6155         "%Sv2f16src2  = OpAccessChain %v2f16ptr %ssboIN %zero %valLoopNdx %c_i32_5 %valInsideLoopNdx %c_i32_1 "
6156         "%c_i32_2\n"
6157         "%Sv2f16_2 = OpLoad %v2f16 %Sv2f16src2\n"
6158         "%Sv2f32_2 = OpFConvert %v2f32 %Sv2f16_2\n"
6159         "%Sv2f32dst_2  = OpAccessChain %v2f32ptr %ssboOUT %zero %valLoopNdx %c_i32_5 %valInsideLoopNdx %c_i32_1 "
6160         "%c_i32_2\n"
6161         "OpStore %Sv2f32dst_2 %Sv2f32_2\n"
6162         "\n"
6163         //Array with 3 elements
6164         "%LessThan3 = OpSLessThan %bool %valInsideLoopNdx %c_i32_3\n"
6165         "OpSelectionMerge %BlockIf None\n"
6166         "OpBranchConditional %LessThan3 %LabelIf %BlockIf\n"
6167         "%LabelIf = OpLabel\n"
6168         "  %f16src3  = OpAccessChain %f16ptr %ssboIN %zero %valLoopNdx %c_i32_4 %valInsideLoopNdx\n"
6169         "  %val3_f16 = OpLoad %f16 %f16src3\n"
6170         "  %val3_f32 = OpFConvert %f32 %val3_f16\n"
6171         "  %f32dst3  = OpAccessChain %f32ptr %ssboOUT %zero %valLoopNdx %c_i32_4 %valInsideLoopNdx\n"
6172         "  OpStore %f32dst3 %val3_f32\n"
6173         "\n"
6174         "  %v4f16src2  = OpAccessChain %v4f16ptr %ssboIN %zero %valLoopNdx %c_i32_9 %valInsideLoopNdx\n"
6175         "  %val2_v4f16 = OpLoad %v4f16 %v4f16src2\n"
6176         "  %val2_v4f32 = OpFConvert %v4f32 %val2_v4f16\n"
6177         "  %v4f32dst2  = OpAccessChain %v4f32ptr %ssboOUT %zero %valLoopNdx %c_i32_9 %valInsideLoopNdx\n"
6178         "  OpStore %v4f32dst2 %val2_v4f32\n"
6179         "OpBranch %BlockIf\n"
6180         "%BlockIf = OpLabel\n"
6181         "\n"
6182         "OpBranch %93\n"
6183         "%93 = OpLabel\n"
6184         "%132 = OpLoad %i32 %insideLoopNdx\n"
6185         "%133 = OpIAdd %i32 %132 %c_i32_1\n"
6186         "OpStore %insideLoopNdx %133\n"
6187         "OpBranch %loopInside\n"
6188         "\n"
6189         "%92 = OpLabel\n"
6190         "OpBranch %13\n"
6191         "%13 = OpLabel\n"
6192         "%134 = OpLoad %i32 %loopNdx\n"
6193         "%135 = OpIAdd %i32 %134 %c_i32_1\n"
6194         "OpStore %loopNdx %135\n"
6195         "OpBranch %loop\n"
6196 
6197         "%merge = OpLabel\n"
6198         "         OpReturnValue %param\n"
6199         "         OpFunctionEnd\n";
6200 
6201     for (uint32_t capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
6202     {
6203         vector<deFloat16> float16Data = (VK_DESCRIPTOR_TYPE_STORAGE_BUFFER == CAPABILITIES[capIdx].dtype) ?
6204                                             data16bitStd430(rnd) :
6205                                             data16bitStd140(rnd);
6206         GraphicsResources resources;
6207         map<string, string> specs;
6208         VulkanFeatures features;
6209         string testName = string(CAPABILITIES[capIdx].name);
6210 
6211         specs["cap"]       = CAPABILITIES[capIdx].cap;
6212         specs["indecor"]   = CAPABILITIES[capIdx].decor;
6213         specs["strideF16"] = getStructShaderComponet((VK_DESCRIPTOR_TYPE_STORAGE_BUFFER == CAPABILITIES[capIdx].dtype) ?
6214                                                          SHADERTEMPLATE_STRIDE16BIT_STD430 :
6215                                                          SHADERTEMPLATE_STRIDE16BIT_STD140);
6216         specs["strideF32"] = getStructShaderComponet(SHADERTEMPLATE_STRIDE32BIT_STD430);
6217         specs["types"]     = getStructShaderComponet(SHADERTEMPLATE_TYPES);
6218 
6219         fragments["capability"] = capabilities.specialize(specs);
6220         fragments["decoration"] = decoration.specialize(specs);
6221         fragments["pre_main"]   = preMain.specialize(specs);
6222 
6223         resources.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Data)), CAPABILITIES[capIdx].dtype));
6224         resources.outputs.push_back(
6225             Resource(BufferSp(new Float32Buffer(float32Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
6226         resources.verifyIO = (VK_DESCRIPTOR_TYPE_STORAGE_BUFFER == CAPABILITIES[capIdx].dtype) ?
6227                                  graphicsCheckStruct<deFloat16, float, SHADERTEMPLATE_STRIDE16BIT_STD430,
6228                                                      SHADERTEMPLATE_STRIDE32BIT_STD430> :
6229                                  graphicsCheckStruct<deFloat16, float, SHADERTEMPLATE_STRIDE16BIT_STD140,
6230                                                      SHADERTEMPLATE_STRIDE32BIT_STD430>;
6231 
6232         features                                             = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
6233         features.coreFeatures.vertexPipelineStoresAndAtomics = true;
6234         features.coreFeatures.fragmentStoresAndAtomics       = true;
6235 
6236         createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, testGroup,
6237                                 features);
6238     }
6239 }
6240 
addGraphics16BitStorageUniformStructFloat32To16Group(tcu::TestCaseGroup * testGroup)6241 void addGraphics16BitStorageUniformStructFloat32To16Group(tcu::TestCaseGroup *testGroup)
6242 {
6243     de::Random rnd(deStringHash(testGroup->getName()));
6244     map<string, string> fragments;
6245     vector<string> extensions;
6246     RGBA defaultColors[4];
6247     const StringTemplate capabilities("OpCapability ${cap}\n");
6248     vector<uint16_t> float16Data(getStructSize(SHADERTEMPLATE_STRIDE16BIT_STD430), 0u);
6249 
6250     extensions.push_back("VK_KHR_16bit_storage");
6251     fragments["extension"] = "OpExtension \"SPV_KHR_16bit_storage\"";
6252 
6253     getDefaultColors(defaultColors);
6254 
6255     const StringTemplate preMain("\n"
6256                                  "${types}\n"
6257                                  "\n"
6258                                  "%zero = OpConstant %i32 0\n"
6259                                  "%c_i32_5 = OpConstant %i32 5\n"
6260                                  "%c_i32_6 = OpConstant %i32 6\n"
6261                                  "%c_i32_7 = OpConstant %i32 7\n"
6262                                  "%c_i32_8 = OpConstant %i32 8\n"
6263                                  "%c_i32_9 = OpConstant %i32 9\n"
6264                                  "%c_i32_11 = OpConstant %i32 11\n"
6265                                  "\n"
6266                                  "%c_u32_7 = OpConstant %u32 7\n"
6267                                  "%c_u32_11 = OpConstant %u32 11\n"
6268                                  "\n"
6269                                  "%f16arr3       = OpTypeArray %f16 %c_u32_3\n"
6270                                  "%v2f16arr3    = OpTypeArray %v2f16 %c_u32_3\n"
6271                                  "%v2f16arr11    = OpTypeArray %v2f16 %c_u32_11\n"
6272                                  "%v3f16arr11    = OpTypeArray %v3f16 %c_u32_11\n"
6273                                  "%v4f16arr3     = OpTypeArray %v4f16 %c_u32_3\n"
6274                                  "%struct16      = OpTypeStruct %f16 %v2f16arr3\n"
6275                                  "%struct16arr11 = OpTypeArray %struct16 %c_u32_11\n"
6276                                  "%f16Struct = OpTypeStruct %f16 %v2f16 %v3f16 %v4f16 %f16arr3 %struct16arr11 "
6277                                  "%v2f16arr11 %f16 %v3f16arr11 %v4f16arr3\n"
6278                                  "\n"
6279                                  "%f32arr3   = OpTypeArray %f32 %c_u32_3\n"
6280                                  "%v2f32arr3 = OpTypeArray %v2f32 %c_u32_3\n"
6281                                  "%v2f32arr11 = OpTypeArray %v2f32 %c_u32_11\n"
6282                                  "%v3f32arr11 = OpTypeArray %v3f32 %c_u32_11\n"
6283                                  "%v4f32arr3 = OpTypeArray %v4f32 %c_u32_3\n"
6284                                  "%struct32      = OpTypeStruct %f32 %v2f32arr3\n"
6285                                  "%struct32arr11 = OpTypeArray %struct32 %c_u32_11\n"
6286                                  "%f32Struct = OpTypeStruct %f32 %v2f32 %v3f32 %v4f32 %f32arr3 %struct32arr11 "
6287                                  "%v2f32arr11 %f32 %v3f32arr11 %v4f32arr3\n"
6288                                  "\n"
6289                                  "%f16StructArr7      = OpTypeArray %f16Struct %c_u32_7\n"
6290                                  "%f32StructArr7      = OpTypeArray %f32Struct %c_u32_7\n"
6291                                  "%SSBO_IN            = OpTypeStruct %f32StructArr7\n"
6292                                  "%SSBO_OUT           = OpTypeStruct %f16StructArr7\n"
6293                                  "%up_SSBOIN          = OpTypePointer Uniform %SSBO_IN\n"
6294                                  "%up_SSBOOUT         = OpTypePointer Uniform %SSBO_OUT\n"
6295                                  "%ssboIN             = OpVariable %up_SSBOIN Uniform\n"
6296                                  "%ssboOUT            = OpVariable %up_SSBOOUT Uniform\n"
6297                                  "\n");
6298 
6299     const StringTemplate decoration("${strideF16}"
6300                                     "\n"
6301                                     "${strideF32}"
6302                                     "\n"
6303                                     "OpMemberDecorate %SSBO_IN 0 Offset 0\n"
6304                                     "OpMemberDecorate %SSBO_OUT 0 Offset 0\n"
6305                                     "OpDecorate %SSBO_IN ${indecor}\n"
6306                                     "OpDecorate %SSBO_OUT BufferBlock\n"
6307                                     "OpDecorate %ssboIN DescriptorSet 0\n"
6308                                     "OpDecorate %ssboOUT DescriptorSet 0\n"
6309                                     "OpDecorate %ssboIN Binding 0\n"
6310                                     "OpDecorate %ssboOUT Binding 1\n"
6311                                     "\n");
6312 
6313     fragments["testfun"] =
6314         "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
6315         "%param = OpFunctionParameter %v4f32\n"
6316         "%label     = OpLabel\n"
6317         "%loopNdx    = OpVariable %fp_i32 Function\n"
6318         "%insideLoopNdx = OpVariable %fp_i32 Function\n"
6319 
6320         "OpStore %loopNdx %zero\n"
6321         "OpBranch %loop\n"
6322         "%loop = OpLabel\n"
6323         "OpLoopMerge %merge %13 None\n"
6324         "OpBranch %14\n"
6325         "%14 = OpLabel\n"
6326         "%valLoopNdx = OpLoad %i32 %loopNdx\n"
6327         "%18 = OpSLessThan %bool %valLoopNdx %c_i32_7\n"
6328         "OpBranchConditional %18 %11 %merge\n"
6329         "%11 = OpLabel\n"
6330         "\n"
6331         "%f32src  = OpAccessChain %f32ptr %ssboIN %zero %valLoopNdx %zero\n"
6332         "%val_f32 = OpLoad %f32 %f32src\n"
6333         "%val_f16 = OpFConvert %f16 %val_f32\n"
6334         "%f16dst  = OpAccessChain %f16ptr %ssboOUT %zero %valLoopNdx %zero\n"
6335         "OpStore %f16dst %val_f16\n"
6336         "\n"
6337         "%v2f32src  = OpAccessChain %v2f32ptr %ssboIN %zero %valLoopNdx %c_i32_1\n"
6338         "%val_v2f32 = OpLoad %v2f32 %v2f32src\n"
6339         "%val_v2f16 = OpFConvert %v2f16 %val_v2f32\n"
6340         "%v2f16dst  = OpAccessChain %v2f16ptr %ssboOUT %zero %valLoopNdx %c_i32_1\n"
6341         "OpStore %v2f16dst %val_v2f16\n"
6342         "\n"
6343         "%v3f32src  = OpAccessChain %v3f32ptr %ssboIN %zero %valLoopNdx %c_i32_2\n"
6344         "%val_v3f32 = OpLoad %v3f32 %v3f32src\n"
6345         "%val_v3f16 = OpFConvert %v3f16 %val_v3f32\n"
6346         "%v3f16dst  = OpAccessChain %v3f16ptr %ssboOUT %zero %valLoopNdx %c_i32_2\n"
6347         "OpStore %v3f16dst %val_v3f16\n"
6348         "\n"
6349         "%v4f32src  = OpAccessChain %v4f32ptr %ssboIN %zero %valLoopNdx %c_i32_3\n"
6350         "%val_v4f32 = OpLoad %v4f32 %v4f32src\n"
6351         "%val_v4f16 = OpFConvert %v4f16 %val_v4f32\n"
6352         "%v4f16dst  = OpAccessChain %v4f16ptr %ssboOUT %zero %valLoopNdx %c_i32_3\n"
6353         "OpStore %v4f16dst %val_v4f16\n"
6354         "\n"
6355         "%f32src2  = OpAccessChain %f32ptr %ssboIN %zero %valLoopNdx %c_i32_7\n"
6356         "%val2_f32 = OpLoad %f32 %f32src2\n"
6357         "%val2_f16 = OpFConvert %f16 %val2_f32\n"
6358         "%f16dst2  = OpAccessChain %f16ptr %ssboOUT %zero %valLoopNdx %c_i32_7\n"
6359         "OpStore %f16dst2 %val2_f16\n"
6360         "\n"
6361         "OpStore %insideLoopNdx %zero\n"
6362         "OpBranch %loopInside\n"
6363         "%loopInside = OpLabel\n"
6364         "OpLoopMerge %92 %93 None\n"
6365         "OpBranch %94\n"
6366         "%94 = OpLabel\n"
6367         "%valInsideLoopNdx = OpLoad %i32 %insideLoopNdx\n"
6368         "%96 = OpSLessThan %bool %valInsideLoopNdx %c_i32_11\n"
6369         "OpBranchConditional %96 %91 %92\n"
6370         "\n"
6371         "%91 = OpLabel\n"
6372         "\n"
6373         //struct {f16, v2f16[3]}
6374         "%Sf32src  = OpAccessChain %f32ptr %ssboIN %zero %valLoopNdx %c_i32_5 %valInsideLoopNdx %zero\n"
6375         "%Sval_f32 = OpLoad %f32 %Sf32src\n"
6376         "%Sval_f16 = OpFConvert %f16 %Sval_f32\n"
6377         "%Sf16dst2  = OpAccessChain %f16ptr %ssboOUT %zero %valLoopNdx %c_i32_5 %valInsideLoopNdx %zero\n"
6378         "OpStore %Sf16dst2 %Sval_f16\n"
6379         "\n"
6380         "%Sv2f32src0   = OpAccessChain %v2f32ptr %ssboIN %zero %valLoopNdx %c_i32_5 %valInsideLoopNdx %c_i32_1 %zero\n"
6381         "%Sv2f32_0     = OpLoad %v2f32 %Sv2f32src0\n"
6382         "%Sv2f16_0     = OpFConvert %v2f16 %Sv2f32_0\n"
6383         "%Sv2f16dst_0  = OpAccessChain %v2f16ptr %ssboOUT %zero %valLoopNdx %c_i32_5 %valInsideLoopNdx %c_i32_1 %zero\n"
6384         "OpStore %Sv2f16dst_0 %Sv2f16_0\n"
6385         "\n"
6386         "%Sv2f32src1  = OpAccessChain %v2f32ptr %ssboIN %zero %valLoopNdx %c_i32_5 %valInsideLoopNdx %c_i32_1 "
6387         "%c_i32_1\n"
6388         "%Sv2f32_1 = OpLoad %v2f32 %Sv2f32src1\n"
6389         "%Sv2f16_1 = OpFConvert %v2f16 %Sv2f32_1\n"
6390         "%Sv2f16dst_1  = OpAccessChain %v2f16ptr %ssboOUT %zero %valLoopNdx %c_i32_5 %valInsideLoopNdx %c_i32_1 "
6391         "%c_i32_1\n"
6392         "OpStore %Sv2f16dst_1 %Sv2f16_1\n"
6393         "\n"
6394         "%Sv2f32src2  = OpAccessChain %v2f32ptr %ssboIN %zero %valLoopNdx %c_i32_5 %valInsideLoopNdx %c_i32_1 "
6395         "%c_i32_2\n"
6396         "%Sv2f32_2 = OpLoad %v2f32 %Sv2f32src2\n"
6397         "%Sv2f16_2 = OpFConvert %v2f16 %Sv2f32_2\n"
6398         "%Sv2f16dst_2  = OpAccessChain %v2f16ptr %ssboOUT %zero %valLoopNdx %c_i32_5 %valInsideLoopNdx %c_i32_1 "
6399         "%c_i32_2\n"
6400         "OpStore %Sv2f16dst_2 %Sv2f16_2\n"
6401         "\n"
6402 
6403         "%v2f32src2  = OpAccessChain %v2f32ptr %ssboIN %zero %valLoopNdx %c_i32_6 %valInsideLoopNdx\n"
6404         "%val2_v2f32 = OpLoad %v2f32 %v2f32src2\n"
6405         "%val2_v2f16 = OpFConvert %v2f16 %val2_v2f32\n"
6406         "%v2f16dst2  = OpAccessChain %v2f16ptr %ssboOUT %zero %valLoopNdx %c_i32_6 %valInsideLoopNdx\n"
6407         "OpStore %v2f16dst2 %val2_v2f16\n"
6408         "\n"
6409         "%v3f32src2  = OpAccessChain %v3f32ptr %ssboIN %zero %valLoopNdx %c_i32_8 %valInsideLoopNdx\n"
6410         "%val2_v3f32 = OpLoad %v3f32 %v3f32src2\n"
6411         "%val2_v3f16 = OpFConvert %v3f16 %val2_v3f32\n"
6412         "%v3f16dst2  = OpAccessChain %v3f16ptr %ssboOUT %zero %valLoopNdx %c_i32_8 %valInsideLoopNdx\n"
6413         "OpStore %v3f16dst2 %val2_v3f16\n"
6414         "\n"
6415 
6416         //Array with 3 elements
6417         "%LessThan3 = OpSLessThan %bool %valInsideLoopNdx %c_i32_3\n"
6418         "OpSelectionMerge %BlockIf None\n"
6419         "OpBranchConditional %LessThan3 %LabelIf %BlockIf\n"
6420         "  %LabelIf = OpLabel\n"
6421         "  %f32src3  = OpAccessChain %f32ptr %ssboIN %zero %valLoopNdx %c_i32_4 %valInsideLoopNdx\n"
6422         "  %val3_f32 = OpLoad %f32 %f32src3\n"
6423         "  %val3_f16 = OpFConvert %f16 %val3_f32\n"
6424         "  %f16dst3  = OpAccessChain %f16ptr %ssboOUT %zero %valLoopNdx %c_i32_4 %valInsideLoopNdx\n"
6425         "  OpStore %f16dst3 %val3_f16\n"
6426         "\n"
6427         "  %v4f32src2  = OpAccessChain %v4f32ptr %ssboIN %zero %valLoopNdx %c_i32_9 %valInsideLoopNdx\n"
6428         "  %val2_v4f32 = OpLoad %v4f32 %v4f32src2\n"
6429         "  %val2_v4f16 = OpFConvert %v4f16 %val2_v4f32\n"
6430         "  %v4f16dst2  = OpAccessChain %v4f16ptr %ssboOUT %zero %valLoopNdx %c_i32_9 %valInsideLoopNdx\n"
6431         "  OpStore %v4f16dst2 %val2_v4f16\n"
6432         "OpBranch %BlockIf\n"
6433         "%BlockIf = OpLabel\n"
6434 
6435         "OpBranch %93\n"
6436         "%93 = OpLabel\n"
6437         "%132 = OpLoad %i32 %insideLoopNdx\n"
6438         "%133 = OpIAdd %i32 %132 %c_i32_1\n"
6439         "OpStore %insideLoopNdx %133\n"
6440         "OpBranch %loopInside\n"
6441         "\n"
6442         "%92 = OpLabel\n"
6443         "OpBranch %13\n"
6444         "%13 = OpLabel\n"
6445         "%134 = OpLoad %i32 %loopNdx\n"
6446         "%135 = OpIAdd %i32 %134 %c_i32_1\n"
6447         "OpStore %loopNdx %135\n"
6448         "OpBranch %loop\n"
6449 
6450         "%merge = OpLabel\n"
6451         "         OpReturnValue %param\n"
6452         "         OpFunctionEnd\n";
6453 
6454     for (uint32_t capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
6455     {
6456         map<string, string> specs;
6457         string testName           = string(CAPABILITIES[capIdx].name);
6458         vector<float> float32Data = (VK_DESCRIPTOR_TYPE_STORAGE_BUFFER == CAPABILITIES[capIdx].dtype) ?
6459                                         data32bitStd430(rnd) :
6460                                         data32bitStd140(rnd);
6461         GraphicsResources resources;
6462 
6463         specs["cap"]       = "StorageUniformBufferBlock16";
6464         specs["indecor"]   = CAPABILITIES[capIdx].decor;
6465         specs["strideF16"] = getStructShaderComponet(SHADERTEMPLATE_STRIDE16BIT_STD430);
6466         specs["strideF32"] = getStructShaderComponet((VK_DESCRIPTOR_TYPE_STORAGE_BUFFER == CAPABILITIES[capIdx].dtype) ?
6467                                                          SHADERTEMPLATE_STRIDE32BIT_STD430 :
6468                                                          SHADERTEMPLATE_STRIDE32BIT_STD140);
6469         specs["types"]     = getStructShaderComponet(SHADERTEMPLATE_TYPES);
6470 
6471         fragments["capability"] = capabilities.specialize(specs);
6472         fragments["decoration"] = decoration.specialize(specs);
6473         fragments["pre_main"]   = preMain.specialize(specs);
6474 
6475         resources.inputs.push_back(Resource(BufferSp(new Float32Buffer(float32Data)), CAPABILITIES[capIdx].dtype));
6476         resources.outputs.push_back(
6477             Resource(BufferSp(new Float16Buffer(float16Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
6478         resources.verifyIO = (VK_DESCRIPTOR_TYPE_STORAGE_BUFFER == CAPABILITIES[capIdx].dtype) ?
6479                                  graphicsCheckStruct<float, deFloat16, SHADERTEMPLATE_STRIDE32BIT_STD430,
6480                                                      SHADERTEMPLATE_STRIDE16BIT_STD430> :
6481                                  graphicsCheckStruct<float, deFloat16, SHADERTEMPLATE_STRIDE32BIT_STD140,
6482                                                      SHADERTEMPLATE_STRIDE16BIT_STD430>;
6483 
6484         VulkanFeatures features;
6485 
6486         features.coreFeatures.vertexPipelineStoresAndAtomics = true;
6487         features.coreFeatures.fragmentStoresAndAtomics       = true;
6488         features.ext16BitStorage.storageBuffer16BitAccess    = true;
6489 
6490         createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, testGroup,
6491                                 features);
6492     }
6493 }
6494 
// Adds graphics-pipeline test cases to `group` that copy an array of structs mixing 16-bit and
// 32-bit integer members (scalars, vectors, arrays and an array of nested structs) from an input
// buffer (%ssboIN, binding 0) to an output buffer (%ssboOUT, binding 1), one member at a time.
//
// One set of tests is created per entry of CAPABILITIES (via createTestsForAllStages). The input
// buffer uses the entry's descriptor type: for a uniform buffer the input data and offsets come
// from the *_STD140 variants, otherwise from the *_STD430 variants. The output is always a
// storage buffer using the STD430 variants.
//
// NOTE(review): the text preceding `void` on the next line duplicates the signature and, like the
// leading line numbers, looks like a listing/extraction artifact -- confirm against the upstream file.
addGraphics16bitStructMixedTypesGroup(tcu::TestCaseGroup * group)6495 void addGraphics16bitStructMixedTypesGroup(tcu::TestCaseGroup *group)
6496 {
         // Random generator seeded from the group name's hash.
6497     de::Random rnd(deStringHash(group->getName()));
6498     map<string, string> fragments;
6499     vector<string> extensions;
6500     RGBA defaultColors[4];
         // ${cap} is either empty or an extra "OpCapability ..." line (see specs["cap"] below).
6501     const StringTemplate capabilities("OpCapability StorageUniformBufferBlock16\n"
6502                                       "${cap}\n");
         // Zero-filled output buffer; element count comes from getStructSize(SHADERTEMPLATE_STRIDEMIX_STD430).
6503     vector<int16_t> outData(getStructSize(SHADERTEMPLATE_STRIDEMIX_STD430), 0u);
6504 
6505     extensions.push_back("VK_KHR_16bit_storage");
6506     fragments["extension"] = "OpExtension \"SPV_KHR_16bit_storage\"\n";
6507 
6508     getDefaultColors(defaultColors);
6509 
         // Shader declarations. The In and Out struct hierarchies have the same member lists but are
         // declared as separate types; the decoration fragment below attaches ${InOffsets} and
         // ${OutOffsets} separately (presumably keyed on the In/Out name suffix -- the offset
         // templates are not visible here).
6510     const StringTemplate preMain(
6511         "\n" //Types
6512         "%i16    = OpTypeInt 16 1\n"
6513         "%v2i16  = OpTypeVector %i16 2\n"
6514         "%v3i16  = OpTypeVector %i16 3\n"
6515         "%v4i16  = OpTypeVector %i16 4\n"
6516         "\n" //Constant values
6517         "%zero     = OpConstant %i32 0\n"
6518         "%c_i32_5  = OpConstant %i32 5\n"
6519         "%c_i32_6  = OpConstant %i32 6\n"
6520         "%c_i32_7  = OpConstant %i32 7\n"
6521         "%c_i32_8  = OpConstant %i32 8\n"
6522         "%c_i32_9  = OpConstant %i32 9\n"
6523         "%c_i32_10 = OpConstant %i32 10\n"
6524         "%c_i32_11 = OpConstant %i32 11\n"
6525         "%c_u32_7  = OpConstant %u32 7\n"
6526         "%c_u32_11 = OpConstant %u32 11\n"
6527         "\n" //Arrays & Structs
6528         "%v2b16NestedArr11In  = OpTypeArray %v2i16 %c_u32_11\n"
6529         "%b32NestedArr11In   = OpTypeArray %i32 %c_u32_11\n"
6530         "%sb16Arr11In         = OpTypeArray %i16 %c_u32_11\n"
6531         "%sb32Arr11In        = OpTypeArray %i32 %c_u32_11\n"
6532         "%sNestedIn          = OpTypeStruct %i16 %i32 %v2b16NestedArr11In %b32NestedArr11In\n"
6533         "%sNestedArr11In     = OpTypeArray %sNestedIn %c_u32_11\n"
6534         "%structIn           = OpTypeStruct %i16 %i32 %v2i16 %v2i32 %v3i16 %v3i32 %v4i16 %v4i32 %sNestedArr11In "
6535         "%sb16Arr11In %sb32Arr11In\n"
6536         "%structArr7In       = OpTypeArray %structIn %c_u32_7\n"
6537         "%v2b16NestedArr11Out = OpTypeArray %v2i16 %c_u32_11\n"
6538         "%b32NestedArr11Out  = OpTypeArray %i32 %c_u32_11\n"
6539         "%sb16Arr11Out        = OpTypeArray %i16 %c_u32_11\n"
6540         "%sb32Arr11Out       = OpTypeArray %i32 %c_u32_11\n"
6541         "%sNestedOut         = OpTypeStruct %i16 %i32 %v2b16NestedArr11Out %b32NestedArr11Out\n"
6542         "%sNestedArr11Out    = OpTypeArray %sNestedOut %c_u32_11\n"
6543         "%structOut          = OpTypeStruct %i16 %i32 %v2i16 %v2i32 %v3i16 %v3i32 %v4i16 %v4i32 %sNestedArr11Out "
6544         "%sb16Arr11Out %sb32Arr11Out\n"
6545         "%structArr7Out      = OpTypeArray %structOut %c_u32_7\n"
6546         "\n" //Pointers
6547         "%i16outPtr    = OpTypePointer Uniform %i16\n"
6548         "%v2i16outPtr  = OpTypePointer Uniform %v2i16\n"
6549         "%v3i16outPtr  = OpTypePointer Uniform %v3i16\n"
6550         "%v4i16outPtr  = OpTypePointer Uniform %v4i16\n"
6551         "%i32outPtr   = OpTypePointer Uniform %i32\n"
6552         "%v2i32outPtr = OpTypePointer Uniform %v2i32\n"
6553         "%v3i32outPtr = OpTypePointer Uniform %v3i32\n"
6554         "%v4i32outPtr = OpTypePointer Uniform %v4i32\n"
6555         "%uvec3ptr = OpTypePointer Input %v3u32\n"
6556         "\n" //SSBO IN
6557         "%SSBO_IN    = OpTypeStruct %structArr7In\n"
6558         "%up_SSBOIN  = OpTypePointer Uniform %SSBO_IN\n"
6559         "%ssboIN     = OpVariable %up_SSBOIN Uniform\n"
6560         "\n" //SSBO OUT
6561         "%SSBO_OUT   = OpTypeStruct %structArr7Out\n"
6562         "%up_SSBOOUT = OpTypePointer Uniform %SSBO_OUT\n"
6563         "%ssboOUT    = OpVariable %up_SSBOOUT Uniform\n");
6564 
         // Decorations: per-variant member offsets, then the block kind and bindings.
         // %SSBO_IN gets ${storage} (Block or BufferBlock); %SSBO_OUT is always BufferBlock.
6565     const StringTemplate decoration("${OutOffsets}"
6566                                     "${InOffsets}"
6567                                     "\n" //SSBO IN
6568                                     "OpMemberDecorate %SSBO_IN 0 Offset 0\n"
6569                                     "OpDecorate %ssboIN DescriptorSet 0\n"
6570                                     "OpDecorate %SSBO_IN ${storage}\n"
6571                                     "OpDecorate %SSBO_OUT BufferBlock\n"
6572                                     "OpDecorate %ssboIN Binding 0\n"
6573                                     "\n" //SSBO OUT
6574                                     "OpMemberDecorate %SSBO_OUT 0 Offset 0\n"
6575                                     "OpDecorate %ssboOUT DescriptorSet 0\n"
6576                                     "OpDecorate %ssboOUT Binding 1\n");
6577 
         // Test function: loads each member through an access chain into %ssboIN and stores it at
         // the same indices in %ssboOUT. The ${x|y|z}BeginLoop/EndLoop placeholders are filled with
         // the output of beginLoop()/endLoop(); %Valx/%Valy/%Valz are presumably the loop counters
         // those helpers define (helpers not visible here -- confirm). Returns %param unchanged.
6578     const StringTemplate testFun(
6579         "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
6580         "%param     = OpFunctionParameter %v4f32\n"
6581         "%label     = OpLabel\n"
6582         "%ndxArrx   = OpVariable %fp_i32  Function\n"
6583         "%ndxArry   = OpVariable %fp_i32  Function\n"
6584         "%ndxArrz   = OpVariable %fp_i32  Function\n"
6585         "${xBeginLoop}"
6586         "\n" //structOut.b16 = structIn.b16
6587         "%inP1  = OpAccessChain %i16${inPtr} %ssboIN %zero %Valx %zero\n"
6588         "%inV1  = OpLoad %i16 %inP1\n"
6589         "%outP1 = OpAccessChain %i16outPtr %ssboOUT %zero %Valx %zero\n"
6590         "OpStore %outP1 %inV1\n"
6591         "\n" //structOut.b32 = structIn.b32
6592         "%inP2  = OpAccessChain %i32${inPtr} %ssboIN %zero %Valx %c_i32_1\n"
6593         "%inV2  = OpLoad %i32 %inP2\n"
6594         "%outP2 = OpAccessChain %i32outPtr %ssboOUT %zero %Valx %c_i32_1\n"
6595         "OpStore %outP2 %inV2\n"
6596         "\n" //structOut.v2b16 = structIn.v2b16
6597         "%inP3  = OpAccessChain %v2i16${inPtr} %ssboIN %zero %Valx %c_i32_2\n"
6598         "%inV3  = OpLoad %v2i16 %inP3\n"
6599         "%outP3 = OpAccessChain %v2i16outPtr %ssboOUT %zero %Valx %c_i32_2\n"
6600         "OpStore %outP3 %inV3\n"
6601         "\n" //structOut.v2b32 = structIn.v2b32
6602         "%inP4  = OpAccessChain %v2i32${inPtr} %ssboIN %zero %Valx %c_i32_3\n"
6603         "%inV4  = OpLoad %v2i32 %inP4\n"
6604         "%outP4 = OpAccessChain %v2i32outPtr %ssboOUT %zero %Valx %c_i32_3\n"
6605         "OpStore %outP4 %inV4\n"
6606         "\n" //structOut.v3b16 = structIn.v3b16
6607         "%inP5  = OpAccessChain %v3i16${inPtr} %ssboIN %zero %Valx %c_i32_4\n"
6608         "%inV5  = OpLoad %v3i16 %inP5\n"
6609         "%outP5 = OpAccessChain %v3i16outPtr %ssboOUT %zero %Valx %c_i32_4\n"
6610         "OpStore %outP5 %inV5\n"
6611         "\n" //structOut.v3b32 = structIn.v3b32
6612         "%inP6  = OpAccessChain %v3i32${inPtr} %ssboIN %zero %Valx %c_i32_5\n"
6613         "%inV6  = OpLoad %v3i32 %inP6\n"
6614         "%outP6 = OpAccessChain %v3i32outPtr %ssboOUT %zero %Valx %c_i32_5\n"
6615         "OpStore %outP6 %inV6\n"
6616         "\n" //structOut.v4b16 = structIn.v4b16
6617         "%inP7  = OpAccessChain %v4i16${inPtr} %ssboIN %zero %Valx %c_i32_6\n"
6618         "%inV7  = OpLoad %v4i16 %inP7\n"
6619         "%outP7 = OpAccessChain %v4i16outPtr %ssboOUT %zero %Valx %c_i32_6\n"
6620         "OpStore %outP7 %inV7\n"
6621         "\n" //structOut.v4b32 = structIn.v4b32
6622         "%inP8  = OpAccessChain %v4i32${inPtr} %ssboIN %zero %Valx %c_i32_7\n"
6623         "%inV8  = OpLoad %v4i32 %inP8\n"
6624         "%outP8 = OpAccessChain %v4i32outPtr %ssboOUT %zero %Valx %c_i32_7\n"
6625         "OpStore %outP8 %inV8\n"
6626         "${yBeginLoop}"
6627         "\n" //structOut.b16[y] = structIn.b16[y]
6628         "%inP9  = OpAccessChain %i16${inPtr} %ssboIN %zero %Valx %c_i32_9 %Valy\n"
6629         "%inV9  = OpLoad %i16 %inP9\n"
6630         "%outP9 = OpAccessChain %i16outPtr %ssboOUT %zero %Valx %c_i32_9 %Valy\n"
6631         "OpStore %outP9 %inV9\n"
6632         "\n" //structOut.b32[y] = structIn.b32[y]
6633         "%inP10  = OpAccessChain %i32${inPtr} %ssboIN %zero %Valx %c_i32_10 %Valy\n"
6634         "%inV10  = OpLoad %i32 %inP10\n"
6635         "%outP10 = OpAccessChain %i32outPtr %ssboOUT %zero %Valx %c_i32_10 %Valy\n"
6636         "OpStore %outP10 %inV10\n"
6637         "\n" //structOut.structNestedOut[y].b16 = structIn.structNestedIn[y].b16
6638         "%inP11 = OpAccessChain %i16${inPtr} %ssboIN %zero %Valx %c_i32_8 %Valy %zero\n"
6639         "%inV11 = OpLoad %i16 %inP11\n"
6640         "%outP11 = OpAccessChain %i16outPtr %ssboOUT %zero %Valx %c_i32_8 %Valy %zero\n"
6641         "OpStore %outP11 %inV11\n"
6642         "\n" //structOut.structNestedOut[y].b32 = structIn.structNestedIn[y].b32
6643         "%inP12 = OpAccessChain %i32${inPtr} %ssboIN %zero %Valx %c_i32_8 %Valy %c_i32_1\n"
6644         "%inV12 = OpLoad %i32 %inP12\n"
6645         "%outP12 = OpAccessChain %i32outPtr %ssboOUT %zero %Valx %c_i32_8 %Valy %c_i32_1\n"
6646         "OpStore %outP12 %inV12\n"
6647         "${zBeginLoop}"
6648         "\n" //structOut.structNestedOut[y].v2b16[z] = structIn.structNestedIn[y].v2b16[z]
6649         "%inP13  = OpAccessChain %v2i16${inPtr} %ssboIN %zero %Valx %c_i32_8 %Valy %c_i32_2 %Valz\n"
6650         "%inV13  = OpLoad %v2i16 %inP13\n"
6651         "%outP13 = OpAccessChain %v2i16outPtr %ssboOUT %zero %Valx %c_i32_8 %Valy %c_i32_2 %Valz\n"
6652         "OpStore %outP13 %inV13\n"
6653         "\n" //structOut.structNestedOut[y].b32[z] = structIn.structNestedIn[y].b32[z]
6654         "%inP14  = OpAccessChain %i32${inPtr} %ssboIN %zero %Valx %c_i32_8 %Valy %c_i32_3 %Valz\n"
6655         "%inV14  = OpLoad %i32 %inP14\n"
6656         "%outP14 = OpAccessChain %i32outPtr %ssboOUT %zero %Valx %c_i32_8 %Valy %c_i32_3 %Valz\n"
6657         "OpStore %outP14 %inV14\n"
6658         "${zEndLoop}"
6659         "${yEndLoop}"
6660         "${xEndLoop}"
6661         "\n"
6662         "OpBranch %ExitLabel\n"
6663         "%ExitLabel = OpLabel\n"
6664         "OpReturnValue %param\n"
6665         "OpFunctionEnd\n");
6666 
6667     for (uint32_t capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
6668     { // int
6669         const bool isUniform   = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER == CAPABILITIES[capIdx].dtype;
             // Input data generator matches the input buffer kind (std140 data for uniform buffers).
6670         vector<int16_t> inData = isUniform ? dataMixStd140(rnd) : dataMixStd430(rnd);
6671         GraphicsResources resources;
6672         map<string, string> specsLoop;
6673         map<string, string> specsOffset;
6674         map<string, string> specs;
6675         VulkanFeatures features;
6676         string testName = string(CAPABILITIES[capIdx].name);
6677 
             // specsLoop is reused for all three loops: its keys are overwritten before each
             // beginLoop()/endLoop() pair, so the statement order below matters.
             // Outer loop "x" uses c_i32_7 as its count (matches the 7-element struct arrays).
6678         specsLoop["exeCount"] = "c_i32_7";
6679         specsLoop["loopName"] = "x";
6680         specs["xBeginLoop"]   = beginLoop(specsLoop);
6681         specs["xEndLoop"]     = endLoop(specsLoop);
6682 
             // Loop "y" uses c_i32_11 (matches the 11-element member arrays).
6683         specsLoop["exeCount"] = "c_i32_11";
6684         specsLoop["loopName"] = "y";
6685         specs["yBeginLoop"]   = beginLoop(specsLoop);
6686         specs["yEndLoop"]     = endLoop(specsLoop);
6687 
             // Loop "z" uses c_i32_11 (matches the 11-element arrays inside the nested struct).
6688         specsLoop["exeCount"] = "c_i32_11";
6689         specsLoop["loopName"] = "z";
6690         specs["zBeginLoop"]   = beginLoop(specsLoop);
6691         specs["zEndLoop"]     = endLoop(specsLoop);
6692 
6693         specs["storage"]     = isUniform ? "Block" : "BufferBlock";
             // The capabilities template always emits StorageUniformBufferBlock16; the entry's own
             // capability is added only for the uniform-buffer input case.
6694         specs["cap"]         = isUniform ? "OpCapability " + string(CAPABILITIES[capIdx].cap) : "";
             // Input access chains reuse the %<type>outPtr pointer types declared in preMain
             // (all of them are Uniform storage class pointers).
6695         specs["inPtr"]       = "outPtr";
             // specsOffset["InOut"] is switched between "In" and "Out" so the same offset template
             // source can be specialized once for each struct hierarchy.
6696         specsOffset["InOut"] = "In";
6697         specs["InOffsets"]   = StringTemplate(isUniform ? getStructShaderComponet(SHADERTEMPLATE_STRIDEMIX_STD140) :
6698                                                           getStructShaderComponet(SHADERTEMPLATE_STRIDEMIX_STD430))
6699                                  .specialize(specsOffset);
6700         specsOffset["InOut"] = "Out";
6701         specs["OutOffsets"] =
6702             StringTemplate(getStructShaderComponet(SHADERTEMPLATE_STRIDEMIX_STD430)).specialize(specsOffset);
6703 
6704         fragments["capability"] = capabilities.specialize(specs);
6705         fragments["decoration"] = decoration.specialize(specs);
6706         fragments["pre_main"]   = preMain.specialize(specs);
6707         fragments["testfun"]    = testFun.specialize(specs);
6708 
             // Verification callback is instantiated with the input template variant (STD140 or
             // STD430) and the STD430 output variant.
6709         resources.verifyIO =
6710             isUniform ?
6711                 graphicsCheckStruct<int16_t, int16_t, SHADERTEMPLATE_STRIDEMIX_STD140,
6712                                     SHADERTEMPLATE_STRIDEMIX_STD430> :
6713                 graphicsCheckStruct<int16_t, int16_t, SHADERTEMPLATE_STRIDEMIX_STD430, SHADERTEMPLATE_STRIDEMIX_STD430>;
6714         resources.inputs.push_back(Resource(BufferSp(new Int16Buffer(inData)), CAPABILITIES[capIdx].dtype));
6715         resources.outputs.push_back(Resource(BufferSp(new Int16Buffer(outData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
6716 
             // Start from the 16-bit storage features selected by the entry's name, then also
             // require stores/atomics in the vertex-pipeline and fragment stages.
6717         features                                             = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
6718         features.coreFeatures.vertexPipelineStoresAndAtomics = true;
6719         features.coreFeatures.fragmentStoresAndAtomics       = true;
6720 
6721         createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, group,
6722                                 features);
6723     }
6724 }
6725 
// Adds graphics-pipeline test cases to `testGroup` that pass 16-bit floats through shader
// input/output interfaces and convert them to 64-bit floats with OpFConvert.
//
// 64 random float16 values are generated; the expected outputs are the same values converted with
// deFloat16To64(). Two shader variants are defined (scalar f16 -> f64 and v2f16 -> v2f64). For each
// variant the data is split into consecutive chunks of `numPerCase` values and one set of tests
// (createTestsForAllStages) is created per chunk, named "<variant><chunk index>".
//
// NOTE(review): the text preceding `void` on the next line duplicates the signature and, like the
// leading line numbers, looks like a listing/extraction artifact -- confirm against the upstream file.
addGraphics16BitStorageInputOutputFloat16To64Group(tcu::TestCaseGroup * testGroup)6726 void addGraphics16BitStorageInputOutputFloat16To64Group(tcu::TestCaseGroup *testGroup)
6727 {
         // Random generator seeded from the group name's hash.
6728     de::Random rnd(deStringHash(testGroup->getName()));
6729     RGBA defaultColors[4];
6730     vector<string> extensions;
6731     map<string, string> fragments = passthruFragments();
6732     const uint32_t numDataPoints  = 64;
6733     vector<deFloat16> float16Data(getFloat16s(rnd, numDataPoints));
6734     vector<double> float64Data;
6735 
         // Expected outputs: element i is float16Data[i] converted to double.
6736     float64Data.reserve(numDataPoints);
6737     for (uint32_t numIdx = 0; numIdx < numDataPoints; ++numIdx)
6738         float64Data.push_back(deFloat16To64(float16Data[numIdx]));
6739 
6740     extensions.push_back("VK_KHR_16bit_storage");
6741 
6742     fragments["capability"] = "OpCapability StorageInputOutput16\n"
6743                               "OpCapability Float64\n";
6744     fragments["extension"]  = "OpExtension \"SPV_KHR_16bit_storage\"\n";
6745 
6746     getDefaultColors(defaultColors);
6747 
         // Description of one shader variant.
         //   name            - prefix of the generated test names
         //   interfaceOpCall - stored in fragments["interface_op_call"]
         //   interfaceOpFunc - stored in fragments["interface_op_func"] (empty for both variants here)
         //   preMain         - SPIR-V type/pointer declarations stored in fragments["pre_main"]
         //   inputType       - stored in fragments["input_type"]
         //   outputType      - stored in fragments["output_type"]
         //   numPerCase      - number of scalar values consumed from the data arrays per generated test
         //   numElements     - first argument passed to IFDataType for both the input and the output
6748     struct Case
6749     {
6750         const char *name;
6751         const char *interfaceOpCall;
6752         const char *interfaceOpFunc;
6753         const char *preMain;
6754         const char *inputType;
6755         const char *outputType;
6756         uint32_t numPerCase;
6757         uint32_t numElements;
6758     };
6759 
6760     Case cases[] = {{
6761                         // Scalar cases
6762                         "scalar",
6763 
6764                         "OpFConvert %f64",
6765                         "",
6766 
6767                         "             %f16 = OpTypeFloat 16\n"
6768                         "             %f64 = OpTypeFloat 64\n"
6769                         "                %v4f64 = OpTypeVector %f64 4\n"
6770                         "          %ip_f16 = OpTypePointer Input %f16\n"
6771                         "           %a3f16 = OpTypeArray %f16 %c_i32_3\n"
6772                         "        %ip_a3f16 = OpTypePointer Input %a3f16\n"
6773                         "%f64_f16_function = OpTypeFunction %f64 %f16\n"
6774                         "           %a3f64 = OpTypeArray %f64 %c_i32_3\n"
6775                         "            %op_f64 = OpTypePointer Output %f64\n"
6776                         "        %op_a3f64 = OpTypePointer Output %a3f64\n",
6777 
6778                         "f16",
6779                         "f64",
6780                         4,
6781                         1,
6782                     },
6783                     {
6784                         // Vector cases
6785                         "vector",
6786 
6787                         "OpFConvert %v2f64",
6788                         "",
6789 
6790                         "                 %f16 = OpTypeFloat 16\n"
6791                         "                %v2f16 = OpTypeVector %f16 2\n"
6792                         "                 %f64 = OpTypeFloat 64\n"
6793                         "                %v2f64 = OpTypeVector %f64 2\n"
6794                         "                %v4f64 = OpTypeVector %f64 4\n"
6795                         "            %ip_v2f16 = OpTypePointer Input %v2f16\n"
6796                         "             %a3v2f16 = OpTypeArray %v2f16 %c_i32_3\n"
6797                         "          %ip_a3v2f16 = OpTypePointer Input %a3v2f16\n"
6798                         "%v2f64_v2f16_function = OpTypeFunction %v2f64 %v2f16\n"
6799                         "             %a3v2f64 = OpTypeArray %v2f64 %c_i32_3\n"
6800                         "            %op_f64 = OpTypePointer Output %f64\n"
6801                         "            %op_v2f64 = OpTypePointer Output %v2f64\n"
6802                         "            %op_v4f64 = OpTypePointer Output %v4f64\n"
6803                         "          %op_a3v2f64 = OpTypePointer Output %a3v2f64\n",
6804 
6805                         "v2f16",
6806                         "v2f64",
6807                         2 * 4,
6808                         2,
6809                     }};
6810 
         // Every generated test requires 64-bit float support and 16-bit input/output storage.
6811     VulkanFeatures requiredFeatures;
6812 
6813     requiredFeatures.coreFeatures.shaderFloat64           = true;
6814     requiredFeatures.ext16BitStorage.storageInputOutput16 = true;
6815 
6816     for (uint32_t caseIdx = 0; caseIdx < DE_LENGTH_OF_ARRAY(cases); ++caseIdx)
6817     {
6818         fragments["interface_op_call"] = cases[caseIdx].interfaceOpCall;
6819         fragments["interface_op_func"] = cases[caseIdx].interfaceOpFunc;
6820         fragments["pre_main"]          = cases[caseIdx].preMain;
6821 
6822         fragments["input_type"]  = cases[caseIdx].inputType;
6823         fragments["output_type"] = cases[caseIdx].outputType;
6824 
             // Scratch buffers refilled for every chunk; `interfaces` is likewise reused and has its
             // input/output pair replaced by setInputOutput() on each iteration.
6825         GraphicsInterfaces interfaces;
6826         const uint32_t numPerCase = cases[caseIdx].numPerCase;
6827         vector<deFloat16> subInputs(numPerCase);
6828         vector<double> subOutputs(numPerCase);
6829 
             // One test set per chunk: 64 / numPerCase chunks (16 for "scalar", 8 for "vector").
6830         for (uint32_t caseNdx = 0; caseNdx < numDataPoints / numPerCase; ++caseNdx)
6831         {
6832             string testName = string(cases[caseIdx].name) + numberToString(caseNdx);
6833 
                 // Copy this chunk's inputs and matching expected outputs.
6834             for (uint32_t numNdx = 0; numNdx < numPerCase; ++numNdx)
6835             {
6836                 subInputs[numNdx]  = float16Data[caseNdx * numPerCase + numNdx];
6837                 subOutputs[numNdx] = float64Data[caseNdx * numPerCase + numNdx];
6838             }
6839             interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_FLOAT16),
6840                                                      BufferSp(new Float16Buffer(subInputs))),
6841                                       std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_FLOAT64),
6842                                                      BufferSp(new Float64Buffer(subOutputs))));
6843             createTestsForAllStages(testName, defaultColors, defaultColors, fragments, interfaces, extensions,
6844                                     testGroup, requiredFeatures);
6845         }
6846     }
6847 }
6848 
addGraphics16BitStorageUniformFloat16To64Group(tcu::TestCaseGroup * testGroup)6849 void addGraphics16BitStorageUniformFloat16To64Group(tcu::TestCaseGroup *testGroup)
6850 {
6851     de::Random rnd(deStringHash(testGroup->getName()));
6852     map<string, string> fragments;
6853     vector<string> extensions;
6854     const uint32_t numDataPoints = 256;
6855     RGBA defaultColors[4];
6856     const StringTemplate capabilities("OpCapability ${cap}\n"
6857                                       "OpCapability Float64\n");
6858     vector<deFloat16> float16Data = getFloat16s(rnd, numDataPoints);
6859 
6860     struct ConstantIndex
6861     {
6862         bool useConstantIndex;
6863         uint32_t constantIndex;
6864     };
6865 
6866     ConstantIndex constantIndices[] = {{false, 0}, {true, 4}, {true, 5}, {true, 6}};
6867 
6868     extensions.push_back("VK_KHR_16bit_storage");
6869 
6870     fragments["extension"] = "OpExtension \"SPV_KHR_16bit_storage\"";
6871 
6872     getDefaultColors(defaultColors);
6873 
6874     { // scalar cases
6875         const StringTemplate preMain("      %f16 = OpTypeFloat 16\n"
6876                                      "      %f64 = OpTypeFloat 64\n"
6877                                      "%c_i32_256 = OpConstant %i32 256\n"
6878                                      " %c_i32_ci = OpConstant %i32 ${constarrayidx}\n"
6879                                      "   %up_f64 = OpTypePointer Uniform %f64\n"
6880                                      "   %up_f16 = OpTypePointer Uniform %f16\n"
6881                                      "   %ra_f64 = OpTypeArray %f64 %c_i32_256\n"
6882                                      "   %ra_f16 = OpTypeArray %f16 %c_i32_256\n"
6883                                      "   %SSBO64 = OpTypeStruct %ra_f64\n"
6884                                      "   %SSBO16 = OpTypeStruct %ra_f16\n"
6885                                      "%up_SSBO64 = OpTypePointer Uniform %SSBO64\n"
6886                                      "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
6887                                      "   %ssbo64 = OpVariable %up_SSBO64 Uniform\n"
6888                                      "   %ssbo16 = OpVariable %up_SSBO16 Uniform\n");
6889 
6890         const StringTemplate decoration("OpDecorate %ra_f64 ArrayStride 8\n"
6891                                         "OpDecorate %ra_f16 ArrayStride ${stride16}\n"
6892                                         "OpMemberDecorate %SSBO64 0 Offset 0\n"
6893                                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
6894                                         "OpDecorate %SSBO64 BufferBlock\n"
6895                                         "OpDecorate %SSBO16 ${indecor}\n"
6896                                         "OpDecorate %ssbo64 DescriptorSet 0\n"
6897                                         "OpDecorate %ssbo16 DescriptorSet 0\n"
6898                                         "OpDecorate %ssbo64 Binding 1\n"
6899                                         "OpDecorate %ssbo16 Binding 0\n");
6900 
6901         // ssbo64[] <- convert ssbo16[] to 64bit float
6902         const StringTemplate testFun("%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
6903                                      "    %param = OpFunctionParameter %v4f32\n"
6904 
6905                                      "%entry = OpLabel\n"
6906                                      "    %i = OpVariable %fp_i32 Function\n"
6907                                      "         OpStore %i %c_i32_0\n"
6908                                      "         OpBranch %loop\n"
6909 
6910                                      " %loop = OpLabel\n"
6911                                      "   %15 = OpLoad %i32 %i\n"
6912                                      "   %lt = OpSLessThan %bool %15 %c_i32_256\n"
6913                                      "         OpLoopMerge %merge %inc None\n"
6914                                      "         OpBranchConditional %lt %write %merge\n"
6915 
6916                                      "%write = OpLabel\n"
6917                                      "   %30 = OpLoad %i32 %i\n"
6918                                      "  %src = OpAccessChain %up_f16 %ssbo16 %c_i32_0 %${arrayindex}\n"
6919                                      "%val16 = OpLoad %f16 %src\n"
6920                                      "%val64 = OpFConvert %f64 %val16\n"
6921                                      "  %dst = OpAccessChain %up_f64 %ssbo64 %c_i32_0 %30\n"
6922                                      "         OpStore %dst %val64\n"
6923                                      "         OpBranch %inc\n"
6924 
6925                                      "  %inc = OpLabel\n"
6926                                      "   %37 = OpLoad %i32 %i\n"
6927                                      "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
6928                                      "         OpStore %i %39\n"
6929                                      "         OpBranch %loop\n"
6930 
6931                                      "%merge = OpLabel\n"
6932                                      "         OpReturnValue %param\n"
6933 
6934                                      "OpFunctionEnd\n");
6935 
6936         for (uint32_t constIndexIdx = 0; constIndexIdx < DE_LENGTH_OF_ARRAY(constantIndices); ++constIndexIdx)
6937         {
6938             for (uint32_t capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
6939             {
6940                 GraphicsResources resources;
6941                 map<string, string> specs;
6942                 string testName   = string(CAPABILITIES[capIdx].name) + "_scalar_float";
6943                 bool useConstIdx  = constantIndices[constIndexIdx].useConstantIndex;
6944                 uint32_t constIdx = constantIndices[constIndexIdx].constantIndex;
6945                 const bool isUBO  = CAPABILITIES[capIdx].dtype == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
6946 
6947                 specs["cap"]           = CAPABILITIES[capIdx].cap;
6948                 specs["indecor"]       = CAPABILITIES[capIdx].decor;
6949                 specs["constarrayidx"] = de::toString(constIdx);
6950                 specs["stride16"]      = isUBO ? "16" : "2";
6951 
6952                 if (useConstIdx)
6953                     specs["arrayindex"] = "c_i32_ci";
6954                 else
6955                     specs["arrayindex"] = "30";
6956 
6957                 fragments["capability"] = capabilities.specialize(specs);
6958                 fragments["decoration"] = decoration.specialize(specs);
6959                 fragments["pre_main"]   = preMain.specialize(specs);
6960                 fragments["testfun"]    = testFun.specialize(specs);
6961 
6962                 vector<double> float64Data;
6963                 float64Data.reserve(numDataPoints);
6964                 for (uint32_t numIdx = 0; numIdx < numDataPoints; ++numIdx)
6965                     float64Data.push_back(deFloat16To64(float16Data[useConstIdx ? constIdx : numIdx]));
6966 
6967                 resources.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Data, isUBO ? 14 : 0)),
6968                                                     VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
6969                 resources.outputs.push_back(
6970                     Resource(BufferSp(new Float64Buffer(float64Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
6971                 resources.verifyIO = check64BitFloats;
6972                 resources.inputs.back().setDescriptorType(CAPABILITIES[capIdx].dtype);
6973 
6974                 if (useConstIdx)
6975                     testName += string("_const_idx_") + de::toString(constIdx);
6976 
6977                 VulkanFeatures features = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
6978 
6979                 features.coreFeatures.shaderFloat64                  = true;
6980                 features.coreFeatures.vertexPipelineStoresAndAtomics = true;
6981 
6982                 createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions,
6983                                         testGroup, features);
6984             }
6985         }
6986     }
6987 
6988     { // vector cases
6989         const StringTemplate preMain("      %f16 = OpTypeFloat 16\n"
6990                                      "      %f64 = OpTypeFloat 64\n"
6991                                      "%c_i32_128 = OpConstant %i32 128\n"
6992                                      "%c_i32_ci  = OpConstant %i32 ${constarrayidx}\n"
6993                                      "     %v2f16 = OpTypeVector %f16 2\n"
6994                                      "     %v2f64 = OpTypeVector %f64 2\n"
6995                                      " %up_v2f64 = OpTypePointer Uniform %v2f64\n"
6996                                      " %up_v2f16 = OpTypePointer Uniform %v2f16\n"
6997                                      " %ra_v2f64 = OpTypeArray %v2f64 %c_i32_128\n"
6998                                      " %ra_v2f16 = OpTypeArray %v2f16 %c_i32_128\n"
6999                                      "   %SSBO64 = OpTypeStruct %ra_v2f64\n"
7000                                      "   %SSBO16 = OpTypeStruct %ra_v2f16\n"
7001                                      "%up_SSBO64 = OpTypePointer Uniform %SSBO64\n"
7002                                      "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
7003                                      "   %ssbo64 = OpVariable %up_SSBO64 Uniform\n"
7004                                      "   %ssbo16 = OpVariable %up_SSBO16 Uniform\n");
7005 
7006         const StringTemplate decoration("OpDecorate %ra_v2f64 ArrayStride 16\n"
7007                                         "OpDecorate %ra_v2f16 ArrayStride ${stride16}\n"
7008                                         "OpMemberDecorate %SSBO64 0 Offset 0\n"
7009                                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
7010                                         "OpDecorate %SSBO64 BufferBlock\n"
7011                                         "OpDecorate %SSBO16 ${indecor}\n"
7012                                         "OpDecorate %ssbo64 DescriptorSet 0\n"
7013                                         "OpDecorate %ssbo16 DescriptorSet 0\n"
7014                                         "OpDecorate %ssbo64 Binding 1\n"
7015                                         "OpDecorate %ssbo16 Binding 0\n");
7016 
7017         // ssbo64[] <- convert ssbo16[] to 64bit float
7018         const StringTemplate testFun("%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7019                                      "    %param = OpFunctionParameter %v4f32\n"
7020 
7021                                      "%entry = OpLabel\n"
7022                                      "    %i = OpVariable %fp_i32 Function\n"
7023                                      "         OpStore %i %c_i32_0\n"
7024                                      "         OpBranch %loop\n"
7025 
7026                                      " %loop = OpLabel\n"
7027                                      "   %15 = OpLoad %i32 %i\n"
7028                                      "   %lt = OpSLessThan %bool %15 %c_i32_128\n"
7029                                      "         OpLoopMerge %merge %inc None\n"
7030                                      "         OpBranchConditional %lt %write %merge\n"
7031 
7032                                      "%write = OpLabel\n"
7033                                      "   %30 = OpLoad %i32 %i\n"
7034                                      "  %src = OpAccessChain %up_v2f16 %ssbo16 %c_i32_0 %${arrayindex}\n"
7035                                      "%val16 = OpLoad %v2f16 %src\n"
7036                                      "%val64 = OpFConvert %v2f64 %val16\n"
7037                                      "  %dst = OpAccessChain %up_v2f64 %ssbo64 %c_i32_0 %30\n"
7038                                      "         OpStore %dst %val64\n"
7039                                      "         OpBranch %inc\n"
7040 
7041                                      "  %inc = OpLabel\n"
7042                                      "   %37 = OpLoad %i32 %i\n"
7043                                      "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
7044                                      "         OpStore %i %39\n"
7045                                      "         OpBranch %loop\n"
7046 
7047                                      "%merge = OpLabel\n"
7048                                      "         OpReturnValue %param\n"
7049 
7050                                      "OpFunctionEnd\n");
7051 
7052         for (uint32_t constIndexIdx = 0; constIndexIdx < DE_LENGTH_OF_ARRAY(constantIndices); ++constIndexIdx)
7053         {
7054             for (uint32_t capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
7055             {
7056                 GraphicsResources resources;
7057                 map<string, string> specs;
7058                 string testName   = string(CAPABILITIES[capIdx].name) + "_vector_float";
7059                 bool useConstIdx  = constantIndices[constIndexIdx].useConstantIndex;
7060                 uint32_t constIdx = constantIndices[constIndexIdx].constantIndex;
7061                 const bool isUBO  = CAPABILITIES[capIdx].dtype == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
7062 
7063                 specs["cap"]           = CAPABILITIES[capIdx].cap;
7064                 specs["indecor"]       = CAPABILITIES[capIdx].decor;
7065                 specs["constarrayidx"] = de::toString(constIdx);
7066                 specs["stride16"]      = isUBO ? "16" : "4";
7067 
7068                 if (useConstIdx)
7069                     specs["arrayindex"] = "c_i32_ci";
7070                 else
7071                     specs["arrayindex"] = "30";
7072 
7073                 fragments["capability"] = capabilities.specialize(specs);
7074                 fragments["decoration"] = decoration.specialize(specs);
7075                 fragments["pre_main"]   = preMain.specialize(specs);
7076                 fragments["testfun"]    = testFun.specialize(specs);
7077 
7078                 vector<double> float64Data;
7079                 float64Data.reserve(numDataPoints);
7080                 for (uint32_t numIdx = 0; numIdx < numDataPoints; ++numIdx)
7081                     float64Data.push_back(
7082                         deFloat16To64(float16Data[constantIndices[constIndexIdx].useConstantIndex ?
7083                                                       (constantIndices[constIndexIdx].constantIndex * 2 + numIdx % 2) :
7084                                                       numIdx]));
7085 
7086                 vector<tcu::Vector<deFloat16, 2>> float16Vec2Data(float16Data.size() / 2);
7087                 for (size_t elemIdx = 0; elemIdx < float16Data.size(); elemIdx++)
7088                 {
7089                     float16Vec2Data[elemIdx / 2][elemIdx % 2] = float16Data[elemIdx];
7090                 }
7091                 typedef Buffer<tcu::Vector<deFloat16, 2>> Float16Vec2Buffer;
7092                 resources.inputs.push_back(Resource(BufferSp(new Float16Vec2Buffer(float16Vec2Data, isUBO ? 12 : 0)),
7093                                                     VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
7094                 resources.outputs.push_back(
7095                     Resource(BufferSp(new Float64Buffer(float64Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
7096                 resources.verifyIO = check64BitFloats;
7097                 resources.inputs.back().setDescriptorType(CAPABILITIES[capIdx].dtype);
7098 
7099                 if (constantIndices[constIndexIdx].useConstantIndex)
7100                     testName += string("_const_idx_") + de::toString(constantIndices[constIndexIdx].constantIndex);
7101 
7102                 VulkanFeatures features = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
7103 
7104                 features.coreFeatures.shaderFloat64                  = true;
7105                 features.coreFeatures.vertexPipelineStoresAndAtomics = true;
7106 
7107                 createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions,
7108                                         testGroup, features);
7109             }
7110         }
7111     }
7112 
7113     { // matrix cases
7114         fragments["pre_main"] = " %c_i32_32 = OpConstant %i32 32\n"
7115                                 "      %f16 = OpTypeFloat 16\n"
7116                                 "      %f64 = OpTypeFloat 64\n"
7117                                 "    %v2f16 = OpTypeVector %f16 2\n"
7118                                 "    %v2f64 = OpTypeVector %f64 2\n"
7119                                 "  %m4x2f64 = OpTypeMatrix %v2f64 4\n"
7120                                 "  %m4x2f16 = OpTypeMatrix %v2f16 4\n"
7121                                 " %up_v2f64 = OpTypePointer Uniform %v2f64\n"
7122                                 " %up_v2f16 = OpTypePointer Uniform %v2f16\n"
7123                                 "%a8m4x2f64 = OpTypeArray %m4x2f64 %c_i32_32\n"
7124                                 "%a8m4x2f16 = OpTypeArray %m4x2f16 %c_i32_32\n"
7125                                 "   %SSBO64 = OpTypeStruct %a8m4x2f64\n"
7126                                 "   %SSBO16 = OpTypeStruct %a8m4x2f16\n"
7127                                 "%up_SSBO64 = OpTypePointer Uniform %SSBO64\n"
7128                                 "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
7129                                 "   %ssbo64 = OpVariable %up_SSBO64 Uniform\n"
7130                                 "   %ssbo16 = OpVariable %up_SSBO16 Uniform\n";
7131 
7132         const StringTemplate decoration("OpDecorate %a8m4x2f64 ArrayStride 64\n"
7133                                         "OpDecorate %a8m4x2f16 ArrayStride 16\n"
7134                                         "OpMemberDecorate %SSBO64 0 Offset 0\n"
7135                                         "OpMemberDecorate %SSBO64 0 ColMajor\n"
7136                                         "OpMemberDecorate %SSBO64 0 MatrixStride 16\n"
7137                                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
7138                                         "OpMemberDecorate %SSBO16 0 ColMajor\n"
7139                                         "OpMemberDecorate %SSBO16 0 MatrixStride 4\n"
7140                                         "OpDecorate %SSBO64 BufferBlock\n"
7141                                         "OpDecorate %SSBO16 ${indecor}\n"
7142                                         "OpDecorate %ssbo64 DescriptorSet 0\n"
7143                                         "OpDecorate %ssbo16 DescriptorSet 0\n"
7144                                         "OpDecorate %ssbo64 Binding 1\n"
7145                                         "OpDecorate %ssbo16 Binding 0\n");
7146 
7147         fragments["testfun"] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7148                                "    %param = OpFunctionParameter %v4f32\n"
7149 
7150                                "%entry = OpLabel\n"
7151                                "    %i = OpVariable %fp_i32 Function\n"
7152                                "         OpStore %i %c_i32_0\n"
7153                                "         OpBranch %loop\n"
7154 
7155                                " %loop = OpLabel\n"
7156                                "   %15 = OpLoad %i32 %i\n"
7157                                "   %lt = OpSLessThan %bool %15 %c_i32_32\n"
7158                                "         OpLoopMerge %merge %inc None\n"
7159                                "         OpBranchConditional %lt %write %merge\n"
7160 
7161                                "  %write = OpLabel\n"
7162                                "     %30 = OpLoad %i32 %i\n"
7163                                "  %src_0 = OpAccessChain %up_v2f16 %ssbo16 %c_i32_0 %30 %c_i32_0\n"
7164                                "  %src_1 = OpAccessChain %up_v2f16 %ssbo16 %c_i32_0 %30 %c_i32_1\n"
7165                                "  %src_2 = OpAccessChain %up_v2f16 %ssbo16 %c_i32_0 %30 %c_i32_2\n"
7166                                "  %src_3 = OpAccessChain %up_v2f16 %ssbo16 %c_i32_0 %30 %c_i32_3\n"
7167                                "%val16_0 = OpLoad %v2f16 %src_0\n"
7168                                "%val16_1 = OpLoad %v2f16 %src_1\n"
7169                                "%val16_2 = OpLoad %v2f16 %src_2\n"
7170                                "%val16_3 = OpLoad %v2f16 %src_3\n"
7171                                "%val64_0 = OpFConvert %v2f64 %val16_0\n"
7172                                "%val64_1 = OpFConvert %v2f64 %val16_1\n"
7173                                "%val64_2 = OpFConvert %v2f64 %val16_2\n"
7174                                "%val64_3 = OpFConvert %v2f64 %val16_3\n"
7175                                "  %dst_0 = OpAccessChain %up_v2f64 %ssbo64 %c_i32_0 %30 %c_i32_0\n"
7176                                "  %dst_1 = OpAccessChain %up_v2f64 %ssbo64 %c_i32_0 %30 %c_i32_1\n"
7177                                "  %dst_2 = OpAccessChain %up_v2f64 %ssbo64 %c_i32_0 %30 %c_i32_2\n"
7178                                "  %dst_3 = OpAccessChain %up_v2f64 %ssbo64 %c_i32_0 %30 %c_i32_3\n"
7179                                "           OpStore %dst_0 %val64_0\n"
7180                                "           OpStore %dst_1 %val64_1\n"
7181                                "           OpStore %dst_2 %val64_2\n"
7182                                "           OpStore %dst_3 %val64_3\n"
7183                                "           OpBranch %inc\n"
7184 
7185                                "  %inc = OpLabel\n"
7186                                "   %37 = OpLoad %i32 %i\n"
7187                                "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
7188                                "         OpStore %i %39\n"
7189                                "         OpBranch %loop\n"
7190 
7191                                "%merge = OpLabel\n"
7192                                "         OpReturnValue %param\n"
7193 
7194                                "OpFunctionEnd\n";
7195 
7196         for (uint32_t capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
7197         {
7198             GraphicsResources resources;
7199             map<string, string> specs;
7200             string testName = string(CAPABILITIES[capIdx].name) + "_matrix_float";
7201 
7202             specs["cap"]     = CAPABILITIES[capIdx].cap;
7203             specs["indecor"] = CAPABILITIES[capIdx].decor;
7204 
7205             fragments["capability"] = capabilities.specialize(specs);
7206             fragments["decoration"] = decoration.specialize(specs);
7207 
7208             vector<double> float64Data;
7209             float64Data.reserve(numDataPoints);
7210             for (uint32_t numIdx = 0; numIdx < numDataPoints; ++numIdx)
7211                 float64Data.push_back(deFloat16To64(float16Data[numIdx]));
7212 
7213             resources.inputs.push_back(
7214                 Resource(BufferSp(new Float16Buffer(float16Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
7215             resources.outputs.push_back(
7216                 Resource(BufferSp(new Float64Buffer(float64Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
7217             resources.verifyIO = check64BitFloats;
7218             resources.inputs.back().setDescriptorType(CAPABILITIES[capIdx].dtype);
7219 
7220             VulkanFeatures features = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
7221 
7222             features.coreFeatures.shaderFloat64                  = true;
7223             features.coreFeatures.vertexPipelineStoresAndAtomics = true;
7224 
7225             createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, testGroup,
7226                                     features);
7227         }
7228     }
7229 }
7230 
addGraphics16BitStoragePushConstantFloat16To64Group(tcu::TestCaseGroup * testGroup)7231 void addGraphics16BitStoragePushConstantFloat16To64Group(tcu::TestCaseGroup *testGroup)
7232 {
7233     de::Random rnd(deStringHash(testGroup->getName()));
7234     map<string, string> fragments;
7235     RGBA defaultColors[4];
7236     vector<string> extensions;
7237     GraphicsResources resources;
7238     PushConstants pcs;
7239     const uint32_t numDataPoints = 64;
7240     vector<deFloat16> float16Data(getFloat16s(rnd, numDataPoints));
7241     vector<double> float64Data;
7242     VulkanFeatures requiredFeatures;
7243 
7244     float64Data.reserve(numDataPoints);
7245     for (uint32_t numIdx = 0; numIdx < numDataPoints; ++numIdx)
7246         float64Data.push_back(deFloat16To64(float16Data[numIdx]));
7247 
7248     extensions.push_back("VK_KHR_16bit_storage");
7249 
7250     requiredFeatures.coreFeatures.shaderFloat64                  = true;
7251     requiredFeatures.ext16BitStorage.storagePushConstant16       = true;
7252     requiredFeatures.coreFeatures.vertexPipelineStoresAndAtomics = true;
7253 
7254     fragments["capability"] = "OpCapability StoragePushConstant16\n"
7255                               "OpCapability Float64\n";
7256 
7257     fragments["extension"] = "OpExtension \"SPV_KHR_16bit_storage\"";
7258 
7259     pcs.setPushConstant(BufferSp(new Float16Buffer(float16Data)));
7260     resources.outputs.push_back(Resource(BufferSp(new Float64Buffer(float64Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
7261     resources.verifyIO = check64BitFloats;
7262 
7263     getDefaultColors(defaultColors);
7264 
7265     const StringTemplate testFun("%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7266                                  "    %param = OpFunctionParameter %v4f32\n"
7267 
7268                                  "%entry = OpLabel\n"
7269                                  "    %i = OpVariable %fp_i32 Function\n"
7270                                  "         OpStore %i %c_i32_0\n"
7271                                  "         OpBranch %loop\n"
7272 
7273                                  " %loop = OpLabel\n"
7274                                  "   %15 = OpLoad %i32 %i\n"
7275                                  "   %lt = OpSLessThan %bool %15 ${count}\n"
7276                                  "         OpLoopMerge %merge %inc None\n"
7277                                  "         OpBranchConditional %lt %write %merge\n"
7278 
7279                                  "%write = OpLabel\n"
7280                                  "   %30 = OpLoad %i32 %i\n"
7281                                  "  %src = OpAccessChain ${pp_type16} %pc16 %c_i32_0 %30 ${index0:opt}\n"
7282                                  "%val16 = OpLoad ${f_type16} %src\n"
7283                                  "%val64 = OpFConvert ${f_type64} %val16\n"
7284                                  "  %dst = OpAccessChain ${up_type64} %ssbo64 %c_i32_0 %30 ${index0:opt}\n"
7285                                  "         OpStore %dst %val64\n"
7286 
7287                                  "${store:opt}\n"
7288 
7289                                  "         OpBranch %inc\n"
7290 
7291                                  "  %inc = OpLabel\n"
7292                                  "   %37 = OpLoad %i32 %i\n"
7293                                  "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
7294                                  "         OpStore %i %39\n"
7295                                  "         OpBranch %loop\n"
7296 
7297                                  "%merge = OpLabel\n"
7298                                  "         OpReturnValue %param\n"
7299 
7300                                  "OpFunctionEnd\n");
7301 
7302     { // Scalar cases
7303         fragments["pre_main"] = "           %f16 = OpTypeFloat 16\n"
7304                                 "           %f64 = OpTypeFloat 64\n"
7305                                 "      %c_i32_64 = OpConstant %i32 64\n" // Should be the same as numDataPoints
7306                                 "         %v4f64 = OpTypeVector %f64 4\n"
7307                                 "        %a64f16 = OpTypeArray %f16 %c_i32_64\n"
7308                                 "        %a64f64 = OpTypeArray %f64 %c_i32_64\n"
7309                                 "        %pp_f16 = OpTypePointer PushConstant %f16\n"
7310                                 "        %up_f64 = OpTypePointer Uniform %f64\n"
7311                                 "        %SSBO64 = OpTypeStruct %a64f64\n"
7312                                 "     %up_SSBO64 = OpTypePointer Uniform %SSBO64\n"
7313                                 "        %ssbo64 = OpVariable %up_SSBO64 Uniform\n"
7314                                 "          %PC16 = OpTypeStruct %a64f16\n"
7315                                 "       %pp_PC16 = OpTypePointer PushConstant %PC16\n"
7316                                 "          %pc16 = OpVariable %pp_PC16 PushConstant\n";
7317 
7318         fragments["decoration"] = "OpDecorate %a64f16 ArrayStride 2\n"
7319                                   "OpDecorate %a64f64 ArrayStride 8\n"
7320                                   "OpDecorate %SSBO64 BufferBlock\n"
7321                                   "OpMemberDecorate %SSBO64 0 Offset 0\n"
7322                                   "OpDecorate %PC16 Block\n"
7323                                   "OpMemberDecorate %PC16 0 Offset 0\n"
7324                                   "OpDecorate %ssbo64 DescriptorSet 0\n"
7325                                   "OpDecorate %ssbo64 Binding 0\n";
7326 
7327         map<string, string> specs;
7328 
7329         specs["count"]     = "%c_i32_64";
7330         specs["pp_type16"] = "%pp_f16";
7331         specs["f_type16"]  = "%f16";
7332         specs["f_type64"]  = "%f64";
7333         specs["up_type64"] = "%up_f64";
7334 
7335         fragments["testfun"] = testFun.specialize(specs);
7336 
7337         createTestsForAllStages("scalar", defaultColors, defaultColors, fragments, pcs, resources, extensions,
7338                                 testGroup, requiredFeatures);
7339     }
7340 
7341     { // Vector cases
7342         fragments["pre_main"] = "      %f16 = OpTypeFloat 16\n"
7343                                 "      %f64 = OpTypeFloat 64\n"
7344                                 "    %v4f16 = OpTypeVector %f16 4\n"
7345                                 "    %v4f64 = OpTypeVector %f64 4\n"
7346                                 "    %v2f64 = OpTypeVector %f64 2\n"
7347                                 " %c_i32_16 = OpConstant %i32 16\n"
7348                                 " %a16v4f16 = OpTypeArray %v4f16 %c_i32_16\n"
7349                                 " %a16v4f64 = OpTypeArray %v4f64 %c_i32_16\n"
7350                                 " %pp_v4f16 = OpTypePointer PushConstant %v4f16\n"
7351                                 " %up_v4f64 = OpTypePointer Uniform %v4f64\n"
7352                                 "   %SSBO64 = OpTypeStruct %a16v4f64\n"
7353                                 "%up_SSBO64 = OpTypePointer Uniform %SSBO64\n"
7354                                 "   %ssbo64 = OpVariable %up_SSBO64 Uniform\n"
7355                                 "     %PC16 = OpTypeStruct %a16v4f16\n"
7356                                 "  %pp_PC16 = OpTypePointer PushConstant %PC16\n"
7357                                 "     %pc16 = OpVariable %pp_PC16 PushConstant\n";
7358 
7359         fragments["decoration"] = "OpDecorate %a16v4f16 ArrayStride 8\n"
7360                                   "OpDecorate %a16v4f64 ArrayStride 32\n"
7361                                   "OpDecorate %SSBO64 BufferBlock\n"
7362                                   "OpMemberDecorate %SSBO64 0 Offset 0\n"
7363                                   "OpDecorate %PC16 Block\n"
7364                                   "OpMemberDecorate %PC16 0 Offset 0\n"
7365                                   "OpDecorate %ssbo64 DescriptorSet 0\n"
7366                                   "OpDecorate %ssbo64 Binding 0\n";
7367 
7368         map<string, string> specs;
7369 
7370         specs["count"]     = "%c_i32_16";
7371         specs["pp_type16"] = "%pp_v4f16";
7372         specs["f_type16"]  = "%v4f16";
7373         specs["f_type64"]  = "%v4f64";
7374         specs["up_type64"] = "%up_v4f64";
7375 
7376         fragments["testfun"] = testFun.specialize(specs);
7377 
7378         createTestsForAllStages("vector", defaultColors, defaultColors, fragments, pcs, resources, extensions,
7379                                 testGroup, requiredFeatures);
7380     }
7381 
7382     { // Matrix cases
7383         fragments["pre_main"] = "  %c_i32_8 = OpConstant %i32 8\n"
7384                                 "      %f16 = OpTypeFloat 16\n"
7385                                 "    %v4f16 = OpTypeVector %f16 4\n"
7386                                 "      %f64 = OpTypeFloat 64\n"
7387                                 "    %v4f64 = OpTypeVector %f64 4\n"
7388                                 "  %m2v4f16 = OpTypeMatrix %v4f16 2\n"
7389                                 "  %m2v4f64 = OpTypeMatrix %v4f64 2\n"
7390                                 "%a8m2v4f16 = OpTypeArray %m2v4f16 %c_i32_8\n"
7391                                 "%a8m2v4f64 = OpTypeArray %m2v4f64 %c_i32_8\n"
7392                                 " %pp_v4f16 = OpTypePointer PushConstant %v4f16\n"
7393                                 " %up_v4f64 = OpTypePointer Uniform %v4f64\n"
7394                                 "   %SSBO64 = OpTypeStruct %a8m2v4f64\n"
7395                                 "%up_SSBO64 = OpTypePointer Uniform %SSBO64\n"
7396                                 "   %ssbo64 = OpVariable %up_SSBO64 Uniform\n"
7397                                 "     %PC16 = OpTypeStruct %a8m2v4f16\n"
7398                                 "  %pp_PC16 = OpTypePointer PushConstant %PC16\n"
7399                                 "     %pc16 = OpVariable %pp_PC16 PushConstant\n";
7400 
7401         fragments["decoration"] = "OpDecorate %a8m2v4f16 ArrayStride 16\n"
7402                                   "OpDecorate %a8m2v4f64 ArrayStride 64\n"
7403                                   "OpDecorate %SSBO64 BufferBlock\n"
7404                                   "OpMemberDecorate %SSBO64 0 Offset 0\n"
7405                                   "OpMemberDecorate %SSBO64 0 ColMajor\n"
7406                                   "OpMemberDecorate %SSBO64 0 MatrixStride 32\n"
7407                                   "OpDecorate %PC16 Block\n"
7408                                   "OpMemberDecorate %PC16 0 Offset 0\n"
7409                                   "OpMemberDecorate %PC16 0 ColMajor\n"
7410                                   "OpMemberDecorate %PC16 0 MatrixStride 8\n"
7411                                   "OpDecorate %ssbo64 DescriptorSet 0\n"
7412                                   "OpDecorate %ssbo64 Binding 0\n";
7413 
7414         map<string, string> specs;
7415 
7416         specs["count"]     = "%c_i32_8";
7417         specs["pp_type16"] = "%pp_v4f16";
7418         specs["up_type64"] = "%up_v4f64";
7419         specs["f_type16"]  = "%v4f16";
7420         specs["f_type64"]  = "%v4f64";
7421         specs["index0"]    = "%c_i32_0";
7422         specs["store"]     = "  %src_1 = OpAccessChain %pp_v4f16 %pc16 %c_i32_0 %30 %c_i32_1\n"
7423                              "%val16_1 = OpLoad %v4f16 %src_1\n"
7424                              "%val64_1 = OpFConvert %v4f64 %val16_1\n"
7425                              "  %dst_1 = OpAccessChain %up_v4f64 %ssbo64 %c_i32_0 %30 %c_i32_1\n"
7426                              "           OpStore %dst_1 %val64_1\n";
7427 
7428         fragments["testfun"] = testFun.specialize(specs);
7429 
7430         createTestsForAllStages("matrix", defaultColors, defaultColors, fragments, pcs, resources, extensions,
7431                                 testGroup, requiredFeatures);
7432     }
7433 }
7434 
addCompute16bitStorageUniform64To16Group(tcu::TestCaseGroup * group)7435 void addCompute16bitStorageUniform64To16Group(tcu::TestCaseGroup *group)
7436 {
7437     tcu::TestContext &testCtx = group->getTestContext();
7438     de::Random rnd(deStringHash(group->getName()));
7439     const int numElements = 128;
7440 
7441     const StringTemplate shaderTemplate("OpCapability Shader\n"
7442                                         "OpCapability ${capability}\n"
7443                                         "OpCapability Float64\n"
7444                                         "OpExtension \"SPV_KHR_16bit_storage\"\n"
7445                                         "OpMemoryModel Logical GLSL450\n"
7446                                         "OpEntryPoint GLCompute %main \"main\" %id\n"
7447                                         "OpExecutionMode %main LocalSize 1 1 1\n"
7448                                         "OpDecorate %id BuiltIn GlobalInvocationId\n"
7449 
7450                                         "${stride}\n"
7451 
7452                                         "OpMemberDecorate %SSBO64 0 Offset 0\n"
7453                                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
7454                                         "OpDecorate %SSBO64 ${storage}\n"
7455                                         "OpDecorate %SSBO16 BufferBlock\n"
7456                                         "OpDecorate %ssbo64 DescriptorSet 0\n"
7457                                         "OpDecorate %ssbo16 DescriptorSet 0\n"
7458                                         "OpDecorate %ssbo64 Binding 0\n"
7459                                         "OpDecorate %ssbo16 Binding 1\n"
7460 
7461                                         "${matrix_decor:opt}\n"
7462 
7463                                         "${rounding:opt}\n"
7464 
7465                                         "%bool      = OpTypeBool\n"
7466                                         "%void      = OpTypeVoid\n"
7467                                         "%voidf     = OpTypeFunction %void\n"
7468                                         "%u32       = OpTypeInt 32 0\n"
7469                                         "%i32       = OpTypeInt 32 1\n"
7470                                         "%f32       = OpTypeFloat 32\n"
7471                                         "%f64       = OpTypeFloat 64\n"
7472                                         "%uvec3     = OpTypeVector %u32 3\n"
7473                                         "%fvec3     = OpTypeVector %f32 3\n"
7474                                         "%uvec3ptr  = OpTypePointer Input %uvec3\n"
7475                                         "%i32ptr    = OpTypePointer Uniform %i32\n"
7476                                         "%f64ptr    = OpTypePointer Uniform %f64\n"
7477 
7478                                         "%zero      = OpConstant %i32 0\n"
7479                                         "%c_i32_1   = OpConstant %i32 1\n"
7480                                         "%c_i32_16  = OpConstant %i32 16\n"
7481                                         "%c_i32_32  = OpConstant %i32 32\n"
7482                                         "%c_i32_64  = OpConstant %i32 64\n"
7483                                         "%c_i32_128 = OpConstant %i32 128\n"
7484 
7485                                         "%i32arr    = OpTypeArray %i32 %c_i32_128\n"
7486                                         "%f64arr    = OpTypeArray %f64 %c_i32_128\n"
7487 
7488                                         "${types}\n"
7489                                         "${matrix_types:opt}\n"
7490 
7491                                         "%SSBO64    = OpTypeStruct %${matrix_prefix:opt}${base64}arr\n"
7492                                         "%SSBO16    = OpTypeStruct %${matrix_prefix:opt}${base16}arr\n"
7493                                         "%up_SSBO64 = OpTypePointer Uniform %SSBO64\n"
7494                                         "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
7495                                         "%ssbo64    = OpVariable %up_SSBO64 Uniform\n"
7496                                         "%ssbo16    = OpVariable %up_SSBO16 Uniform\n"
7497 
7498                                         "%id        = OpVariable %uvec3ptr Input\n"
7499 
7500                                         "%main      = OpFunction %void None %voidf\n"
7501                                         "%label     = OpLabel\n"
7502                                         "%idval     = OpLoad %uvec3 %id\n"
7503                                         "%x         = OpCompositeExtract %u32 %idval 0\n"
7504                                         "%inloc     = OpAccessChain %${base64}ptr %ssbo64 %zero %x ${index0:opt}\n"
7505                                         "%val64     = OpLoad %${base64} %inloc\n"
7506                                         "%val16     = ${convert} %${base16} %val64\n"
7507                                         "%outloc    = OpAccessChain %${base16}ptr %ssbo16 %zero %x ${index0:opt}\n"
7508                                         "             OpStore %outloc %val16\n"
7509                                         "${matrix_store:opt}\n"
7510                                         "             OpReturn\n"
7511                                         "             OpFunctionEnd\n");
7512 
7513     { // Floats
7514         const char floatTypes[] = "%f16       = OpTypeFloat 16\n"
7515                                   "%f16ptr    = OpTypePointer Uniform %f16\n"
7516                                   "%f16arr    = OpTypeArray %f16 %c_i32_128\n"
7517                                   "%v4f16     = OpTypeVector %f16 4\n"
7518                                   "%v4f64     = OpTypeVector %f64 4\n"
7519                                   "%v4f16ptr  = OpTypePointer Uniform %v4f16\n"
7520                                   "%v4f64ptr  = OpTypePointer Uniform %v4f64\n"
7521                                   "%v4f16arr  = OpTypeArray %v4f16 %c_i32_32\n"
7522                                   "%v4f64arr  = OpTypeArray %v4f64 %c_i32_32\n";
7523 
7524         struct RndMode
7525         {
7526             const char *name;
7527             const char *decor;
7528             VerifyIOFunc func;
7529         };
7530 
7531         const RndMode rndModes[] = {
7532             {"rtz", "OpDecorate %val16  FPRoundingMode RTZ", computeCheck16BitFloats64<ROUNDINGMODE_RTZ>},
7533             {"rte", "OpDecorate %val16  FPRoundingMode RTE", computeCheck16BitFloats64<ROUNDINGMODE_RTE>},
7534             {"unspecified_rnd_mode", "",
7535              computeCheck16BitFloats64<RoundingModeFlags(ROUNDINGMODE_RTE | ROUNDINGMODE_RTZ)>},
7536         };
7537 
7538         struct CompositeType
7539         {
7540             const char *name;
7541             const char *base64;
7542             const char *base16;
7543             const char *strideStr;
7544             const char *stride64UBO;
7545             unsigned padding64UBO;
7546             const char *stride64SSBO;
7547             unsigned padding64SSBO;
7548             unsigned count;
7549         };
7550 
7551         const CompositeType cTypes[] = {
7552             {"scalar", "f64", "f16", "OpDecorate %f16arr ArrayStride 2\nOpDecorate %f64arr ArrayStride ", "16", 8, "8",
7553              0, numElements},
7554             {"vector", "v4f64", "v4f16", "OpDecorate %v4f16arr ArrayStride 8\nOpDecorate %v4f64arr ArrayStride ", "32",
7555              0, "32", 0, numElements / 4},
7556             {"matrix", "v4f64", "v4f16", "OpDecorate %m2v4f16arr ArrayStride 16\nOpDecorate %m2v4f64arr ArrayStride ",
7557              "64", 0, "64", 0, numElements / 8},
7558         };
7559 
7560         vector<double> float64Data = getFloat64s(rnd, numElements);
7561         vector<deFloat16> float16UnusedData(numElements, 0);
7562 
7563         for (uint32_t capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
7564             for (uint32_t tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes); ++tyIdx)
7565                 for (uint32_t rndModeIdx = 0; rndModeIdx < DE_LENGTH_OF_ARRAY(rndModes); ++rndModeIdx)
7566                 {
7567                     ComputeShaderSpec spec;
7568                     map<string, string> specs;
7569                     string testName = string(CAPABILITIES[capIdx].name) + "_" + cTypes[tyIdx].name + "_float_" +
7570                                       rndModes[rndModeIdx].name;
7571                     const bool isUBO = CAPABILITIES[capIdx].dtype == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
7572 
7573                     specs["capability"] = CAPABILITIES[capIdx].cap;
7574                     specs["storage"]    = CAPABILITIES[capIdx].decor;
7575                     specs["stride"]     = cTypes[tyIdx].strideStr;
7576                     specs["base64"]     = cTypes[tyIdx].base64;
7577                     specs["base16"]     = cTypes[tyIdx].base16;
7578                     specs["rounding"]   = rndModes[rndModeIdx].decor;
7579                     specs["types"]      = floatTypes;
7580                     specs["convert"]    = "OpFConvert";
7581 
7582                     if (isUBO)
7583                         specs["stride"] += cTypes[tyIdx].stride64UBO;
7584                     else
7585                         specs["stride"] += cTypes[tyIdx].stride64SSBO;
7586 
7587                     if (deStringEqual(cTypes[tyIdx].name, "matrix"))
7588                     {
7589                         if (strcmp(rndModes[rndModeIdx].name, "rtz") == 0)
7590                             specs["rounding"] += "\nOpDecorate %val16_1  FPRoundingMode RTZ\n";
7591                         else if (strcmp(rndModes[rndModeIdx].name, "rte") == 0)
7592                             specs["rounding"] += "\nOpDecorate %val16_1  FPRoundingMode RTE\n";
7593 
7594                         specs["index0"]        = "%zero";
7595                         specs["matrix_prefix"] = "m2";
7596                         specs["matrix_types"]  = "%m2v4f16 = OpTypeMatrix %v4f16 2\n"
7597                                                  "%m2v4f64 = OpTypeMatrix %v4f64 2\n"
7598                                                  "%m2v4f16arr = OpTypeArray %m2v4f16 %c_i32_16\n"
7599                                                  "%m2v4f64arr = OpTypeArray %m2v4f64 %c_i32_16\n";
7600                         specs["matrix_decor"]  = "OpMemberDecorate %SSBO64 0 ColMajor\n"
7601                                                  "OpMemberDecorate %SSBO64 0 MatrixStride 32\n"
7602                                                  "OpMemberDecorate %SSBO16 0 ColMajor\n"
7603                                                  "OpMemberDecorate %SSBO16 0 MatrixStride 8\n";
7604                         specs["matrix_store"]  = "%inloc_1  = OpAccessChain %v4f64ptr %ssbo64 %zero %x %c_i32_1\n"
7605                                                  "%val64_1  = OpLoad %v4f64 %inloc_1\n"
7606                                                  "%val16_1  = OpFConvert %v4f16 %val64_1\n"
7607                                                  "%outloc_1 = OpAccessChain %v4f16ptr %ssbo16 %zero %x %c_i32_1\n"
7608                                                  "            OpStore %outloc_1 %val16_1\n";
7609                     }
7610 
7611                     spec.assembly          = shaderTemplate.specialize(specs);
7612                     spec.numWorkGroups     = IVec3(cTypes[tyIdx].count, 1, 1);
7613                     spec.verifyIO          = rndModes[rndModeIdx].func;
7614                     const unsigned padding = isUBO ? cTypes[tyIdx].padding64UBO : cTypes[tyIdx].padding64SSBO;
7615 
7616                     spec.inputs.push_back(
7617                         Resource(BufferSp(new Float64Buffer(float64Data, padding)), CAPABILITIES[capIdx].dtype));
7618 
7619                     // We provided a custom verifyIO in the above in which inputs will be used for checking.
7620                     // So put unused data in the expected values.
7621                     spec.outputs.push_back(BufferSp(new Float16Buffer(float16UnusedData)));
7622 
7623                     spec.extensions.push_back("VK_KHR_16bit_storage");
7624 
7625                     spec.requestedVulkanFeatures = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
7626                     spec.requestedVulkanFeatures.coreFeatures.shaderFloat64 = VK_TRUE;
7627 
7628                     group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), spec));
7629                 }
7630     }
7631 }
7632 
addGraphics16BitStorageUniformFloat64To16Group(tcu::TestCaseGroup * testGroup)7633 void addGraphics16BitStorageUniformFloat64To16Group(tcu::TestCaseGroup *testGroup)
7634 {
7635     de::Random rnd(deStringHash(testGroup->getName()));
7636     map<string, string> fragments;
7637     GraphicsResources resources;
7638     vector<string> extensions;
7639     const uint32_t numDataPoints = 256;
7640     RGBA defaultColors[4];
7641     vector<double> float64Data = getFloat64s(rnd, numDataPoints);
7642     vector<deFloat16> float16UnusedData(numDataPoints, 0);
7643     const StringTemplate capabilities("OpCapability Float64\n"
7644                                       "OpCapability ${cap}\n");
7645     // We use a custom verifyIO to check the result via computing directly from inputs; the contents in outputs do not matter.
7646     resources.outputs.push_back(
7647         Resource(BufferSp(new Float16Buffer(float16UnusedData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
7648 
7649     extensions.push_back("VK_KHR_16bit_storage");
7650 
7651     fragments["extension"] = "OpExtension \"SPV_KHR_16bit_storage\"";
7652 
7653     struct RndMode
7654     {
7655         const char *name;
7656         const char *decor;
7657         VerifyIOFunc f;
7658     };
7659 
7660     getDefaultColors(defaultColors);
7661 
7662     { // scalar cases
7663         fragments["pre_main"] = "      %f16 = OpTypeFloat 16\n"
7664                                 "      %f64 = OpTypeFloat 64\n"
7665                                 "%c_i32_256 = OpConstant %i32 256\n"
7666                                 "   %up_f64 = OpTypePointer Uniform %f64\n"
7667                                 "   %up_f16 = OpTypePointer Uniform %f16\n"
7668                                 "   %ra_f64 = OpTypeArray %f64 %c_i32_256\n"
7669                                 "   %ra_f16 = OpTypeArray %f16 %c_i32_256\n"
7670                                 "   %SSBO64 = OpTypeStruct %ra_f64\n"
7671                                 "   %SSBO16 = OpTypeStruct %ra_f16\n"
7672                                 "%up_SSBO64 = OpTypePointer Uniform %SSBO64\n"
7673                                 "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
7674                                 "   %ssbo64 = OpVariable %up_SSBO64 Uniform\n"
7675                                 "   %ssbo16 = OpVariable %up_SSBO16 Uniform\n";
7676 
7677         const StringTemplate decoration("OpDecorate %ra_f64 ArrayStride ${stride64}\n"
7678                                         "OpDecorate %ra_f16 ArrayStride 2\n"
7679                                         "OpMemberDecorate %SSBO64 0 Offset 0\n"
7680                                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
7681                                         "OpDecorate %SSBO64 ${indecor}\n"
7682                                         "OpDecorate %SSBO16 BufferBlock\n"
7683                                         "OpDecorate %ssbo64 DescriptorSet 0\n"
7684                                         "OpDecorate %ssbo16 DescriptorSet 0\n"
7685                                         "OpDecorate %ssbo64 Binding 0\n"
7686                                         "OpDecorate %ssbo16 Binding 1\n"
7687                                         "${rounddecor}\n");
7688 
7689         fragments["testfun"] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7690                                "    %param = OpFunctionParameter %v4f32\n"
7691 
7692                                "%entry = OpLabel\n"
7693                                "    %i = OpVariable %fp_i32 Function\n"
7694                                "         OpStore %i %c_i32_0\n"
7695                                "         OpBranch %loop\n"
7696 
7697                                " %loop = OpLabel\n"
7698                                "   %15 = OpLoad %i32 %i\n"
7699                                "   %lt = OpSLessThan %bool %15 %c_i32_256\n"
7700                                "         OpLoopMerge %merge %inc None\n"
7701                                "         OpBranchConditional %lt %write %merge\n"
7702 
7703                                "%write = OpLabel\n"
7704                                "   %30 = OpLoad %i32 %i\n"
7705                                "  %src = OpAccessChain %up_f64 %ssbo64 %c_i32_0 %30\n"
7706                                "%val64 = OpLoad %f64 %src\n"
7707                                "%val16 = OpFConvert %f16 %val64\n"
7708                                "  %dst = OpAccessChain %up_f16 %ssbo16 %c_i32_0 %30\n"
7709                                "         OpStore %dst %val16\n"
7710                                "         OpBranch %inc\n"
7711 
7712                                "  %inc = OpLabel\n"
7713                                "   %37 = OpLoad %i32 %i\n"
7714                                "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
7715                                "         OpStore %i %39\n"
7716                                "         OpBranch %loop\n"
7717 
7718                                "%merge = OpLabel\n"
7719                                "         OpReturnValue %param\n"
7720 
7721                                "OpFunctionEnd\n";
7722 
7723         const RndMode rndModes[] = {
7724             {"rtz", "OpDecorate %val16  FPRoundingMode RTZ", graphicsCheck16BitFloats64<ROUNDINGMODE_RTZ>},
7725             {"rte", "OpDecorate %val16  FPRoundingMode RTE", graphicsCheck16BitFloats64<ROUNDINGMODE_RTE>},
7726             {"unspecified_rnd_mode", "",
7727              graphicsCheck16BitFloats64<RoundingModeFlags(ROUNDINGMODE_RTE | ROUNDINGMODE_RTZ)>},
7728         };
7729 
7730         for (uint32_t capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
7731             for (uint32_t rndModeIdx = 0; rndModeIdx < DE_LENGTH_OF_ARRAY(rndModes); ++rndModeIdx)
7732             {
7733                 map<string, string> specs;
7734                 string testName  = string(CAPABILITIES[capIdx].name) + "_scalar_float_" + rndModes[rndModeIdx].name;
7735                 const bool isUBO = CAPABILITIES[capIdx].dtype == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
7736                 VulkanFeatures features;
7737 
7738                 specs["cap"]        = CAPABILITIES[capIdx].cap;
7739                 specs["indecor"]    = CAPABILITIES[capIdx].decor;
7740                 specs["rounddecor"] = rndModes[rndModeIdx].decor;
7741                 specs["stride64"]   = isUBO ? "16" : "8";
7742 
7743                 fragments["capability"] = capabilities.specialize(specs);
7744                 fragments["decoration"] = decoration.specialize(specs);
7745 
7746                 resources.inputs.clear();
7747                 resources.inputs.push_back(
7748                     Resource(BufferSp(new Float64Buffer(float64Data, isUBO ? 8 : 0)), CAPABILITIES[capIdx].dtype));
7749 
7750                 resources.verifyIO = rndModes[rndModeIdx].f;
7751 
7752                 features                            = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
7753                 features.coreFeatures.shaderFloat64 = true;
7754                 features.coreFeatures.vertexPipelineStoresAndAtomics = true;
7755 
7756                 createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions,
7757                                         testGroup, features);
7758             }
7759     }
7760 
7761     { // vector cases
7762         fragments["pre_main"] = "      %f16 = OpTypeFloat 16\n"
7763                                 "      %f64 = OpTypeFloat 64\n"
7764                                 " %c_i32_64 = OpConstant %i32 64\n"
7765                                 "     %v4f16 = OpTypeVector %f16 4\n"
7766                                 "     %v4f64 = OpTypeVector %f64 4\n"
7767                                 " %up_v4f64 = OpTypePointer Uniform %v4f64\n"
7768                                 " %up_v4f16 = OpTypePointer Uniform %v4f16\n"
7769                                 " %ra_v4f64 = OpTypeArray %v4f64 %c_i32_64\n"
7770                                 " %ra_v4f16 = OpTypeArray %v4f16 %c_i32_64\n"
7771                                 "   %SSBO64 = OpTypeStruct %ra_v4f64\n"
7772                                 "   %SSBO16 = OpTypeStruct %ra_v4f16\n"
7773                                 "%up_SSBO64 = OpTypePointer Uniform %SSBO64\n"
7774                                 "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
7775                                 "   %ssbo64 = OpVariable %up_SSBO64 Uniform\n"
7776                                 "   %ssbo16 = OpVariable %up_SSBO16 Uniform\n";
7777 
7778         const StringTemplate decoration("OpDecorate %ra_v4f64 ArrayStride 32\n"
7779                                         "OpDecorate %ra_v4f16 ArrayStride 8\n"
7780                                         "OpMemberDecorate %SSBO64 0 Offset 0\n"
7781                                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
7782                                         "OpDecorate %SSBO64 ${indecor}\n"
7783                                         "OpDecorate %SSBO16 BufferBlock\n"
7784                                         "OpDecorate %ssbo64 DescriptorSet 0\n"
7785                                         "OpDecorate %ssbo16 DescriptorSet 0\n"
7786                                         "OpDecorate %ssbo64 Binding 0\n"
7787                                         "OpDecorate %ssbo16 Binding 1\n"
7788                                         "${rounddecor}\n");
7789 
7790         // ssbo16[] <- convert ssbo64[] to 16bit float
7791         fragments["testfun"] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7792                                "    %param = OpFunctionParameter %v4f32\n"
7793 
7794                                "%entry = OpLabel\n"
7795                                "    %i = OpVariable %fp_i32 Function\n"
7796                                "         OpStore %i %c_i32_0\n"
7797                                "         OpBranch %loop\n"
7798 
7799                                " %loop = OpLabel\n"
7800                                "   %15 = OpLoad %i32 %i\n"
7801                                "   %lt = OpSLessThan %bool %15 %c_i32_64\n"
7802                                "         OpLoopMerge %merge %inc None\n"
7803                                "         OpBranchConditional %lt %write %merge\n"
7804 
7805                                "%write = OpLabel\n"
7806                                "   %30 = OpLoad %i32 %i\n"
7807                                "  %src = OpAccessChain %up_v4f64 %ssbo64 %c_i32_0 %30\n"
7808                                "%val64 = OpLoad %v4f64 %src\n"
7809                                "%val16 = OpFConvert %v4f16 %val64\n"
7810                                "  %dst = OpAccessChain %up_v4f16 %ssbo16 %c_i32_0 %30\n"
7811                                "         OpStore %dst %val16\n"
7812                                "         OpBranch %inc\n"
7813 
7814                                "  %inc = OpLabel\n"
7815                                "   %37 = OpLoad %i32 %i\n"
7816                                "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
7817                                "         OpStore %i %39\n"
7818                                "         OpBranch %loop\n"
7819 
7820                                "%merge = OpLabel\n"
7821                                "         OpReturnValue %param\n"
7822 
7823                                "OpFunctionEnd\n";
7824 
7825         const RndMode rndModes[] = {
7826             {"rtz", "OpDecorate %val16  FPRoundingMode RTZ", graphicsCheck16BitFloats64<ROUNDINGMODE_RTZ>},
7827             {"rte", "OpDecorate %val16  FPRoundingMode RTE", graphicsCheck16BitFloats64<ROUNDINGMODE_RTE>},
7828             {"unspecified_rnd_mode", "",
7829              graphicsCheck16BitFloats64<RoundingModeFlags(ROUNDINGMODE_RTE | ROUNDINGMODE_RTZ)>},
7830         };
7831 
7832         for (uint32_t capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
7833             for (uint32_t rndModeIdx = 0; rndModeIdx < DE_LENGTH_OF_ARRAY(rndModes); ++rndModeIdx)
7834             {
7835                 map<string, string> specs;
7836                 VulkanFeatures features;
7837                 string testName = string(CAPABILITIES[capIdx].name) + "_vector_float_" + rndModes[rndModeIdx].name;
7838 
7839                 specs["cap"]        = CAPABILITIES[capIdx].cap;
7840                 specs["indecor"]    = CAPABILITIES[capIdx].decor;
7841                 specs["rounddecor"] = rndModes[rndModeIdx].decor;
7842 
7843                 fragments["capability"] = capabilities.specialize(specs);
7844                 fragments["decoration"] = decoration.specialize(specs);
7845 
7846                 resources.inputs.clear();
7847                 resources.inputs.push_back(
7848                     Resource(BufferSp(new Float64Buffer(float64Data)), CAPABILITIES[capIdx].dtype));
7849                 resources.verifyIO = rndModes[rndModeIdx].f;
7850 
7851                 features                            = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
7852                 features.coreFeatures.shaderFloat64 = true;
7853                 features.coreFeatures.vertexPipelineStoresAndAtomics = true;
7854 
7855                 createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions,
7856                                         testGroup, features);
7857             }
7858     }
7859 
7860     { // matrix cases
7861         fragments["pre_main"] = "       %f16 = OpTypeFloat 16\n"
7862                                 "       %f64 = OpTypeFloat 64\n"
7863                                 "  %c_i32_16 = OpConstant %i32 16\n"
7864                                 "     %v4f16 = OpTypeVector %f16 4\n"
7865                                 "     %v4f64 = OpTypeVector %f64 4\n"
7866                                 "   %m4x4f64 = OpTypeMatrix %v4f64 4\n"
7867                                 "   %m4x4f16 = OpTypeMatrix %v4f16 4\n"
7868                                 "  %up_v4f64 = OpTypePointer Uniform %v4f64\n"
7869                                 "  %up_v4f16 = OpTypePointer Uniform %v4f16\n"
7870                                 "%a16m4x4f64 = OpTypeArray %m4x4f64 %c_i32_16\n"
7871                                 "%a16m4x4f16 = OpTypeArray %m4x4f16 %c_i32_16\n"
7872                                 "    %SSBO64 = OpTypeStruct %a16m4x4f64\n"
7873                                 "    %SSBO16 = OpTypeStruct %a16m4x4f16\n"
7874                                 " %up_SSBO64 = OpTypePointer Uniform %SSBO64\n"
7875                                 " %up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
7876                                 "    %ssbo64 = OpVariable %up_SSBO64 Uniform\n"
7877                                 "    %ssbo16 = OpVariable %up_SSBO16 Uniform\n";
7878 
7879         const StringTemplate decoration("OpDecorate %a16m4x4f64 ArrayStride 128\n"
7880                                         "OpDecorate %a16m4x4f16 ArrayStride 32\n"
7881                                         "OpMemberDecorate %SSBO64 0 Offset 0\n"
7882                                         "OpMemberDecorate %SSBO64 0 ColMajor\n"
7883                                         "OpMemberDecorate %SSBO64 0 MatrixStride 32\n"
7884                                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
7885                                         "OpMemberDecorate %SSBO16 0 ColMajor\n"
7886                                         "OpMemberDecorate %SSBO16 0 MatrixStride 8\n"
7887                                         "OpDecorate %SSBO64 ${indecor}\n"
7888                                         "OpDecorate %SSBO16 BufferBlock\n"
7889                                         "OpDecorate %ssbo64 DescriptorSet 0\n"
7890                                         "OpDecorate %ssbo16 DescriptorSet 0\n"
7891                                         "OpDecorate %ssbo64 Binding 0\n"
7892                                         "OpDecorate %ssbo16 Binding 1\n"
7893                                         "${rounddecor}\n");
7894 
7895         fragments["testfun"] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7896                                "    %param = OpFunctionParameter %v4f32\n"
7897 
7898                                "%entry = OpLabel\n"
7899                                "    %i = OpVariable %fp_i32 Function\n"
7900                                "         OpStore %i %c_i32_0\n"
7901                                "         OpBranch %loop\n"
7902 
7903                                " %loop = OpLabel\n"
7904                                "   %15 = OpLoad %i32 %i\n"
7905                                "   %lt = OpSLessThan %bool %15 %c_i32_16\n"
7906                                "         OpLoopMerge %merge %inc None\n"
7907                                "         OpBranchConditional %lt %write %merge\n"
7908 
7909                                "  %write = OpLabel\n"
7910                                "     %30 = OpLoad %i32 %i\n"
7911                                "  %src_0 = OpAccessChain %up_v4f64 %ssbo64 %c_i32_0 %30 %c_i32_0\n"
7912                                "  %src_1 = OpAccessChain %up_v4f64 %ssbo64 %c_i32_0 %30 %c_i32_1\n"
7913                                "  %src_2 = OpAccessChain %up_v4f64 %ssbo64 %c_i32_0 %30 %c_i32_2\n"
7914                                "  %src_3 = OpAccessChain %up_v4f64 %ssbo64 %c_i32_0 %30 %c_i32_3\n"
7915                                "%val64_0 = OpLoad %v4f64 %src_0\n"
7916                                "%val64_1 = OpLoad %v4f64 %src_1\n"
7917                                "%val64_2 = OpLoad %v4f64 %src_2\n"
7918                                "%val64_3 = OpLoad %v4f64 %src_3\n"
7919                                "%val16_0 = OpFConvert %v4f16 %val64_0\n"
7920                                "%val16_1 = OpFConvert %v4f16 %val64_1\n"
7921                                "%val16_2 = OpFConvert %v4f16 %val64_2\n"
7922                                "%val16_3 = OpFConvert %v4f16 %val64_3\n"
7923                                "  %dst_0 = OpAccessChain %up_v4f16 %ssbo16 %c_i32_0 %30 %c_i32_0\n"
7924                                "  %dst_1 = OpAccessChain %up_v4f16 %ssbo16 %c_i32_0 %30 %c_i32_1\n"
7925                                "  %dst_2 = OpAccessChain %up_v4f16 %ssbo16 %c_i32_0 %30 %c_i32_2\n"
7926                                "  %dst_3 = OpAccessChain %up_v4f16 %ssbo16 %c_i32_0 %30 %c_i32_3\n"
7927                                "           OpStore %dst_0 %val16_0\n"
7928                                "           OpStore %dst_1 %val16_1\n"
7929                                "           OpStore %dst_2 %val16_2\n"
7930                                "           OpStore %dst_3 %val16_3\n"
7931                                "           OpBranch %inc\n"
7932 
7933                                "  %inc = OpLabel\n"
7934                                "   %37 = OpLoad %i32 %i\n"
7935                                "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
7936                                "         OpStore %i %39\n"
7937                                "         OpBranch %loop\n"
7938 
7939                                "%merge = OpLabel\n"
7940                                "         OpReturnValue %param\n"
7941 
7942                                "OpFunctionEnd\n";
7943 
7944         const RndMode rndModes[] = {
7945             {"rte",
7946              "OpDecorate %val16_0  FPRoundingMode RTE\nOpDecorate %val16_1  FPRoundingMode RTE\nOpDecorate %val16_2  "
7947              "FPRoundingMode RTE\nOpDecorate %val16_3  FPRoundingMode RTE",
7948              graphicsCheck16BitFloats64<ROUNDINGMODE_RTE>},
7949             {"rtz",
7950              "OpDecorate %val16_0  FPRoundingMode RTZ\nOpDecorate %val16_1  FPRoundingMode RTZ\nOpDecorate %val16_2  "
7951              "FPRoundingMode RTZ\nOpDecorate %val16_3  FPRoundingMode RTZ",
7952              graphicsCheck16BitFloats64<ROUNDINGMODE_RTZ>},
7953             {"unspecified_rnd_mode", "",
7954              graphicsCheck16BitFloats64<RoundingModeFlags(ROUNDINGMODE_RTE | ROUNDINGMODE_RTZ)>},
7955         };
7956 
7957         for (uint32_t capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
7958             for (uint32_t rndModeIdx = 0; rndModeIdx < DE_LENGTH_OF_ARRAY(rndModes); ++rndModeIdx)
7959             {
7960                 map<string, string> specs;
7961                 VulkanFeatures features;
7962                 string testName = string(CAPABILITIES[capIdx].name) + "_matrix_float_" + rndModes[rndModeIdx].name;
7963 
7964                 specs["cap"]        = CAPABILITIES[capIdx].cap;
7965                 specs["indecor"]    = CAPABILITIES[capIdx].decor;
7966                 specs["rounddecor"] = rndModes[rndModeIdx].decor;
7967 
7968                 fragments["capability"] = capabilities.specialize(specs);
7969                 fragments["decoration"] = decoration.specialize(specs);
7970 
7971                 resources.inputs.clear();
7972                 resources.inputs.push_back(
7973                     Resource(BufferSp(new Float64Buffer(float64Data)), CAPABILITIES[capIdx].dtype));
7974                 resources.verifyIO = rndModes[rndModeIdx].f;
7975 
7976                 features                            = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
7977                 features.coreFeatures.shaderFloat64 = true;
7978                 features.coreFeatures.vertexPipelineStoresAndAtomics = true;
7979 
7980                 createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions,
7981                                         testGroup, features);
7982             }
7983     }
7984 }
7985 
addGraphics16BitStorageInputOutputFloat64To16Group(tcu::TestCaseGroup * testGroup)7986 void addGraphics16BitStorageInputOutputFloat64To16Group(tcu::TestCaseGroup *testGroup)
7987 {
7988     de::Random rnd(deStringHash(testGroup->getName()));
7989     RGBA defaultColors[4];
7990     vector<string> extensions;
7991     map<string, string> fragments = passthruFragments();
7992     const uint32_t numDataPoints  = 64;
7993     // Special values like inf/nan/denormal may not be preserved when float control features are not provided,
7994     // thus values generating special float16 values must be excluded in input data here.
7995     vector<double> float64Data = getFloat64s(rnd, numDataPoints, false);
7996 
7997     extensions.push_back("VK_KHR_16bit_storage");
7998 
7999     fragments["capability"] = "OpCapability StorageInputOutput16\n"
8000                               "OpCapability Float64\n";
8001     fragments["extension"]  = "OpExtension \"SPV_KHR_16bit_storage\"\n";
8002 
8003     getDefaultColors(defaultColors);
8004 
8005     struct RndMode
8006     {
8007         const char *name;
8008         const char *decor;
8009         const char *decor_tessc;
8010         RoundingModeFlags flags;
8011     };
8012 
8013     const RndMode rndModes[] = {
8014         {"rtz", "OpDecorate %ret0  FPRoundingMode RTZ\n",
8015          "OpDecorate %ret1  FPRoundingMode RTZ\n"
8016          "OpDecorate %ret2  FPRoundingMode RTZ\n",
8017          ROUNDINGMODE_RTZ},
8018         {"rte", "OpDecorate %ret0  FPRoundingMode RTE\n",
8019          "OpDecorate %ret1  FPRoundingMode RTE\n"
8020          "OpDecorate %ret2  FPRoundingMode RTE\n",
8021          ROUNDINGMODE_RTE},
8022         {"unspecified_rnd_mode", "", "", RoundingModeFlags(ROUNDINGMODE_RTE | ROUNDINGMODE_RTZ)},
8023     };
8024 
8025     struct Case
8026     {
8027         const char *name;
8028         const char *interfaceOpCall;
8029         const char *interfaceOpFunc;
8030         const char *postInterfaceOp;
8031         const char *postInterfaceOpGeom;
8032         const char *postInterfaceOpTessc;
8033         const char *preMain;
8034         const char *inputType;
8035         const char *outputType;
8036         uint32_t numPerCase;
8037         uint32_t numElements;
8038     };
8039 
8040     const Case cases[] = {{
8041                               // Scalar cases
8042                               "scalar",
8043 
8044                               "OpFConvert %f16",
8045 
8046                               "",
8047 
8048                               "             %ret0 = OpFConvert %f16 %IF_input_val\n"
8049                               "                OpStore %IF_output %ret0\n",
8050 
8051                               "             %ret0 = OpFConvert %f16 %IF_input_val0\n"
8052                               "                OpStore %IF_output %ret0\n",
8053 
8054                               "             %ret0 = OpFConvert %f16 %IF_input_val0\n"
8055                               "                OpStore %IF_output_ptr0 %ret0\n"
8056                               "             %ret1 = OpFConvert %f16 %IF_input_val1\n"
8057                               "                OpStore %IF_output_ptr1 %ret1\n"
8058                               "             %ret2 = OpFConvert %f16 %IF_input_val2\n"
8059                               "                OpStore %IF_output_ptr2 %ret2\n",
8060 
8061                               "             %f16 = OpTypeFloat 16\n"
8062                               "             %f64 = OpTypeFloat 64\n"
8063                               "          %op_f16 = OpTypePointer Output %f16\n"
8064                               "           %a3f16 = OpTypeArray %f16 %c_i32_3\n"
8065                               "        %op_a3f16 = OpTypePointer Output %a3f16\n"
8066                               "%f16_f64_function = OpTypeFunction %f16 %f64\n"
8067                               "           %a3f64 = OpTypeArray %f64 %c_i32_3\n"
8068                               "        %ip_a3f64 = OpTypePointer Input %a3f64\n"
8069                               "          %ip_f64 = OpTypePointer Input %f64\n",
8070 
8071                               "f64",
8072                               "f16",
8073                               4,
8074                               1,
8075                           },
8076                           {
8077                               // Vector cases
8078                               "vector",
8079 
8080                               "OpFConvert %v2f16",
8081 
8082                               "",
8083 
8084                               "             %ret0 = OpFConvert %v2f16 %IF_input_val\n"
8085                               "                OpStore %IF_output %ret0\n",
8086 
8087                               "             %ret0 = OpFConvert %v2f16 %IF_input_val0\n"
8088                               "                OpStore %IF_output %ret0\n",
8089 
8090                               "             %ret0 = OpFConvert %v2f16 %IF_input_val0\n"
8091                               "                OpStore %IF_output_ptr0 %ret0\n"
8092                               "             %ret1 = OpFConvert %v2f16 %IF_input_val1\n"
8093                               "                OpStore %IF_output_ptr1 %ret1\n"
8094                               "             %ret2 = OpFConvert %v2f16 %IF_input_val2\n"
8095                               "                OpStore %IF_output_ptr2 %ret2\n",
8096 
8097                               "                 %f16 = OpTypeFloat 16\n"
8098                               "                 %f64 = OpTypeFloat 64\n"
8099                               "               %v2f16 = OpTypeVector %f16 2\n"
8100                               "               %v2f64 = OpTypeVector %f64 2\n"
8101                               "            %op_v2f16 = OpTypePointer Output %v2f16\n"
8102                               "             %a3v2f16 = OpTypeArray %v2f16 %c_i32_3\n"
8103                               "          %op_a3v2f16 = OpTypePointer Output %a3v2f16\n"
8104                               "%v2f16_v2f64_function = OpTypeFunction %v2f16 %v2f64\n"
8105                               "             %a3v2f64 = OpTypeArray %v2f64 %c_i32_3\n"
8106                               "          %ip_a3v2f64 = OpTypePointer Input %a3v2f64\n"
8107                               "          %ip_v2f64 = OpTypePointer Input %v2f64\n",
8108 
8109                               "v2f64",
8110                               "v2f16",
8111                               2 * 4,
8112                               2,
8113                           }};
8114 
8115     VulkanFeatures requiredFeatures;
8116 
8117     requiredFeatures.coreFeatures.shaderFloat64           = true;
8118     requiredFeatures.ext16BitStorage.storageInputOutput16 = true;
8119 
8120     for (uint32_t caseIdx = 0; caseIdx < DE_LENGTH_OF_ARRAY(cases); ++caseIdx)
8121         for (uint32_t rndModeIdx = 0; rndModeIdx < DE_LENGTH_OF_ARRAY(rndModes); ++rndModeIdx)
8122         {
8123             fragments["interface_op_func"]       = cases[caseIdx].interfaceOpFunc;
8124             fragments["interface_op_call"]       = cases[caseIdx].interfaceOpCall;
8125             fragments["post_interface_op_frag"]  = cases[caseIdx].postInterfaceOp;
8126             fragments["post_interface_op_vert"]  = cases[caseIdx].postInterfaceOp;
8127             fragments["post_interface_op_geom"]  = cases[caseIdx].postInterfaceOpGeom;
8128             fragments["post_interface_op_tesse"] = cases[caseIdx].postInterfaceOpGeom;
8129             fragments["post_interface_op_tessc"] = cases[caseIdx].postInterfaceOpTessc;
8130             fragments["pre_main"]                = cases[caseIdx].preMain;
8131             fragments["decoration"]              = rndModes[rndModeIdx].decor;
8132             fragments["decoration_tessc"]        = rndModes[rndModeIdx].decor_tessc;
8133 
8134             fragments["input_type"]  = cases[caseIdx].inputType;
8135             fragments["output_type"] = cases[caseIdx].outputType;
8136 
8137             GraphicsInterfaces interfaces;
8138             const uint32_t numPerCase = cases[caseIdx].numPerCase;
8139             vector<double> subInputs(numPerCase);
8140             vector<deFloat16> subOutputs(numPerCase);
8141 
8142             // The pipeline need this to call compare16BitFloat() when checking the result.
8143             interfaces.setRoundingMode(rndModes[rndModeIdx].flags);
8144 
8145             for (uint32_t caseNdx = 0; caseNdx < numDataPoints / numPerCase; ++caseNdx)
8146             {
8147                 string testName =
8148                     string(cases[caseIdx].name) + numberToString(caseNdx) + "_" + rndModes[rndModeIdx].name;
8149 
8150                 for (uint32_t numNdx = 0; numNdx < numPerCase; ++numNdx)
8151                 {
8152                     subInputs[numNdx] = float64Data[caseNdx * numPerCase + numNdx];
8153                     // We derive the expected result from inputs directly in the graphics pipeline.
8154                     subOutputs[numNdx] = 0;
8155                 }
8156                 interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_FLOAT64),
8157                                                          BufferSp(new Float64Buffer(subInputs))),
8158                                           std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_FLOAT16),
8159                                                          BufferSp(new Float16Buffer(subOutputs))));
8160                 createTestsForAllStages(testName, defaultColors, defaultColors, fragments, interfaces, extensions,
8161                                         testGroup, requiredFeatures);
8162             }
8163         }
8164 }
8165 
addCompute16bitStorageUniform16To64Group(tcu::TestCaseGroup * group)8166 void addCompute16bitStorageUniform16To64Group(tcu::TestCaseGroup *group)
8167 {
8168     tcu::TestContext &testCtx = group->getTestContext();
8169     de::Random rnd(deStringHash(group->getName()));
8170     const int numElements = 128;
8171 
8172     const StringTemplate shaderTemplate(
8173         "OpCapability Shader\n"
8174         "OpCapability Float64\n"
8175         "OpCapability ${capability}\n"
8176         "OpExtension \"SPV_KHR_16bit_storage\"\n"
8177         "OpMemoryModel Logical GLSL450\n"
8178         "OpEntryPoint GLCompute %main \"main\" %id\n"
8179         "OpExecutionMode %main LocalSize 1 1 1\n"
8180         "OpDecorate %id BuiltIn GlobalInvocationId\n"
8181 
8182         "${stride}\n"
8183 
8184         "OpMemberDecorate %SSBO64 0 Offset 0\n"
8185         "OpMemberDecorate %SSBO16 0 Offset 0\n"
8186         "OpDecorate %SSBO64 BufferBlock\n"
8187         "OpDecorate %SSBO16 ${storage}\n"
8188         "OpDecorate %ssbo64 DescriptorSet 0\n"
8189         "OpDecorate %ssbo16 DescriptorSet 0\n"
8190         "OpDecorate %ssbo64 Binding 1\n"
8191         "OpDecorate %ssbo16 Binding 0\n"
8192 
8193         "${matrix_decor:opt}\n"
8194 
8195         "%bool      = OpTypeBool\n"
8196         "%void      = OpTypeVoid\n"
8197         "%voidf     = OpTypeFunction %void\n"
8198         "%u32       = OpTypeInt 32 0\n"
8199         "%i32       = OpTypeInt 32 1\n"
8200         "%f64       = OpTypeFloat 64\n"
8201         "%v3u32     = OpTypeVector %u32 3\n"
8202         "%uvec3ptr  = OpTypePointer Input %v3u32\n"
8203         "%i32ptr    = OpTypePointer Uniform %i32\n"
8204         "%f64ptr    = OpTypePointer Uniform %f64\n"
8205 
8206         "%zero      = OpConstant %i32 0\n"
8207         "%c_i32_1   = OpConstant %i32 1\n"
8208         "%c_i32_2   = OpConstant %i32 2\n"
8209         "%c_i32_3   = OpConstant %i32 3\n"
8210         "%c_i32_16  = OpConstant %i32 16\n"
8211         "%c_i32_32  = OpConstant %i32 32\n"
8212         "%c_i32_64  = OpConstant %i32 64\n"
8213         "%c_i32_128 = OpConstant %i32 128\n"
8214         "%c_i32_ci  = OpConstant %i32 ${constarrayidx}\n"
8215 
8216         "%i32arr    = OpTypeArray %i32 %c_i32_128\n"
8217         "%f64arr    = OpTypeArray %f64 %c_i32_128\n"
8218 
8219         "${types}\n"
8220         "${matrix_types:opt}\n"
8221 
8222         "%SSBO64    = OpTypeStruct %${matrix_prefix:opt}${base64}arr\n"
8223         "%SSBO16    = OpTypeStruct %${matrix_prefix:opt}${base16}arr\n"
8224         "%up_SSBO64 = OpTypePointer Uniform %SSBO64\n"
8225         "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
8226         "%ssbo64    = OpVariable %up_SSBO64 Uniform\n"
8227         "%ssbo16    = OpVariable %up_SSBO16 Uniform\n"
8228 
8229         "%id        = OpVariable %uvec3ptr Input\n"
8230 
8231         "%main      = OpFunction %void None %voidf\n"
8232         "%label     = OpLabel\n"
8233         "%idval     = OpLoad %v3u32 %id\n"
8234         "%x         = OpCompositeExtract %u32 %idval 0\n"
8235         "%inloc     = OpAccessChain %${base16}ptr %ssbo16 %zero %${arrayindex} ${index0:opt}\n"
8236         "%val16     = OpLoad %${base16} %inloc\n"
8237         "%val64     = ${convert} %${base64} %val16\n"
8238         "%outloc    = OpAccessChain %${base64}ptr %ssbo64 %zero %x ${index0:opt}\n"
8239         "             OpStore %outloc %val64\n"
8240         "${matrix_store:opt}\n"
8241         "             OpReturn\n"
8242         "             OpFunctionEnd\n");
8243 
8244     { // floats
8245         const char floatTypes[] = "%f16       = OpTypeFloat 16\n"
8246                                   "%f16ptr    = OpTypePointer Uniform %f16\n"
8247                                   "%f16arr    = OpTypeArray %f16 %c_i32_128\n"
8248                                   "%v2f16     = OpTypeVector %f16 2\n"
8249                                   "%v2f64     = OpTypeVector %f64 2\n"
8250                                   "%v2f16ptr  = OpTypePointer Uniform %v2f16\n"
8251                                   "%v2f64ptr  = OpTypePointer Uniform %v2f64\n"
8252                                   "%v2f16arr  = OpTypeArray %v2f16 %c_i32_64\n"
8253                                   "%v2f64arr  = OpTypeArray %v2f64 %c_i32_64\n";
8254 
8255         enum DataType
8256         {
8257             SCALAR,
8258             VEC2,
8259             MAT2X2,
8260         };
8261 
8262         struct CompositeType
8263         {
8264             const char *name;
8265             const char *base64;
8266             const char *base16;
8267             const char *strideStr;
8268             const char *stride16UBO;
8269             unsigned padding16UBO;
8270             const char *stride16SSBO;
8271             unsigned padding16SSBO;
8272             bool useConstantIndex;
8273             unsigned constantIndex;
8274             unsigned count;
8275             DataType dataType;
8276         };
8277 
8278         const CompositeType cTypes[] = {
8279             {"scalar", "f64", "f16", "OpDecorate %f64arr ArrayStride 8\nOpDecorate %f16arr ArrayStride ", "16", 14, "2",
8280              0, false, 0, numElements, SCALAR},
8281             {"scalar_const_idx_5", "f64", "f16", "OpDecorate %f64arr ArrayStride 8\nOpDecorate %f16arr ArrayStride ",
8282              "16", 14, "2", 0, true, 5, numElements, SCALAR},
8283             {"scalar_const_idx_8", "f64", "f16", "OpDecorate %f64arr ArrayStride 8\nOpDecorate %f16arr ArrayStride ",
8284              "16", 14, "2", 0, true, 8, numElements, SCALAR},
8285             {"vector", "v2f64", "v2f16", "OpDecorate %v2f64arr ArrayStride 16\nOpDecorate %v2f16arr ArrayStride ", "16",
8286              12, "4", 0, false, 0, numElements / 2, VEC2},
8287             {"matrix", "v2f64", "v2f16", "OpDecorate %m4v2f64arr ArrayStride 64\nOpDecorate %m4v2f16arr ArrayStride ",
8288              "16", 0, "16", 0, false, 0, numElements / 8, MAT2X2}};
8289 
8290         vector<deFloat16> float16Data = getFloat16s(rnd, numElements);
8291         vector<double> float64Data;
8292 
8293         float64Data.reserve(numElements);
8294         for (uint32_t numIdx = 0; numIdx < numElements; ++numIdx)
8295             float64Data.push_back(deFloat16To64(float16Data[numIdx]));
8296 
8297         for (uint32_t capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
8298             for (uint32_t tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes); ++tyIdx)
8299             {
8300                 ComputeShaderSpec spec;
8301                 map<string, string> specs;
8302                 string testName  = string(CAPABILITIES[capIdx].name) + "_" + cTypes[tyIdx].name + "_float";
8303                 const bool isUBO = CAPABILITIES[capIdx].dtype == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
8304 
8305                 specs["capability"]    = CAPABILITIES[capIdx].cap;
8306                 specs["storage"]       = CAPABILITIES[capIdx].decor;
8307                 specs["stride"]        = cTypes[tyIdx].strideStr;
8308                 specs["base64"]        = cTypes[tyIdx].base64;
8309                 specs["base16"]        = cTypes[tyIdx].base16;
8310                 specs["types"]         = floatTypes;
8311                 specs["convert"]       = "OpFConvert";
8312                 specs["constarrayidx"] = de::toString(cTypes[tyIdx].constantIndex);
8313 
8314                 if (isUBO)
8315                     specs["stride"] += cTypes[tyIdx].stride16UBO;
8316                 else
8317                     specs["stride"] += cTypes[tyIdx].stride16SSBO;
8318 
8319                 if (cTypes[tyIdx].useConstantIndex)
8320                     specs["arrayindex"] = "c_i32_ci";
8321                 else
8322                     specs["arrayindex"] = "x";
8323 
8324                 vector<double> float64DataConstIdx;
8325                 if (cTypes[tyIdx].useConstantIndex)
8326                 {
8327                     const uint32_t numFloats = numElements / cTypes[tyIdx].count;
8328                     for (uint32_t numIdx = 0; numIdx < numElements; ++numIdx)
8329                         float64DataConstIdx.push_back(
8330                             float64Data[cTypes[tyIdx].constantIndex * numFloats + numIdx % numFloats]);
8331                 }
8332 
8333                 if (deStringEqual(cTypes[tyIdx].name, "matrix"))
8334                 {
8335                     specs["index0"]        = "%zero";
8336                     specs["matrix_prefix"] = "m4";
8337                     specs["matrix_types"]  = "%m4v2f16 = OpTypeMatrix %v2f16 4\n"
8338                                              "%m4v2f64 = OpTypeMatrix %v2f64 4\n"
8339                                              "%m4v2f16arr = OpTypeArray %m4v2f16 %c_i32_16\n"
8340                                              "%m4v2f64arr = OpTypeArray %m4v2f64 %c_i32_16\n";
8341                     specs["matrix_decor"]  = "OpMemberDecorate %SSBO64 0 ColMajor\n"
8342                                              "OpMemberDecorate %SSBO64 0 MatrixStride 16\n"
8343                                              "OpMemberDecorate %SSBO16 0 ColMajor\n"
8344                                              "OpMemberDecorate %SSBO16 0 MatrixStride 4\n";
8345                     specs["matrix_store"]  = "%inloc_1  = OpAccessChain %v2f16ptr %ssbo16 %zero %x %c_i32_1\n"
8346                                              "%val16_1  = OpLoad %v2f16 %inloc_1\n"
8347                                              "%val64_1  = OpFConvert %v2f64 %val16_1\n"
8348                                              "%outloc_1 = OpAccessChain %v2f64ptr %ssbo64 %zero %x %c_i32_1\n"
8349                                              "            OpStore %outloc_1 %val64_1\n"
8350 
8351                                             "%inloc_2  = OpAccessChain %v2f16ptr %ssbo16 %zero %x %c_i32_2\n"
8352                                             "%val16_2  = OpLoad %v2f16 %inloc_2\n"
8353                                             "%val64_2  = OpFConvert %v2f64 %val16_2\n"
8354                                             "%outloc_2 = OpAccessChain %v2f64ptr %ssbo64 %zero %x %c_i32_2\n"
8355                                             "            OpStore %outloc_2 %val64_2\n"
8356 
8357                                             "%inloc_3  = OpAccessChain %v2f16ptr %ssbo16 %zero %x %c_i32_3\n"
8358                                             "%val16_3  = OpLoad %v2f16 %inloc_3\n"
8359                                             "%val64_3  = OpFConvert %v2f64 %val16_3\n"
8360                                             "%outloc_3 = OpAccessChain %v2f64ptr %ssbo64 %zero %x %c_i32_3\n"
8361                                             "            OpStore %outloc_3 %val64_3\n";
8362                 }
8363 
8364                 spec.assembly          = shaderTemplate.specialize(specs);
8365                 spec.numWorkGroups     = IVec3(cTypes[tyIdx].count, 1, 1);
8366                 spec.verifyIO          = check64BitFloats;
8367                 const unsigned padding = isUBO ? cTypes[tyIdx].padding16UBO : cTypes[tyIdx].padding16SSBO;
8368 
8369                 if (cTypes[tyIdx].dataType == SCALAR || cTypes[tyIdx].dataType == MAT2X2)
8370                 {
8371                     DE_ASSERT(cTypes[tyIdx].dataType != MAT2X2 || padding == 0);
8372                     spec.inputs.push_back(
8373                         Resource(BufferSp(new Float16Buffer(float16Data, padding)), CAPABILITIES[capIdx].dtype));
8374                 }
8375                 else if (cTypes[tyIdx].dataType == VEC2)
8376                 {
8377                     vector<tcu::Vector<deFloat16, 2>> float16Vec2Data(numElements / 2);
8378                     for (size_t elemIdx = 0; elemIdx < numElements; elemIdx++)
8379                     {
8380                         float16Vec2Data[elemIdx / 2][elemIdx % 2] = float16Data[elemIdx];
8381                     }
8382 
8383                     typedef Buffer<tcu::Vector<deFloat16, 2>> Float16Vec2Buffer;
8384                     spec.inputs.push_back(Resource(BufferSp(new Float16Vec2Buffer(float16Vec2Data, padding)),
8385                                                    CAPABILITIES[capIdx].dtype));
8386                 }
8387 
8388                 spec.outputs.push_back(Resource(
8389                     BufferSp(new Float64Buffer(cTypes[tyIdx].useConstantIndex ? float64DataConstIdx : float64Data))));
8390                 spec.extensions.push_back("VK_KHR_16bit_storage");
8391 
8392                 spec.requestedVulkanFeatures = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
8393                 spec.requestedVulkanFeatures.coreFeatures.shaderFloat64 = VK_TRUE;
8394 
8395                 group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), spec));
8396             }
8397     }
8398 }
8399 
addCompute16bitStoragePushConstant16To64Group(tcu::TestCaseGroup * group)8400 void addCompute16bitStoragePushConstant16To64Group(tcu::TestCaseGroup *group)
8401 {
8402     tcu::TestContext &testCtx = group->getTestContext();
8403     de::Random rnd(deStringHash(group->getName()));
8404     const int numElements = 64;
8405 
8406     const StringTemplate shaderTemplate("OpCapability Shader\n"
8407                                         "OpCapability StoragePushConstant16\n"
8408                                         "OpCapability Float64\n"
8409                                         "OpExtension \"SPV_KHR_16bit_storage\"\n"
8410                                         "OpMemoryModel Logical GLSL450\n"
8411                                         "OpEntryPoint GLCompute %main \"main\" %id\n"
8412                                         "OpExecutionMode %main LocalSize 1 1 1\n"
8413                                         "OpDecorate %id BuiltIn GlobalInvocationId\n"
8414 
8415                                         "${stride}"
8416 
8417                                         "OpDecorate %PC16 Block\n"
8418                                         "OpMemberDecorate %PC16 0 Offset 0\n"
8419                                         "OpMemberDecorate %SSBO64 0 Offset 0\n"
8420                                         "OpDecorate %SSBO64 BufferBlock\n"
8421                                         "OpDecorate %ssbo64 DescriptorSet 0\n"
8422                                         "OpDecorate %ssbo64 Binding 0\n"
8423 
8424                                         "${matrix_decor:opt}\n"
8425 
8426                                         "%bool      = OpTypeBool\n"
8427                                         "%void      = OpTypeVoid\n"
8428                                         "%voidf     = OpTypeFunction %void\n"
8429                                         "%u32       = OpTypeInt 32 0\n"
8430                                         "%i32       = OpTypeInt 32 1\n"
8431                                         "%f32       = OpTypeFloat 32\n"
8432                                         "%uvec3     = OpTypeVector %u32 3\n"
8433                                         "%fvec3     = OpTypeVector %f32 3\n"
8434                                         "%uvec3ptr  = OpTypePointer Input %uvec3\n"
8435                                         "%i32ptr    = OpTypePointer Uniform %i32\n"
8436                                         "%f32ptr    = OpTypePointer Uniform %f32\n"
8437 
8438                                         "%zero      = OpConstant %i32 0\n"
8439                                         "%c_i32_1   = OpConstant %i32 1\n"
8440                                         "%c_i32_8   = OpConstant %i32 8\n"
8441                                         "%c_i32_16  = OpConstant %i32 16\n"
8442                                         "%c_i32_32  = OpConstant %i32 32\n"
8443                                         "%c_i32_64  = OpConstant %i32 64\n"
8444 
8445                                         "%i32arr    = OpTypeArray %i32 %c_i32_64\n"
8446                                         "%f32arr    = OpTypeArray %f32 %c_i32_64\n"
8447 
8448                                         "${types}\n"
8449                                         "${matrix_types:opt}\n"
8450 
8451                                         "%PC16      = OpTypeStruct %${matrix_prefix:opt}${base16}arr\n"
8452                                         "%pp_PC16   = OpTypePointer PushConstant %PC16\n"
8453                                         "%pc16      = OpVariable %pp_PC16 PushConstant\n"
8454                                         "%SSBO64    = OpTypeStruct %${matrix_prefix:opt}${base64}arr\n"
8455                                         "%up_SSBO64 = OpTypePointer Uniform %SSBO64\n"
8456                                         "%ssbo64    = OpVariable %up_SSBO64 Uniform\n"
8457 
8458                                         "%id        = OpVariable %uvec3ptr Input\n"
8459 
8460                                         "%main      = OpFunction %void None %voidf\n"
8461                                         "%label     = OpLabel\n"
8462                                         "%idval     = OpLoad %uvec3 %id\n"
8463                                         "%x         = OpCompositeExtract %u32 %idval 0\n"
8464                                         "%inloc     = OpAccessChain %${base16}ptr %pc16 %zero %x ${index0:opt}\n"
8465                                         "%val16     = OpLoad %${base16} %inloc\n"
8466                                         "%val64     = ${convert} %${base64} %val16\n"
8467                                         "%outloc    = OpAccessChain %${base64}ptr %ssbo64 %zero %x ${index0:opt}\n"
8468                                         "             OpStore %outloc %val64\n"
8469                                         "${matrix_store:opt}\n"
8470                                         "             OpReturn\n"
8471                                         "             OpFunctionEnd\n");
8472 
8473     { // floats
8474         const char floatTypes[] = "%f16       = OpTypeFloat 16\n"
8475                                   "%f16ptr    = OpTypePointer PushConstant %f16\n"
8476                                   "%f16arr    = OpTypeArray %f16 %c_i32_64\n"
8477                                   "%f64       = OpTypeFloat 64\n"
8478                                   "%f64ptr    = OpTypePointer Uniform %f64\n"
8479                                   "%f64arr    = OpTypeArray %f64 %c_i32_64\n"
8480                                   "%v4f16     = OpTypeVector %f16 4\n"
8481                                   "%v4f32     = OpTypeVector %f32 4\n"
8482                                   "%v4f64     = OpTypeVector %f64 4\n"
8483                                   "%v4f16ptr  = OpTypePointer PushConstant %v4f16\n"
8484                                   "%v4f32ptr  = OpTypePointer Uniform %v4f32\n"
8485                                   "%v4f64ptr  = OpTypePointer Uniform %v4f64\n"
8486                                   "%v4f16arr  = OpTypeArray %v4f16 %c_i32_16\n"
8487                                   "%v4f32arr  = OpTypeArray %v4f32 %c_i32_16\n"
8488                                   "%v4f64arr  = OpTypeArray %v4f64 %c_i32_16\n";
8489 
8490         struct CompositeType
8491         {
8492             const char *name;
8493             const char *base64;
8494             const char *base16;
8495             const char *stride;
8496             unsigned count;
8497         };
8498 
8499         const CompositeType cTypes[] = {
8500             {"scalar", "f64", "f16", "OpDecorate %f64arr ArrayStride 8\nOpDecorate %f16arr ArrayStride 2\n",
8501              numElements},
8502             {"vector", "v4f64", "v4f16", "OpDecorate %v4f64arr ArrayStride 32\nOpDecorate %v4f16arr ArrayStride 8\n",
8503              numElements / 4},
8504             {"matrix", "v4f64", "v4f16",
8505              "OpDecorate %m2v4f64arr ArrayStride 64\nOpDecorate %m2v4f16arr ArrayStride 16\n", numElements / 8},
8506         };
8507 
8508         vector<deFloat16> float16Data = getFloat16s(rnd, numElements);
8509         vector<double> float64Data;
8510 
8511         float64Data.reserve(numElements);
8512         for (uint32_t numIdx = 0; numIdx < numElements; ++numIdx)
8513             float64Data.push_back(deFloat16To64(float16Data[numIdx]));
8514 
8515         for (uint32_t tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes); ++tyIdx)
8516         {
8517             ComputeShaderSpec spec;
8518             map<string, string> specs;
8519             string testName = string(cTypes[tyIdx].name) + "_float";
8520 
8521             specs["stride"]  = cTypes[tyIdx].stride;
8522             specs["base64"]  = cTypes[tyIdx].base64;
8523             specs["base16"]  = cTypes[tyIdx].base16;
8524             specs["types"]   = floatTypes;
8525             specs["convert"] = "OpFConvert";
8526 
8527             if (strcmp(cTypes[tyIdx].name, "matrix") == 0)
8528             {
8529                 specs["index0"]        = "%zero";
8530                 specs["matrix_prefix"] = "m2";
8531                 specs["matrix_types"]  = "%m2v4f16 = OpTypeMatrix %v4f16 2\n"
8532                                          "%m2v4f64 = OpTypeMatrix %v4f64 2\n"
8533                                          "%m2v4f16arr = OpTypeArray %m2v4f16 %c_i32_8\n"
8534                                          "%m2v4f64arr = OpTypeArray %m2v4f64 %c_i32_8\n";
8535                 specs["matrix_decor"]  = "OpMemberDecorate %SSBO64 0 ColMajor\n"
8536                                          "OpMemberDecorate %SSBO64 0 MatrixStride 32\n"
8537                                          "OpMemberDecorate %PC16 0 ColMajor\n"
8538                                          "OpMemberDecorate %PC16 0 MatrixStride 8\n";
8539                 specs["matrix_store"]  = "%inloc_1  = OpAccessChain %v4f16ptr %pc16 %zero %x %c_i32_1\n"
8540                                          "%val16_1  = OpLoad %v4f16 %inloc_1\n"
8541                                          "%val64_1  = OpFConvert %v4f64 %val16_1\n"
8542                                          "%outloc_1 = OpAccessChain %v4f64ptr %ssbo64 %zero %x %c_i32_1\n"
8543                                          "            OpStore %outloc_1 %val64_1\n";
8544             }
8545 
8546             spec.assembly      = shaderTemplate.specialize(specs);
8547             spec.numWorkGroups = IVec3(cTypes[tyIdx].count, 1, 1);
8548             spec.verifyIO      = check64BitFloats;
8549             spec.pushConstants = BufferSp(new Float16Buffer(float16Data));
8550 
8551             spec.outputs.push_back(BufferSp(new Float64Buffer(float64Data)));
8552 
8553             spec.extensions.push_back("VK_KHR_16bit_storage");
8554 
8555             spec.requestedVulkanFeatures.coreFeatures.shaderFloat64            = VK_TRUE;
8556             spec.requestedVulkanFeatures.ext16BitStorage.storagePushConstant16 = true;
8557 
8558             group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), spec));
8559         }
8560     }
8561 }
8562 
8563 } // namespace
8564 
create16BitStorageComputeGroup(tcu::TestContext & testCtx)8565 tcu::TestCaseGroup *create16BitStorageComputeGroup(tcu::TestContext &testCtx)
8566 {
8567     // Compute tests for VK_KHR_16bit_storage extension
8568     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "16bit_storage"));
8569     // 64bit floats to 16bit tests under capability StorageUniform{|BufferBlock}
8570     addTestGroup(group.get(), "uniform_64_to_16", addCompute16bitStorageUniform64To16Group);
8571     // 32bit floats/ints to 16bit tests under capability StorageUniform{|BufferBlock}
8572     addTestGroup(group.get(), "uniform_32_to_16", addCompute16bitStorageUniform32To16Group);
8573     // 16bit floats/ints to 32bit tests under capability StorageUniform{|BufferBlock}
8574     addTestGroup(group.get(), "uniform_16_to_32", addCompute16bitStorageUniform16To32Group);
8575     // 16bit floats to 64bit tests under capability StorageUniform{|BufferBlock}
8576     addTestGroup(group.get(), "uniform_16_to_64", addCompute16bitStorageUniform16To64Group);
8577     // 16bit floats/ints to 32bit tests under capability StoragePushConstant16
8578     addTestGroup(group.get(), "push_constant_16_to_32", addCompute16bitStoragePushConstant16To32Group);
8579     // 16bit floats to 64bit tests under capability StoragePushConstant16
8580     addTestGroup(group.get(), "push_constant_16_to_64", addCompute16bitStoragePushConstant16To64Group);
8581     // 16bit floats struct to 32bit tests under capability StorageUniform{|BufferBlock}
8582     addTestGroup(group.get(), "uniform_16struct_to_32struct", addCompute16bitStorageUniform16StructTo32StructGroup);
8583     // 32bit floats struct to 16bit tests under capability StorageUniform{|BufferBlock}
8584     addTestGroup(group.get(), "uniform_32struct_to_16struct", addCompute16bitStorageUniform32StructTo16StructGroup);
8585     // mixed type of 8bit and 32bit struct
8586     addTestGroup(group.get(), "struct_mixed_types", addCompute16bitStructMixedTypesGroup);
8587     // 16bit floats/ints to 16bit tests under capability StorageUniformBufferBlock16
8588     addTestGroup(group.get(), "uniform_16_to_16", addCompute16bitStorageUniform16To16Group);
8589     // chain access 16bit floats/ints to 32bit tests under capability StorageUniform{|BufferBlock}
8590     addTestGroup(group.get(), "uniform_16_to_32_chainaccess", addCompute16bitStorageUniform16To32ChainAccessGroup);
8591 
8592     return group.release();
8593 }
8594 
create16BitStorageGraphicsGroup(tcu::TestContext & testCtx)8595 tcu::TestCaseGroup *create16BitStorageGraphicsGroup(tcu::TestContext &testCtx)
8596 {
8597     // Graphics tests for VK_KHR_16bit_storage extension
8598     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "16bit_storage"));
8599 
8600     // 64-bit floats into 16-bit tests under capability StorageUniform{|BufferBlock}16
8601     addTestGroup(group.get(), "uniform_float_64_to_16", addGraphics16BitStorageUniformFloat64To16Group);
8602     // 32-bit floats into 16-bit tests under capability StorageUniform{|BufferBlock}16
8603     addTestGroup(group.get(), "uniform_float_32_to_16", addGraphics16BitStorageUniformFloat32To16Group);
8604     // 16-bit floats into 32-bit testsunder capability StorageUniform{|BufferBlock}16
8605     addTestGroup(group.get(), "uniform_float_16_to_32", addGraphics16BitStorageUniformFloat16To32Group);
8606     // 16-bit floats into 64-bit testsunder capability StorageUniform{|BufferBlock}16
8607     addTestGroup(group.get(), "uniform_float_16_to_64", addGraphics16BitStorageUniformFloat16To64Group);
8608     // 32-bit int into 16-bit tests under capability StorageUniform{|BufferBlock}16
8609     addTestGroup(group.get(), "uniform_int_32_to_16", addGraphics16BitStorageUniformInt32To16Group);
8610     // 16-bit int into 32-bit tests under capability StorageUniform{|BufferBlock}16
8611     addTestGroup(group.get(), "uniform_int_16_to_32", addGraphics16BitStorageUniformInt16To32Group);
8612     // 64-bit floats into 16-bit tests under capability StorageInputOutput16
8613     addTestGroup(group.get(), "input_output_float_64_to_16", addGraphics16BitStorageInputOutputFloat64To16Group);
8614     // 32-bit floats into 16-bit tests under capability StorageInputOutput16
8615     addTestGroup(group.get(), "input_output_float_32_to_16", addGraphics16BitStorageInputOutputFloat32To16Group);
8616     // 16-bit floats into 32-bit tests under capability StorageInputOutput16
8617     addTestGroup(group.get(), "input_output_float_16_to_32", addGraphics16BitStorageInputOutputFloat16To32Group);
8618     // 16-bit floats pass-through tests under capability StorageInputOutput16
8619     addTestGroup(group.get(), "input_output_float_16_to_16", addGraphics16BitStorageInputOutputFloat16To16Group);
8620     // 16-bit floats into 64-bit tests under capability StorageInputOutput16
8621     addTestGroup(group.get(), "input_output_float_16_to_64", addGraphics16BitStorageInputOutputFloat16To64Group);
8622     // 16-bit floats pass-through to two outputs tests under capability StorageInputOutput16
8623     addTestGroup(group.get(), "input_output_float_16_to_16x2", addGraphics16BitStorageInputOutputFloat16To16x2Group);
8624     // 16-bit ints pass-through to two outputs tests under capability StorageInputOutput16
8625     addTestGroup(group.get(), "input_output_int_16_to_16x2", addGraphics16BitStorageInputOutputInt16To16x2Group);
8626     // 32-bit int into 16-bit tests under capability StorageInputOutput16
8627     addTestGroup(group.get(), "input_output_int_32_to_16", addGraphics16BitStorageInputOutputInt32To16Group);
8628     // 16-bit int into 32-bit tests under capability StorageInputOutput16
8629     addTestGroup(group.get(), "input_output_int_16_to_32", addGraphics16BitStorageInputOutputInt16To32Group);
8630     // 16-bit int into 16-bit tests under capability StorageInputOutput16
8631     addTestGroup(group.get(), "input_output_int_16_to_16", addGraphics16BitStorageInputOutputInt16To16Group);
8632     // 16-bit floats into 32-bit tests under capability StoragePushConstant16
8633     addTestGroup(group.get(), "push_constant_float_16_to_32", addGraphics16BitStoragePushConstantFloat16To32Group);
8634     // 16-bit floats into 64-bit tests under capability StoragePushConstant16
8635     addTestGroup(group.get(), "push_constant_float_16_to_64", addGraphics16BitStoragePushConstantFloat16To64Group);
8636     // 16-bit int into 32-bit tests under capability StoragePushConstant16
8637     addTestGroup(group.get(), "push_constant_int_16_to_32", addGraphics16BitStoragePushConstantInt16To32Group);
8638     // 16-bit float struct into 32-bit tests under capability StorageUniform{|BufferBlock}16
8639     addTestGroup(group.get(), "uniform_16struct_to_32struct", addGraphics16BitStorageUniformStructFloat16To32Group);
8640     // 32-bit float struct into 16-bit tests under capability StorageUniform{|BufferBlock}16
8641     addTestGroup(group.get(), "uniform_32struct_to_16struct", addGraphics16BitStorageUniformStructFloat32To16Group);
8642     // mixed type of 8bit and 32bit struct
8643     addTestGroup(group.get(), "struct_mixed_types", addGraphics16bitStructMixedTypesGroup);
8644 
8645     return group.release();
8646 }
8647 
8648 } // namespace SpirVAssembly
8649 } // namespace vkt
8650