/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2015 The Khronos Group Inc.
 * Copyright (c) 2015 Samsung Electronics Co., Ltd.
 * Copyright (c) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief SSBO layout case.
 *//*--------------------------------------------------------------------*/

#include "tcuFloat.hpp"
#include "deInt32.h"
#include "deMath.h"
#include "deMemory.h"
#include "deRandom.hpp"
#include "deSharedPtr.hpp"
#include "deString.h"
#include "deStringUtil.hpp"
#include "gluContextInfo.hpp"
#include "gluShaderProgram.hpp"
#include "gluShaderUtil.hpp"
#include "gluVarType.hpp"
#include "gluVarTypeUtil.hpp"
#include "tcuTestLog.hpp"
#include "vktSSBOLayoutCase.hpp"

#include "vkBuilderUtil.hpp"
#include "vkMemUtil.hpp"
#include "vkPrograms.hpp"
#include "vkQueryUtil.hpp"
#include "vkRef.hpp"
#include "vkRefUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkCmdUtil.hpp"

#include "util/vktTypeComparisonUtil.hpp"

namespace vkt
{
namespace ssbo
{

using glu::StructMember;
using glu::StructType;
using glu::VarType;
using std::string;
using std::vector;
using tcu::TestLog;

struct LayoutFlagsFmt
{
    uint32_t flags;
    LayoutFlagsFmt(uint32_t flags_) : flags(flags_)
    {
    }
};

std::ostream &operator<<(std::ostream &str, const LayoutFlagsFmt &fmt)
{
    static const struct
    {
        uint32_t bit;
        const char *token;
    } bitDesc[] = {{LAYOUT_STD140, "std140"},
                   {LAYOUT_STD430, "std430"},
                   {LAYOUT_SCALAR, "scalar"},
                   {LAYOUT_ROW_MAJOR, "row_major"},
                   {LAYOUT_COLUMN_MAJOR, "column_major"}};

    uint32_t remBits = fmt.flags;
    for (int descNdx = 0; descNdx < DE_LENGTH_OF_ARRAY(bitDesc); descNdx++)
    {
        if (remBits & bitDesc[descNdx].bit)
        {
            if (remBits != fmt.flags)
                str << ", ";
            str << bitDesc[descNdx].token;
            remBits &= ~bitDesc[descNdx].bit;
        }
    }
    DE_ASSERT(remBits == 0);
    return str;
}
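// Illustrative note (not part of the original source): for flags
// (LAYOUT_STD140 | LAYOUT_ROW_MAJOR), the formatter above prints
// "std140, row_major" -- tokens in bitDesc[] order, comma-separated.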

// BufferVar implementation.

BufferVar::BufferVar(const char *name, const VarType &type, uint32_t flags)
    : m_name(name)
    , m_type(type)
    , m_flags(flags)
    , m_offset(~0u)
{
}

// BufferBlock implementation.

BufferBlock::BufferBlock(const char *blockName) : m_blockName(blockName), m_arraySize(-1), m_flags(0)
{
    setArraySize(0);
}

void BufferBlock::setArraySize(int arraySize)
{
    DE_ASSERT(arraySize >= 0);
    m_lastUnsizedArraySizes.resize(arraySize == 0 ? 1 : arraySize, 0);
    m_arraySize = arraySize;
}

std::ostream &operator<<(std::ostream &stream, const BlockLayoutEntry &entry)
{
    stream << entry.name << " { name = " << entry.name << ", size = " << entry.size << ", activeVarIndices = [";

    for (vector<int>::const_iterator i = entry.activeVarIndices.begin(); i != entry.activeVarIndices.end(); i++)
    {
        if (i != entry.activeVarIndices.begin())
            stream << ", ";
        stream << *i;
    }

    stream << "] }";
    return stream;
}

static bool isUnsizedArray(const BufferVarLayoutEntry &entry)
{
    DE_ASSERT(entry.arraySize != 0 || entry.topLevelArraySize != 0);
    return entry.arraySize == 0 || entry.topLevelArraySize == 0;
}

std::ostream &operator<<(std::ostream &stream, const BufferVarLayoutEntry &entry)
{
    stream << entry.name << " { type = " << glu::getDataTypeName(entry.type) << ", blockNdx = " << entry.blockNdx
           << ", offset = " << entry.offset << ", arraySize = " << entry.arraySize
           << ", arrayStride = " << entry.arrayStride << ", matrixStride = " << entry.matrixStride
           << ", topLevelArraySize = " << entry.topLevelArraySize
           << ", topLevelArrayStride = " << entry.topLevelArrayStride
           << ", isRowMajor = " << (entry.isRowMajor ? "true" : "false") << " }";
    return stream;
}

// \todo [2012-01-24 pyry] Speed up lookups using hash.

int BufferLayout::getVariableIndex(const string &name) const
{
    for (int ndx = 0; ndx < (int)bufferVars.size(); ndx++)
    {
        if (bufferVars[ndx].name == name)
            return ndx;
    }
    return -1;
}

int BufferLayout::getBlockIndex(const string &name) const
{
    for (int ndx = 0; ndx < (int)blocks.size(); ndx++)
    {
        if (blocks[ndx].name == name)
            return ndx;
    }
    return -1;
}

// ShaderInterface implementation.

ShaderInterface::ShaderInterface(void)
{
}

ShaderInterface::~ShaderInterface(void)
{
    for (std::vector<StructType *>::iterator i = m_structs.begin(); i != m_structs.end(); i++)
        delete *i;

    for (std::vector<BufferBlock *>::iterator i = m_bufferBlocks.begin(); i != m_bufferBlocks.end(); i++)
        delete *i;
}

StructType &ShaderInterface::allocStruct(const char *name)
{
    m_structs.reserve(m_structs.size() + 1);
    m_structs.push_back(new StructType(name));
    return *m_structs.back();
}

struct StructNameEquals
{
    std::string name;

    StructNameEquals(const char *name_) : name(name_)
    {
    }

    bool operator()(const StructType *type) const
    {
        return type->getTypeName() && name == type->getTypeName();
    }
};

const StructType *ShaderInterface::findStruct(const char *name) const
{
    std::vector<StructType *>::const_iterator pos =
        std::find_if(m_structs.begin(), m_structs.end(), StructNameEquals(name));
    return pos != m_structs.end() ? *pos : DE_NULL;
}

void ShaderInterface::getNamedStructs(std::vector<const StructType *> &structs) const
{
    for (std::vector<StructType *>::const_iterator i = m_structs.begin(); i != m_structs.end(); i++)
    {
        if ((*i)->getTypeName() != DE_NULL)
            structs.push_back(*i);
    }
}

BufferBlock &ShaderInterface::allocBlock(const char *name)
{
    m_bufferBlocks.reserve(m_bufferBlocks.size() + 1);
    m_bufferBlocks.push_back(new BufferBlock(name));
    return *m_bufferBlocks.back();
}

namespace // Utilities
{
// Layout computation.

int getDataTypeByteSize(glu::DataType type)
{
    if (deInRange32(type, glu::TYPE_UINT8, glu::TYPE_UINT8_VEC4) ||
        deInRange32(type, glu::TYPE_INT8, glu::TYPE_INT8_VEC4))
    {
        return glu::getDataTypeScalarSize(type) * (int)sizeof(uint8_t);
    }
    else if (deInRange32(type, glu::TYPE_UINT16, glu::TYPE_UINT16_VEC4) ||
             deInRange32(type, glu::TYPE_INT16, glu::TYPE_INT16_VEC4) ||
             deInRange32(type, glu::TYPE_FLOAT16, glu::TYPE_FLOAT16_VEC4))
    {
        return glu::getDataTypeScalarSize(type) * (int)sizeof(uint16_t);
    }
    else
    {
        return glu::getDataTypeScalarSize(type) * (int)sizeof(uint32_t);
    }
}
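// Illustrative examples (not part of the original source):
// TYPE_FLOAT16_VEC3 -> 3 * sizeof(uint16_t) = 6 bytes, TYPE_INT8_VEC2 ->
// 2 bytes, and any 32-bit scalar/vector takes the fall-through path,
// e.g. TYPE_FLOAT_VEC4 -> 4 * sizeof(uint32_t) = 16 bytes.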

int getDataTypeByteAlignment(glu::DataType type)
{
    switch (type)
    {
    case glu::TYPE_FLOAT:
    case glu::TYPE_INT:
    case glu::TYPE_UINT:
    case glu::TYPE_BOOL:
        return 1 * (int)sizeof(uint32_t);

    case glu::TYPE_FLOAT_VEC2:
    case glu::TYPE_INT_VEC2:
    case glu::TYPE_UINT_VEC2:
    case glu::TYPE_BOOL_VEC2:
        return 2 * (int)sizeof(uint32_t);

    case glu::TYPE_FLOAT_VEC3:
    case glu::TYPE_INT_VEC3:
    case glu::TYPE_UINT_VEC3:
    case glu::TYPE_BOOL_VEC3: // Fall-through to vec4

    case glu::TYPE_FLOAT_VEC4:
    case glu::TYPE_INT_VEC4:
    case glu::TYPE_UINT_VEC4:
    case glu::TYPE_BOOL_VEC4:
        return 4 * (int)sizeof(uint32_t);

    case glu::TYPE_UINT8:
    case glu::TYPE_INT8:
        return 1 * (int)sizeof(uint8_t);

    case glu::TYPE_UINT8_VEC2:
    case glu::TYPE_INT8_VEC2:
        return 2 * (int)sizeof(uint8_t);

    case glu::TYPE_UINT8_VEC3:
    case glu::TYPE_INT8_VEC3: // Fall-through to vec4

    case glu::TYPE_UINT8_VEC4:
    case glu::TYPE_INT8_VEC4:
        return 4 * (int)sizeof(uint8_t);

    case glu::TYPE_UINT16:
    case glu::TYPE_INT16:
    case glu::TYPE_FLOAT16:
        return 1 * (int)sizeof(uint16_t);

    case glu::TYPE_UINT16_VEC2:
    case glu::TYPE_INT16_VEC2:
    case glu::TYPE_FLOAT16_VEC2:
        return 2 * (int)sizeof(uint16_t);

    case glu::TYPE_UINT16_VEC3:
    case glu::TYPE_INT16_VEC3:
    case glu::TYPE_FLOAT16_VEC3: // Fall-through to vec4

    case glu::TYPE_UINT16_VEC4:
    case glu::TYPE_INT16_VEC4:
    case glu::TYPE_FLOAT16_VEC4:
        return 4 * (int)sizeof(uint16_t);

    default:
        DE_ASSERT(false);
        return 0;
    }
}

int computeStd140BaseAlignment(const VarType &type, uint32_t layoutFlags)
{
    const int vec4Alignment = (int)sizeof(uint32_t) * 4;

    if (type.isBasicType())
    {
        glu::DataType basicType = type.getBasicType();

        if (glu::isDataTypeMatrix(basicType))
        {
            const bool isRowMajor = !!(layoutFlags & LAYOUT_ROW_MAJOR);
            const int vecSize =
                isRowMajor ? glu::getDataTypeMatrixNumColumns(basicType) : glu::getDataTypeMatrixNumRows(basicType);
            const int vecAlign = deAlign32(getDataTypeByteAlignment(glu::getDataTypeFloatVec(vecSize)), vec4Alignment);

            return vecAlign;
        }
        else
            return getDataTypeByteAlignment(basicType);
    }
    else if (type.isArrayType())
    {
        int elemAlignment = computeStd140BaseAlignment(type.getElementType(), layoutFlags);

        // Round up to alignment of vec4
        return deAlign32(elemAlignment, vec4Alignment);
    }
    else
    {
        DE_ASSERT(type.isStructType());

        int maxBaseAlignment = 0;

        for (StructType::ConstIterator memberIter = type.getStructPtr()->begin();
             memberIter != type.getStructPtr()->end(); memberIter++)
            maxBaseAlignment =
                de::max(maxBaseAlignment, computeStd140BaseAlignment(memberIter->getType(), layoutFlags));

        return deAlign32(maxBaseAlignment, vec4Alignment);
    }
}
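// Illustrative comparison (not part of the original source): under std140 an
// array of floats gets base alignment deAlign32(4, 16) = 16, whereas std430
// below keeps the element's own alignment of 4. The same vec4 rounding
// applies to struct base alignments in std140 but not in std430.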

int computeStd430BaseAlignment(const VarType &type, uint32_t layoutFlags)
{
    // Otherwise identical to std140 except that alignment of structures and arrays
    // are not rounded up to alignment of vec4.

    if (type.isBasicType())
    {
        glu::DataType basicType = type.getBasicType();

        if (glu::isDataTypeMatrix(basicType))
        {
            const bool isRowMajor = !!(layoutFlags & LAYOUT_ROW_MAJOR);
            const int vecSize =
                isRowMajor ? glu::getDataTypeMatrixNumColumns(basicType) : glu::getDataTypeMatrixNumRows(basicType);
            const int vecAlign = getDataTypeByteAlignment(glu::getDataTypeFloatVec(vecSize));
            return vecAlign;
        }
        else
            return getDataTypeByteAlignment(basicType);
    }
    else if (type.isArrayType())
    {
        return computeStd430BaseAlignment(type.getElementType(), layoutFlags);
    }
    else
    {
        DE_ASSERT(type.isStructType());

        int maxBaseAlignment = 0;

        for (StructType::ConstIterator memberIter = type.getStructPtr()->begin();
             memberIter != type.getStructPtr()->end(); memberIter++)
            maxBaseAlignment =
                de::max(maxBaseAlignment, computeStd430BaseAlignment(memberIter->getType(), layoutFlags));

        return maxBaseAlignment;
    }
}

int computeRelaxedBlockBaseAlignment(const VarType &type, uint32_t layoutFlags)
{
    if (type.isBasicType())
    {
        glu::DataType basicType = type.getBasicType();

        if (glu::isDataTypeVector(basicType))
            return getDataTypeByteAlignment(glu::getDataTypeScalarType(basicType));

        if (glu::isDataTypeMatrix(basicType))
        {
            const bool isRowMajor = !!(layoutFlags & LAYOUT_ROW_MAJOR);
            const int vecSize =
                isRowMajor ? glu::getDataTypeMatrixNumColumns(basicType) : glu::getDataTypeMatrixNumRows(basicType);
            const int vecAlign = getDataTypeByteAlignment(glu::getDataTypeFloatVec(vecSize));
            return vecAlign;
        }
        else
            return getDataTypeByteAlignment(basicType);
    }
    else if (type.isArrayType())
        return computeStd430BaseAlignment(type.getElementType(), layoutFlags);
    else
    {
        DE_ASSERT(type.isStructType());

        int maxBaseAlignment = 0;
        for (StructType::ConstIterator memberIter = type.getStructPtr()->begin();
             memberIter != type.getStructPtr()->end(); memberIter++)
            maxBaseAlignment =
                de::max(maxBaseAlignment, computeRelaxedBlockBaseAlignment(memberIter->getType(), layoutFlags));

        return maxBaseAlignment;
    }
}

int computeScalarBlockAlignment(const VarType &type, uint32_t layoutFlags)
{
    if (type.isBasicType())
    {
        return getDataTypeByteAlignment(glu::getDataTypeScalarType(type.getBasicType()));
    }
    else if (type.isArrayType())
        return computeScalarBlockAlignment(type.getElementType(), layoutFlags);
    else
    {
        DE_ASSERT(type.isStructType());

        int maxBaseAlignment = 0;
        for (StructType::ConstIterator memberIter = type.getStructPtr()->begin();
             memberIter != type.getStructPtr()->end(); memberIter++)
            maxBaseAlignment =
                de::max(maxBaseAlignment, computeScalarBlockAlignment(memberIter->getType(), layoutFlags));

        return maxBaseAlignment;
    }
}

inline uint32_t mergeLayoutFlags(uint32_t prevFlags, uint32_t newFlags)
{
    const uint32_t packingMask = LAYOUT_STD430 | LAYOUT_STD140 | LAYOUT_RELAXED | LAYOUT_SCALAR;
    const uint32_t matrixMask  = LAYOUT_ROW_MAJOR | LAYOUT_COLUMN_MAJOR;

    uint32_t mergedFlags = 0;

    mergedFlags |= ((newFlags & packingMask) ? newFlags : prevFlags) & packingMask;
    mergedFlags |= ((newFlags & matrixMask) ? newFlags : prevFlags) & matrixMask;

    return mergedFlags;
}
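// Illustrative example (not part of the original source): merging
// prevFlags = (LAYOUT_STD140 | LAYOUT_ROW_MAJOR) with newFlags = LAYOUT_STD430
// yields (LAYOUT_STD430 | LAYOUT_ROW_MAJOR): within each category the new
// flags win when present, otherwise the previous flags are kept.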

//! Appends all child elements to layout, returns value that should be appended to offset.
int computeReferenceLayout(BufferLayout &layout, int curBlockNdx, int baseOffset, const std::string &curPrefix,
                           const VarType &type, uint32_t layoutFlags)
{
    // Reference layout uses std430 rules by default. std140 rules are
    // chosen only for blocks that have std140 layout.
    const int baseAlignment     = (layoutFlags & LAYOUT_SCALAR) != 0 ? computeScalarBlockAlignment(type, layoutFlags) :
                                  (layoutFlags & LAYOUT_STD140) != 0 ? computeStd140BaseAlignment(type, layoutFlags) :
                                  (layoutFlags & LAYOUT_RELAXED) != 0 ?
                                                                       computeRelaxedBlockBaseAlignment(type, layoutFlags) :
                                                                       computeStd430BaseAlignment(type, layoutFlags);
    int curOffset               = deAlign32(baseOffset, baseAlignment);
    const int topLevelArraySize = 1; // Default values
    const int topLevelArrayStride = 0;

    if (type.isBasicType())
    {
        const glu::DataType basicType = type.getBasicType();
        BufferVarLayoutEntry entry;

        entry.name                = curPrefix;
        entry.type                = basicType;
        entry.arraySize           = 1;
        entry.arrayStride         = 0;
        entry.matrixStride        = 0;
        entry.topLevelArraySize   = topLevelArraySize;
        entry.topLevelArrayStride = topLevelArrayStride;
        entry.blockNdx            = curBlockNdx;

        if (glu::isDataTypeMatrix(basicType))
        {
            // Array of vectors as specified in rules 5 & 7.
            const bool isRowMajor = !!(layoutFlags & LAYOUT_ROW_MAJOR);
            const int vecSize =
                isRowMajor ? glu::getDataTypeMatrixNumColumns(basicType) : glu::getDataTypeMatrixNumRows(basicType);
            const glu::DataType vecType = glu::getDataTypeFloatVec(vecSize);
            const int numVecs =
                isRowMajor ? glu::getDataTypeMatrixNumRows(basicType) : glu::getDataTypeMatrixNumColumns(basicType);
            const int vecStride = (layoutFlags & LAYOUT_SCALAR) ? getDataTypeByteSize(vecType) : baseAlignment;

            entry.offset       = curOffset;
            entry.matrixStride = vecStride;
            entry.isRowMajor   = isRowMajor;

            curOffset += numVecs * entry.matrixStride;
        }
        else
        {
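            // Relaxed layout forbids a vector from straddling a 16-byte
            // boundary. Illustrative example (not part of the original
            // source): a vec3 (12 bytes) at offset 8 would occupy bytes
            // 8..19 and cross the boundary at 16, so curOffset is rounded
            // up to the next multiple of 16.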
            if (!(layoutFlags & LAYOUT_SCALAR) && (layoutFlags & LAYOUT_RELAXED) && glu::isDataTypeVector(basicType) &&
                (getDataTypeByteSize(basicType) <= 16 ?
                     curOffset / 16 != (curOffset + getDataTypeByteSize(basicType) - 1) / 16 :
                     curOffset % 16 != 0))
                curOffset = deIntRoundToPow2(curOffset, 16);

            // Scalar or vector.
            entry.offset = curOffset;

            curOffset += getDataTypeByteSize(basicType);
        }

        layout.bufferVars.push_back(entry);
    }
    else if (type.isArrayType())
    {
        const VarType &elemType = type.getElementType();

        if (elemType.isBasicType() && !glu::isDataTypeMatrix(elemType.getBasicType()))
        {
            // Array of scalars or vectors.
            const glu::DataType elemBasicType = elemType.getBasicType();
            const int stride = (layoutFlags & LAYOUT_SCALAR) ? getDataTypeByteSize(elemBasicType) : baseAlignment;
            BufferVarLayoutEntry entry;

            entry.name                = curPrefix + "[0]"; // Array variables are always postfixed with [0]
            entry.type                = elemBasicType;
            entry.blockNdx            = curBlockNdx;
            entry.offset              = curOffset;
            entry.arraySize           = type.getArraySize();
            entry.arrayStride         = stride;
            entry.matrixStride        = 0;
            entry.topLevelArraySize   = topLevelArraySize;
            entry.topLevelArrayStride = topLevelArrayStride;

            curOffset += stride * type.getArraySize();

            layout.bufferVars.push_back(entry);
        }
        else if (elemType.isBasicType() && glu::isDataTypeMatrix(elemType.getBasicType()))
        {
            // Array of matrices.
            const glu::DataType elemBasicType = elemType.getBasicType();
            const bool isRowMajor             = !!(layoutFlags & LAYOUT_ROW_MAJOR);
            const int vecSize                 = isRowMajor ? glu::getDataTypeMatrixNumColumns(elemBasicType) :
                                                             glu::getDataTypeMatrixNumRows(elemBasicType);
            const glu::DataType vecType       = glu::getDataTypeFloatVec(vecSize);
            const int numVecs                 = isRowMajor ? glu::getDataTypeMatrixNumRows(elemBasicType) :
                                                             glu::getDataTypeMatrixNumColumns(elemBasicType);
            const int vecStride = (layoutFlags & LAYOUT_SCALAR) ? getDataTypeByteSize(vecType) : baseAlignment;
            BufferVarLayoutEntry entry;

            entry.name                = curPrefix + "[0]"; // Array variables are always postfixed with [0]
            entry.type                = elemBasicType;
            entry.blockNdx            = curBlockNdx;
            entry.offset              = curOffset;
            entry.arraySize           = type.getArraySize();
            entry.arrayStride         = vecStride * numVecs;
            entry.matrixStride        = vecStride;
            entry.isRowMajor          = isRowMajor;
            entry.topLevelArraySize   = topLevelArraySize;
            entry.topLevelArrayStride = topLevelArrayStride;

            curOffset += entry.arrayStride * type.getArraySize();

            layout.bufferVars.push_back(entry);
        }
        else
        {
            DE_ASSERT(elemType.isStructType() || elemType.isArrayType());

            for (int elemNdx = 0; elemNdx < type.getArraySize(); elemNdx++)
                curOffset += computeReferenceLayout(layout, curBlockNdx, curOffset,
                                                    curPrefix + "[" + de::toString(elemNdx) + "]",
                                                    type.getElementType(), layoutFlags);
        }
    }
    else
    {
        DE_ASSERT(type.isStructType());

        for (StructType::ConstIterator memberIter = type.getStructPtr()->begin();
             memberIter != type.getStructPtr()->end(); memberIter++)
            curOffset += computeReferenceLayout(layout, curBlockNdx, curOffset, curPrefix + "." + memberIter->getName(),
                                                memberIter->getType(), layoutFlags);

        if (!(layoutFlags & LAYOUT_SCALAR))
            curOffset = deAlign32(curOffset, baseAlignment);
    }

    return curOffset - baseOffset;
}
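// Illustrative walk-through (not part of the original source): under std430,
// a struct { float a; vec2 b; } laid out from baseOffset 0 has base
// alignment max(4, 8) = 8; a lands at offset 0, b is aligned up to offset 8,
// and the trailing deAlign32 pads the struct to 16 bytes, so 16 is returned.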

//! Appends all child elements to layout, returns offset increment.
int computeReferenceLayout(BufferLayout &layout, int curBlockNdx, const std::string &blockPrefix, int baseOffset,
                           const BufferVar &bufVar, uint32_t blockLayoutFlags)
{
    const VarType &varType       = bufVar.getType();
    const uint32_t combinedFlags = mergeLayoutFlags(blockLayoutFlags, bufVar.getFlags());

    if (varType.isArrayType())
    {
        // Top-level arrays need special care.
        const int topLevelArraySize = varType.getArraySize() == VarType::UNSIZED_ARRAY ? 0 : varType.getArraySize();
        const string prefix         = blockPrefix + bufVar.getName() + "[0]";
        const bool isStd140         = (blockLayoutFlags & LAYOUT_STD140) != 0;
        const int vec4Align         = (int)sizeof(uint32_t) * 4;
        const int baseAlignment =
            (blockLayoutFlags & LAYOUT_SCALAR) != 0  ? computeScalarBlockAlignment(varType, combinedFlags) :
            isStd140                                 ? computeStd140BaseAlignment(varType, combinedFlags) :
            (blockLayoutFlags & LAYOUT_RELAXED) != 0 ? computeRelaxedBlockBaseAlignment(varType, combinedFlags) :
                                                       computeStd430BaseAlignment(varType, combinedFlags);
        int curOffset           = deAlign32(baseOffset, baseAlignment);
        const VarType &elemType = varType.getElementType();

        if (elemType.isBasicType() && !glu::isDataTypeMatrix(elemType.getBasicType()))
        {
            // Array of scalars or vectors.
            const glu::DataType elemBasicType = elemType.getBasicType();
            const int elemBaseAlign           = getDataTypeByteAlignment(elemBasicType);
            const int stride = (blockLayoutFlags & LAYOUT_SCALAR) ? getDataTypeByteSize(elemBasicType) :
                               isStd140                           ? deAlign32(elemBaseAlign, vec4Align) :
                                                                    elemBaseAlign;

            BufferVarLayoutEntry entry;

            entry.name                = prefix;
            entry.topLevelArraySize   = 1;
            entry.topLevelArrayStride = 0;
            entry.type                = elemBasicType;
            entry.blockNdx            = curBlockNdx;
            entry.offset              = curOffset;
            entry.arraySize           = topLevelArraySize;
            entry.arrayStride         = stride;
            entry.matrixStride        = 0;

            layout.bufferVars.push_back(entry);

            curOffset += stride * topLevelArraySize;
        }
        else if (elemType.isBasicType() && glu::isDataTypeMatrix(elemType.getBasicType()))
        {
            // Array of matrices.
            const glu::DataType elemBasicType = elemType.getBasicType();
            const bool isRowMajor             = !!(combinedFlags & LAYOUT_ROW_MAJOR);
            const int vecSize                 = isRowMajor ? glu::getDataTypeMatrixNumColumns(elemBasicType) :
                                                             glu::getDataTypeMatrixNumRows(elemBasicType);
            const int numVecs                 = isRowMajor ? glu::getDataTypeMatrixNumRows(elemBasicType) :
                                                             glu::getDataTypeMatrixNumColumns(elemBasicType);
            const glu::DataType vecType       = glu::getDataTypeFloatVec(vecSize);
            const int vecBaseAlign            = getDataTypeByteAlignment(vecType);
            const int stride                  = (blockLayoutFlags & LAYOUT_SCALAR) ? getDataTypeByteSize(vecType) :
                                                isStd140 ? deAlign32(vecBaseAlign, vec4Align) :
                                                           vecBaseAlign;

            BufferVarLayoutEntry entry;

            entry.name                = prefix;
            entry.topLevelArraySize   = 1;
            entry.topLevelArrayStride = 0;
            entry.type                = elemBasicType;
            entry.blockNdx            = curBlockNdx;
            entry.offset              = curOffset;
            entry.arraySize           = topLevelArraySize;
            entry.arrayStride         = stride * numVecs;
            entry.matrixStride        = stride;
            entry.isRowMajor          = isRowMajor;

            layout.bufferVars.push_back(entry);

            curOffset += entry.arrayStride * topLevelArraySize;
        }
        else
        {
            DE_ASSERT(elemType.isStructType() || elemType.isArrayType());

            // Struct base alignment is not added multiple times as curOffset supplied to computeReferenceLayout
            // was already aligned correctly. Thus computeReferenceLayout should not add any extra padding
            // before struct. Padding after struct will be added as it should.
            //
            // Stride could be computed prior to creating child elements, but it would essentially require running
            // the layout computation twice. Instead we fix stride to child elements afterwards.

            const int firstChildNdx = (int)layout.bufferVars.size();

            const int size   = computeReferenceLayout(layout, curBlockNdx, deAlign32(curOffset, baseAlignment), prefix,
                                                      varType.getElementType(), combinedFlags);
            const int stride = deAlign32(size, baseAlignment);

            for (int childNdx = firstChildNdx; childNdx < (int)layout.bufferVars.size(); childNdx++)
            {
                layout.bufferVars[childNdx].topLevelArraySize   = topLevelArraySize;
                layout.bufferVars[childNdx].topLevelArrayStride = stride;
            }

            if (topLevelArraySize != 0)
                curOffset += stride * (topLevelArraySize - 1) + size;
        }

        return curOffset - baseOffset;
    }
    else
        return computeReferenceLayout(layout, curBlockNdx, baseOffset, blockPrefix + bufVar.getName(), varType,
                                      combinedFlags);
}

void computeReferenceLayout(BufferLayout &layout, ShaderInterface &interface)
{
    int numBlocks = interface.getNumBlocks();

    for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
    {
        BufferBlock &block      = interface.getBlock(blockNdx);
        bool hasInstanceName    = block.getInstanceName() != DE_NULL;
        std::string blockPrefix = hasInstanceName ? (std::string(block.getBlockName()) + ".") : std::string("");
        int curOffset           = 0;
        int activeBlockNdx      = (int)layout.blocks.size();
        int firstVarNdx         = (int)layout.bufferVars.size();

        size_t oldSize = layout.bufferVars.size();
        for (BufferBlock::iterator varIter = block.begin(); varIter != block.end(); varIter++)
        {
            BufferVar &bufVar = *varIter;
            curOffset +=
                computeReferenceLayout(layout, activeBlockNdx, blockPrefix, curOffset, bufVar, block.getFlags());
            if (block.getFlags() & LAYOUT_RELAXED)
            {
                DE_ASSERT(!(layout.bufferVars.size() <= oldSize));
                bufVar.setOffset(layout.bufferVars[oldSize].offset);
            }
            oldSize = layout.bufferVars.size();
        }

        int varIndicesEnd = (int)layout.bufferVars.size();
        int blockSize     = curOffset;
        int numInstances  = block.isArray() ? block.getArraySize() : 1;

        // Create block layout entries for each instance.
        for (int instanceNdx = 0; instanceNdx < numInstances; instanceNdx++)
        {
            // Allocate entry for instance.
            layout.blocks.push_back(BlockLayoutEntry());
            BlockLayoutEntry &blockEntry = layout.blocks.back();

            blockEntry.name = block.getBlockName();
            blockEntry.size = blockSize;

            // Compute active variable set for block.
            for (int varNdx = firstVarNdx; varNdx < varIndicesEnd; varNdx++)
                blockEntry.activeVarIndices.push_back(varNdx);

            if (block.isArray())
                blockEntry.name += "[" + de::toString(instanceNdx) + "]";
        }
    }
}

// Value generator.

void generateValue(const BufferVarLayoutEntry &entry, int unsizedArraySize, void *basePtr, de::Random &rnd)
{
    const glu::DataType scalarType = glu::getDataTypeScalarType(entry.type);
    const int scalarSize           = glu::getDataTypeScalarSize(entry.type);
    const int arraySize            = entry.arraySize == 0 ? unsizedArraySize : entry.arraySize;
    const int arrayStride          = entry.arrayStride;
    const int topLevelSize         = entry.topLevelArraySize == 0 ? unsizedArraySize : entry.topLevelArraySize;
    const int topLevelStride       = entry.topLevelArrayStride;
    const bool isMatrix            = glu::isDataTypeMatrix(entry.type);
    const int numVecs              = isMatrix ? (entry.isRowMajor ? glu::getDataTypeMatrixNumRows(entry.type) :
                                                                    glu::getDataTypeMatrixNumColumns(entry.type)) :
                                                1;
    const int vecSize              = scalarSize / numVecs;
    const size_t compSize          = getDataTypeByteSize(scalarType);

    DE_ASSERT(scalarSize % numVecs == 0);
    DE_ASSERT(topLevelSize >= 0);
    DE_ASSERT(arraySize >= 0);

    for (int topElemNdx = 0; topElemNdx < topLevelSize; topElemNdx++)
    {
        uint8_t *const topElemPtr = (uint8_t *)basePtr + entry.offset + topElemNdx * topLevelStride;

        for (int elemNdx = 0; elemNdx < arraySize; elemNdx++)
        {
            uint8_t *const elemPtr = topElemPtr + elemNdx * arrayStride;

            for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
            {
                uint8_t *const vecPtr = elemPtr + (isMatrix ? vecNdx * entry.matrixStride : 0);

                for (int compNdx = 0; compNdx < vecSize; compNdx++)
                {
                    uint8_t *const compPtr = vecPtr + compSize * compNdx;

                    switch (scalarType)
                    {
                    case glu::TYPE_FLOAT:
                        *((float *)compPtr) = (float)rnd.getInt(-9, 9);
                        break;
                    case glu::TYPE_INT:
                        *((int *)compPtr) = rnd.getInt(-9, 9);
                        break;
                    case glu::TYPE_UINT:
                        *((uint32_t *)compPtr) = (uint32_t)rnd.getInt(0, 9);
                        break;
                    case glu::TYPE_INT8:
                        *((int8_t *)compPtr) = (int8_t)rnd.getInt(-9, 9);
                        break;
                    case glu::TYPE_UINT8:
                        *((uint8_t *)compPtr) = (uint8_t)rnd.getInt(0, 9);
                        break;
                    case glu::TYPE_INT16:
                        *((int16_t *)compPtr) = (int16_t)rnd.getInt(-9, 9);
                        break;
                    case glu::TYPE_UINT16:
                        *((uint16_t *)compPtr) = (uint16_t)rnd.getInt(0, 9);
                        break;
                    case glu::TYPE_FLOAT16:
                        *((tcu::float16_t *)compPtr) = tcu::Float16((float)rnd.getInt(-9, 9)).bits();
                        break;
                    // \note Random bit pattern is used for true values. Spec states that all non-zero values are
                    //       interpreted as true but some implementations fail this.
                    case glu::TYPE_BOOL:
                        *((uint32_t *)compPtr) = rnd.getBool() ? rnd.getUint32() | 1u : 0u;
                        break;
                    default:
                        DE_ASSERT(false);
                    }
                }
            }
        }
    }
}

void generateValues(const BufferLayout &layout, const vector<BlockDataPtr> &blockPointers, uint32_t seed)
{
    de::Random rnd(seed);
    const int numBlocks = (int)layout.blocks.size();

    DE_ASSERT(numBlocks == (int)blockPointers.size());

    for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
    {
        const BlockLayoutEntry &blockLayout = layout.blocks[blockNdx];
        const BlockDataPtr &blockPtr        = blockPointers[blockNdx];
        const int numEntries                = (int)layout.blocks[blockNdx].activeVarIndices.size();

        for (int entryNdx = 0; entryNdx < numEntries; entryNdx++)
        {
            const int varNdx                     = blockLayout.activeVarIndices[entryNdx];
            const BufferVarLayoutEntry &varEntry = layout.bufferVars[varNdx];

            generateValue(varEntry, blockPtr.lastUnsizedArraySize, blockPtr.ptr, rnd);
        }
    }
}

// Shader generator.

void collectUniqueBasicTypes(std::set<glu::DataType> &basicTypes, const BufferBlock &bufferBlock)
{
    for (BufferBlock::const_iterator iter = bufferBlock.begin(); iter != bufferBlock.end(); ++iter)
        vkt::typecomputil::collectUniqueBasicTypes(basicTypes, iter->getType());
}

void collectUniqueBasicTypes(std::set<glu::DataType> &basicTypes, const ShaderInterface &interface)
{
    for (int ndx = 0; ndx < interface.getNumBlocks(); ++ndx)
        collectUniqueBasicTypes(basicTypes, interface.getBlock(ndx));
}

void generateCompareFuncs(std::ostream &str, const ShaderInterface &interface)
{
    std::set<glu::DataType> types;
    std::set<glu::DataType> compareFuncs;

    // Collect unique basic types
    collectUniqueBasicTypes(types, interface);

    // Set of compare functions required
    for (std::set<glu::DataType>::const_iterator iter = types.begin(); iter != types.end(); ++iter)
    {
        vkt::typecomputil::getCompareDependencies(compareFuncs, *iter);
    }

    for (int type = 0; type < glu::TYPE_LAST; ++type)
    {
        if (compareFuncs.find(glu::DataType(type)) != compareFuncs.end())
            str << vkt::typecomputil::getCompareFuncForType(glu::DataType(type));
    }
}

bool usesRelaxedLayout(const ShaderInterface &interface)
{
    // If any of the blocks has the LAYOUT_RELAXED flag
    for (int ndx = 0; ndx < interface.getNumBlocks(); ++ndx)
    {
        if (interface.getBlock(ndx).getFlags() & LAYOUT_RELAXED)
            return true;
    }
    return false;
}

bool uses16BitStorage(const ShaderInterface &interface)
{
    // If any of the blocks has the LAYOUT_16BIT_STORAGE flag
    for (int ndx = 0; ndx < interface.getNumBlocks(); ++ndx)
    {
        if (interface.getBlock(ndx).getFlags() & LAYOUT_16BIT_STORAGE)
            return true;
    }
    return false;
}

bool uses8BitStorage(const ShaderInterface &interface)
{
    // If any of the blocks has the LAYOUT_8BIT_STORAGE flag
    for (int ndx = 0; ndx < interface.getNumBlocks(); ++ndx)
    {
        if (interface.getBlock(ndx).getFlags() & LAYOUT_8BIT_STORAGE)
            return true;
    }
    return false;
}

bool usesScalarLayout(const ShaderInterface &interface)
{
    // If any of the blocks has the LAYOUT_SCALAR flag
    for (int ndx = 0; ndx < interface.getNumBlocks(); ++ndx)
    {
        if (interface.getBlock(ndx).getFlags() & LAYOUT_SCALAR)
            return true;
    }
    return false;
}

bool usesDescriptorIndexing(const ShaderInterface &interface)
{
    // If any of the blocks has the LAYOUT_DESCRIPTOR_INDEXING flag
    for (int ndx = 0; ndx < interface.getNumBlocks(); ++ndx)
    {
        if (interface.getBlock(ndx).getFlags() & LAYOUT_DESCRIPTOR_INDEXING)
            return true;
    }
    return false;
}

struct Indent
{
    int level;
    Indent(int level_) : level(level_)
    {
    }
};

std::ostream &operator<<(std::ostream &str, const Indent &indent)
{
    for (int i = 0; i < indent.level; i++)
        str << "\t";
    return str;
}

void generateDeclaration(std::ostream &src, const BufferVar &bufferVar, int indentLevel)
{
    // \todo [pyry] Qualifiers
    if ((bufferVar.getFlags() & LAYOUT_MASK) != 0)
        src << "layout(" << LayoutFlagsFmt(bufferVar.getFlags() & LAYOUT_MASK) << ") ";
    else if (bufferVar.getOffset() != ~0u)
        src << "layout(offset = " << bufferVar.getOffset() << ") ";

    src << glu::declare(bufferVar.getType(), bufferVar.getName(), indentLevel);
}

void generateDeclaration(std::ostream &src, const BufferBlock &block, int bindingPoint, bool usePhysStorageBuffer)
{
    src << "layout(";
    if ((block.getFlags() & LAYOUT_MASK) != 0)
        src << LayoutFlagsFmt(block.getFlags() & LAYOUT_MASK) << ", ";

    if (usePhysStorageBuffer)
        src << "buffer_reference";
    else
        src << "binding = " << bindingPoint;

    src << ") ";

    bool readonly = true;
    for (BufferBlock::const_iterator varIter = block.begin(); varIter != block.end(); varIter++)
    {
        const BufferVar &bufVar = *varIter;
        if (bufVar.getFlags() & ACCESS_WRITE)
        {
            readonly = false;
            break;
        }
    }
    if (readonly)
        src << "readonly ";

    src << "buffer " << block.getBlockName();
    src << "\n{\n";

    for (BufferBlock::const_iterator varIter = block.begin(); varIter != block.end(); varIter++)
    {
        src << Indent(1);

        generateDeclaration(src, *varIter, 1 /* indent level */);
        src << ";\n";
    }

    src << "}";

    if (!usePhysStorageBuffer)
    {
        if (block.getInstanceName() != DE_NULL)
        {
            src << " " << block.getInstanceName();
            if (block.getFlags() & LAYOUT_DESCRIPTOR_INDEXING)
                src << "[]";
            else if (block.isArray())
                src << "[" << block.getArraySize() << "]";
        }
        else
            DE_ASSERT(!block.isArray());
    }

    src << ";\n";
}
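// Illustrative output (not part of the original source): a std140 block named
// "Block" with instance name "b" and array size 2, bound at point 0 and
// containing only read-only variables, is emitted roughly as
//     layout(std140, binding = 0) readonly buffer Block { ... } b[2];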

void generateImmMatrixSrc(std::ostream &src, glu::DataType basicType, int matrixStride, bool isRowMajor, bool singleCol,
                          int colNumber, const void *valuePtr)
{
    DE_ASSERT(glu::isDataTypeMatrix(basicType));

    const int compSize = sizeof(uint32_t);
    const int numRows  = glu::getDataTypeMatrixNumRows(basicType);
    const int numCols  = glu::getDataTypeMatrixNumColumns(basicType);

    src << glu::getDataTypeName(singleCol ? glu::getDataTypeMatrixColumnType(basicType) : basicType) << "(";

    // Constructed in column-wise order.
    bool firstElem = true;
    for (int colNdx = 0; colNdx < numCols; colNdx++)
    {
        if (singleCol && colNdx != colNumber)
            continue;

        for (int rowNdx = 0; rowNdx < numRows; rowNdx++)
        {
            const uint8_t *compPtr =
                (const uint8_t *)valuePtr +
                (isRowMajor ? rowNdx * matrixStride + colNdx * compSize : colNdx * matrixStride + rowNdx * compSize);

            if (!firstElem)
                src << ", ";

            src << de::floatToString(*((const float *)compPtr), 1);
            firstElem = false;
        }
    }

    src << ")";
}
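// Illustrative output (not part of the original source): for a mat2 holding
// columns (1, 2) and (3, 4), the full-matrix form above emits
// "mat2(1.0, 2.0, 3.0, 4.0)", while the single-column form with
// colNumber = 1 emits "vec2(3.0, 4.0)".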

void generateImmMatrixSrc(std::ostream &src, glu::DataType basicType, int matrixStride, bool isRowMajor,
                          const void *valuePtr, const char *resultVar, const char *typeName, const string shaderName)
{
    const int compSize = sizeof(uint32_t);
    const int numRows  = glu::getDataTypeMatrixNumRows(basicType);
    const int numCols  = glu::getDataTypeMatrixNumColumns(basicType);

    typeName = "float";
    for (int colNdex = 0; colNdex < numCols; colNdex++)
    {
        for (int rowNdex = 0; rowNdex < numRows; rowNdex++)
        {
            src << "\t" << resultVar << " = compare_" << typeName << "(" << shaderName << "[" << colNdex << "]["
                << rowNdex << "], ";
            const uint8_t *compPtr =
                (const uint8_t *)valuePtr + (isRowMajor ? rowNdex * matrixStride + colNdex * compSize :
                                                          colNdex * matrixStride + rowNdex * compSize);

            src << de::floatToString(*((const float *)compPtr), 1);
            src << ") && " << resultVar << ";\n";
        }
    }

    typeName = "vec";
    for (int colNdex = 0; colNdex < numCols; colNdex++)
    {
        src << "\t" << resultVar << " = compare_" << typeName << numRows << "(" << shaderName << "[" << colNdex << "], "
            << typeName << numRows << "(";
        for (int rowNdex = 0; rowNdex < numRows; rowNdex++)
        {
            const uint8_t *compPtr =
                (const uint8_t *)valuePtr + (isRowMajor ? (rowNdex * matrixStride + colNdex * compSize) :
                                                          (colNdex * matrixStride + rowNdex * compSize));
            src << de::floatToString(*((const float *)compPtr), 1);

            if (rowNdex < numRows - 1)
                src << ", ";
        }
        src << ")) && " << resultVar << ";\n";
    }
}

void generateImmScalarVectorSrc(std::ostream &src, glu::DataType basicType, const void *valuePtr)
{
    DE_ASSERT(glu::isDataTypeFloatOrVec(basicType) || glu::isDataTypeIntOrIVec(basicType) ||
              glu::isDataTypeUintOrUVec(basicType) || glu::isDataTypeBoolOrBVec(basicType) ||
              glu::isDataTypeExplicitPrecision(basicType));

    const glu::DataType scalarType = glu::getDataTypeScalarType(basicType);
    const int scalarSize           = glu::getDataTypeScalarSize(basicType);
    const size_t compSize          = getDataTypeByteSize(scalarType);

    if (scalarSize > 1)
        src << glu::getDataTypeName(vkt::typecomputil::getPromoteType(basicType)) << "(";

    for (int scalarNdx = 0; scalarNdx < scalarSize; scalarNdx++)
    {
        const uint8_t *compPtr = (const uint8_t *)valuePtr + scalarNdx * compSize;

        if (scalarNdx > 0)
            src << ", ";

        switch (scalarType)
        {
        case glu::TYPE_FLOAT16:
            src << de::floatToString(tcu::Float16(*((const tcu::float16_t *)compPtr)).asFloat(), 1);
            break;
        case glu::TYPE_FLOAT:
            src << de::floatToString(*((const float *)compPtr), 1);
            break;
        case glu::TYPE_INT8:
            src << (uint32_t) * ((const int8_t *)compPtr);
            break;
        case glu::TYPE_INT16:
            src << *((const int16_t *)compPtr);
            break;
        case glu::TYPE_INT:
            src << *((const int *)compPtr);
            break;
        case glu::TYPE_UINT8:
            src << (uint32_t) * ((const uint8_t *)compPtr) << "u";
            break;
        case glu::TYPE_UINT16:
            src << *((const uint16_t *)compPtr) << "u";
            break;
        case glu::TYPE_UINT:
            src << *((const uint32_t *)compPtr) << "u";
            break;
        case glu::TYPE_BOOL:
            src << (*((const uint32_t *)compPtr) != 0u ? "true" : "false");
            break;
        default:
            DE_ASSERT(false);
        }
    }

    if (scalarSize > 1)
        src << ")";
}

string getAPIName(const BufferBlock &block, const BufferVar &var, const glu::TypeComponentVector &accessPath)
{
    std::ostringstream name;

    if (block.getInstanceName())
        name << block.getBlockName() << ".";

    name << var.getName();

    for (glu::TypeComponentVector::const_iterator pathComp = accessPath.begin(); pathComp != accessPath.end();
         pathComp++)
    {
        if (pathComp->type == glu::VarTypeComponent::STRUCT_MEMBER)
        {
            const VarType curType       = glu::getVarType(var.getType(), accessPath.begin(), pathComp);
            const StructType *structPtr = curType.getStructPtr();

            name << "." << structPtr->getMember(pathComp->index).getName();
        }
        else if (pathComp->type == glu::VarTypeComponent::ARRAY_ELEMENT)
        {
            if (pathComp == accessPath.begin() || (pathComp + 1) == accessPath.end())
                name << "[0]"; // Top- / bottom-level array
            else
                name << "[" << pathComp->index << "]";
        }
        else
            DE_ASSERT(false);
    }

    return name.str();
}
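// Illustrative example (not part of the original source): for a block named
// "Block" with an instance name, variable "v" and access path v[2].m[1],
// the API name is "Block.v[0].m[0]" -- top- and bottom-level array indices
// collapse to [0], as in the comment above.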

string getShaderName(const BufferBlock &block, int instanceNdx, const BufferVar &var,
                     const glu::TypeComponentVector &accessPath)
{
    std::ostringstream name;

    if (block.getInstanceName())
    {
        name << block.getInstanceName();

        if (block.getFlags() & LAYOUT_DESCRIPTOR_INDEXING)
            name << "[nonuniformEXT(" << instanceNdx << ")]";
        else if (block.isArray())
            name << "[" << instanceNdx << "]";

        name << ".";
    }
    else
        DE_ASSERT(instanceNdx == 0);

    name << var.getName();

    for (glu::TypeComponentVector::const_iterator pathComp = accessPath.begin(); pathComp != accessPath.end();
         pathComp++)
    {
        if (pathComp->type == glu::VarTypeComponent::STRUCT_MEMBER)
        {
            const VarType curType       = glu::getVarType(var.getType(), accessPath.begin(), pathComp);
            const StructType *structPtr = curType.getStructPtr();

            name << "." << structPtr->getMember(pathComp->index).getName();
        }
        else if (pathComp->type == glu::VarTypeComponent::ARRAY_ELEMENT)
            name << "[" << pathComp->index << "]";
        else
            DE_ASSERT(false);
    }

    return name.str();
}
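// Illustrative example (not part of the original source): instance name "b",
// instanceNdx 1 with descriptor indexing, variable "v" and access path
// v[2].m[1] yield "b[nonuniformEXT(1)].v[2].m[1]" -- unlike the API name,
// the shader name keeps the real array indices.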

int computeOffset(const BufferVarLayoutEntry &varLayout, const glu::TypeComponentVector &accessPath)
{
    const int topLevelNdx = (accessPath.size() > 1 && accessPath.front().type == glu::VarTypeComponent::ARRAY_ELEMENT) ?
                                accessPath.front().index :
                                0;
    const int bottomLevelNdx = (!accessPath.empty() && accessPath.back().type == glu::VarTypeComponent::ARRAY_ELEMENT) ?
                                   accessPath.back().index :
                                   0;

    return varLayout.offset + varLayout.topLevelArrayStride * topLevelNdx + varLayout.arrayStride * bottomLevelNdx;
}
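// Illustrative example (not part of the original source): with offset = 64,
// topLevelArrayStride = 32 and arrayStride = 16, an access path whose first
// index is 2 and last index is 1 resolves to 64 + 2 * 32 + 1 * 16 = 144.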

void generateCompareSrc(std::ostream &src, const char *resultVar, const BufferLayout &bufferLayout,
                        const BufferBlock &block, int instanceNdx, const BlockDataPtr &blockPtr,
                        const BufferVar &bufVar, const glu::SubTypeAccess &accessPath, MatrixLoadFlags matrixLoadFlag,
                        int &compareLimit)
{
    const VarType curType = accessPath.getType();

    // If the limit for the number of performed compare operations was reached, skip the remaining compares.
    if (compareLimit < 1)
        return;

    if (curType.isArrayType())
    {
        const int arraySize = curType.getArraySize() == VarType::UNSIZED_ARRAY ?
                                  block.getLastUnsizedArraySize(instanceNdx) :
                                  curType.getArraySize();

        for (int elemNdx = 0; elemNdx < arraySize; elemNdx++)
            generateCompareSrc(src, resultVar, bufferLayout, block, instanceNdx, blockPtr, bufVar,
                               accessPath.element(elemNdx), LOAD_FULL_MATRIX, compareLimit);
    }
    else if (curType.isStructType())
    {
        const int numMembers = curType.getStructPtr()->getNumMembers();

        for (int memberNdx = 0; memberNdx < numMembers; memberNdx++)
            generateCompareSrc(src, resultVar, bufferLayout, block, instanceNdx, blockPtr, bufVar,
                               accessPath.member(memberNdx), LOAD_FULL_MATRIX, compareLimit);
    }
    else
    {
        DE_ASSERT(curType.isBasicType());
        compareLimit--;

        const string apiName = getAPIName(block, bufVar, accessPath.getPath());
        const int varNdx     = bufferLayout.getVariableIndex(apiName);

        DE_ASSERT(varNdx >= 0);
        {
            const BufferVarLayoutEntry &varLayout = bufferLayout.bufferVars[varNdx];
            const string shaderName               = getShaderName(block, instanceNdx, bufVar, accessPath.getPath());
            const glu::DataType basicType         = curType.getBasicType();
            const bool isMatrix                   = glu::isDataTypeMatrix(basicType);
            const char *typeName                  = glu::getDataTypeName(basicType);
            const void *valuePtr = (const uint8_t *)blockPtr.ptr + computeOffset(varLayout, accessPath.getPath());

            if (isMatrix)
            {
                if (matrixLoadFlag == LOAD_MATRIX_COMPONENTS)
                    generateImmMatrixSrc(src, basicType, varLayout.matrixStride, varLayout.isRowMajor, valuePtr,
                                         resultVar, typeName, shaderName);
                else
                {
                    src << "\t" << resultVar << " = compare_" << typeName << "(" << shaderName << ", ";
                    generateImmMatrixSrc(src, basicType, varLayout.matrixStride, varLayout.isRowMajor, false, -1,
                                         valuePtr);
                    src << ") && " << resultVar << ";\n";
                }
            }
            else
            {
                const char *castName      = "";
                glu::DataType promoteType = vkt::typecomputil::getPromoteType(basicType);
                if (basicType != promoteType)
                    castName = glu::getDataTypeName(promoteType);

                src << "\t" << resultVar << " = compare_" << typeName << "(" << castName << "(" << shaderName << "), ";
                generateImmScalarVectorSrc(src, basicType, valuePtr);
                src << ") && " << resultVar << ";\n";
            }
        }
    }
}
1349 
void generateCompareSrc(std::ostream &src, const char *resultVar, const ShaderInterface &interface,
                        const BufferLayout &layout, const vector<BlockDataPtr> &blockPointers,
                        MatrixLoadFlags matrixLoadFlag)
{
    // Limit the number of emitted compare operations: some generated tests would
    // otherwise contain a very large number of compares, leading to slow shader
    // compile times, which in turn cause the test to be skipped on slower platforms.
    int compareLimit = 130;

    for (int declNdx = 0; declNdx < interface.getNumBlocks(); declNdx++)
    {
        const BufferBlock &block = interface.getBlock(declNdx);
        const bool isArray       = block.isArray();
        const int numInstances   = isArray ? block.getArraySize() : 1;

        DE_ASSERT(!isArray || block.getInstanceName());

        for (int instanceNdx = 0; instanceNdx < numInstances; instanceNdx++)
        {
            const string instanceName =
                block.getBlockName() + (isArray ? "[" + de::toString(instanceNdx) + "]" : string(""));
            const int blockNdx           = layout.getBlockIndex(instanceName);
            const BlockDataPtr &blockPtr = blockPointers[blockNdx];

            for (BufferBlock::const_iterator varIter = block.begin(); varIter != block.end(); varIter++)
            {
                const BufferVar &bufVar = *varIter;

                if ((bufVar.getFlags() & ACCESS_READ) == 0)
                    continue; // Don't read from that variable.

                generateCompareSrc(src, resultVar, layout, block, instanceNdx, blockPtr, bufVar,
                                   glu::SubTypeAccess(bufVar.getType()), matrixLoadFlag, compareLimit);
            }
        }
    }
}

// \todo [2013-10-14 pyry] Almost identical to generateCompareSrc - unify?

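// Counterpart of generateCompareSrc above: recursively emits GLSL that stores the
// reference value into every writable basic-type leaf. As an illustration (a sketch;
// "block", "v" and "m" are hypothetical names), a vec2 leaf and a mat2 leaf stored
// column-by-column produce lines roughly like:
//
//     block.v = (vec2(1.0, 2.0));
//     block.m[0] = (vec2(1.0, 2.0));
//     block.m[1] = (vec2(3.0, 4.0));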
void generateWriteSrc(std::ostream &src, const BufferLayout &bufferLayout, const BufferBlock &block, int instanceNdx,
                      const BlockDataPtr &blockPtr, const BufferVar &bufVar, const glu::SubTypeAccess &accessPath,
                      MatrixStoreFlags matrixStoreFlag)
{
    const VarType curType = accessPath.getType();

    if (curType.isArrayType())
    {
        const int arraySize = curType.getArraySize() == VarType::UNSIZED_ARRAY ?
                                  block.getLastUnsizedArraySize(instanceNdx) :
                                  curType.getArraySize();

        for (int elemNdx = 0; elemNdx < arraySize; elemNdx++)
            generateWriteSrc(src, bufferLayout, block, instanceNdx, blockPtr, bufVar, accessPath.element(elemNdx),
                             matrixStoreFlag);
    }
    else if (curType.isStructType())
    {
        const int numMembers = curType.getStructPtr()->getNumMembers();

        for (int memberNdx = 0; memberNdx < numMembers; memberNdx++)
            generateWriteSrc(src, bufferLayout, block, instanceNdx, blockPtr, bufVar, accessPath.member(memberNdx),
                             matrixStoreFlag);
    }
    else
    {
        DE_ASSERT(curType.isBasicType());

        const string apiName = getAPIName(block, bufVar, accessPath.getPath());
        const int varNdx     = bufferLayout.getVariableIndex(apiName);

        DE_ASSERT(varNdx >= 0);
        {
            const BufferVarLayoutEntry &varLayout = bufferLayout.bufferVars[varNdx];
            const string shaderName               = getShaderName(block, instanceNdx, bufVar, accessPath.getPath());
            const glu::DataType basicType         = curType.getBasicType();
            const bool isMatrix                   = glu::isDataTypeMatrix(basicType);
            const void *valuePtr = (const uint8_t *)blockPtr.ptr + computeOffset(varLayout, accessPath.getPath());

            const char *castName      = "";
            glu::DataType promoteType = vkt::typecomputil::getPromoteType(basicType);
            if (basicType != promoteType)
                castName = glu::getDataTypeName((!isMatrix || matrixStoreFlag == STORE_FULL_MATRIX) ?
                                                    basicType :
                                                    glu::getDataTypeMatrixColumnType(basicType));

            if (isMatrix)
            {
                switch (matrixStoreFlag)
                {
                case STORE_FULL_MATRIX:
                {
                    src << "\t" << shaderName << " = " << castName << "(";
                    generateImmMatrixSrc(src, basicType, varLayout.matrixStride, varLayout.isRowMajor, false, -1,
                                         valuePtr);
                    src << ");\n";
                    break;
                }
                case STORE_MATRIX_COLUMNS:
                {
                    int numCols = glu::getDataTypeMatrixNumColumns(basicType);
                    for (int colIdx = 0; colIdx < numCols; ++colIdx)
                    {
                        src << "\t" << shaderName << "[" << colIdx << "]"
                            << " = " << castName << "(";
                        generateImmMatrixSrc(src, basicType, varLayout.matrixStride, varLayout.isRowMajor, true, colIdx,
                                             valuePtr);
                        src << ");\n";
                    }
                    break;
                }
                default:
                    DE_ASSERT(false);
                    break;
                }
            }
            else
            {
                src << "\t" << shaderName << " = " << castName << "(";
                generateImmScalarVectorSrc(src, basicType, valuePtr);
                src << ");\n";
            }
        }
    }
}

void generateWriteSrc(std::ostream &src, const ShaderInterface &interface, const BufferLayout &layout,
                      const vector<BlockDataPtr> &blockPointers, MatrixStoreFlags matrixStoreFlag)
{
    for (int declNdx = 0; declNdx < interface.getNumBlocks(); declNdx++)
    {
        const BufferBlock &block = interface.getBlock(declNdx);
        const bool isArray       = block.isArray();
        const int numInstances   = isArray ? block.getArraySize() : 1;

        DE_ASSERT(!isArray || block.getInstanceName());

        for (int instanceNdx = 0; instanceNdx < numInstances; instanceNdx++)
        {
            const string instanceName =
                block.getBlockName() + (isArray ? "[" + de::toString(instanceNdx) + "]" : string(""));
            const int blockNdx           = layout.getBlockIndex(instanceName);
            const BlockDataPtr &blockPtr = blockPointers[blockNdx];

            for (BufferBlock::const_iterator varIter = block.begin(); varIter != block.end(); varIter++)
            {
                const BufferVar &bufVar = *varIter;

                if ((bufVar.getFlags() & ACCESS_WRITE) == 0)
                    continue; // Don't write to that variable.

                generateWriteSrc(src, layout, block, instanceNdx, blockPtr, bufVar,
                                 glu::SubTypeAccess(bufVar.getType()), matrixStoreFlag);
            }
        }
    }
}

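// Assembles the complete compute shader: version and extension directives, an atomic
// pass counter, struct and block declarations, the generated compare functions, and
// a main() that first verifies the initial values and then writes the output values.
// The generated source has roughly this shape (a sketch; declarations vary per case):
//
//     #version 450
//     ...extension directives...
//     layout(local_size_x = 1) in;
//     layout(std140, binding = 0) buffer AcBlock { highp uint ac_numPassed; };
//     ...struct and buffer block declarations...
//     ...compare_*() helpers...
//     void main (void)
//     {
//         bool allOk = true;
//         ...compares...
//         if (allOk)
//             ac_numPassed++;
//         ...writes...
//     }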
string generateComputeShader(const ShaderInterface &interface, const BufferLayout &layout,
                             const vector<BlockDataPtr> &comparePtrs, const vector<BlockDataPtr> &writePtrs,
                             MatrixLoadFlags matrixLoadFlag, MatrixStoreFlags matrixStoreFlag,
                             bool usePhysStorageBuffer)
{
    std::ostringstream src;

    if (uses16BitStorage(interface) || uses8BitStorage(interface) || usesRelaxedLayout(interface) ||
        usesScalarLayout(interface) || usesDescriptorIndexing(interface))
    {
        src << "#version 450\n";
    }
    else
        src << "#version 310 es\n";

    src << "#extension GL_EXT_shader_16bit_storage : enable\n";
    src << "#extension GL_EXT_shader_8bit_storage : enable\n";
    src << "#extension GL_EXT_scalar_block_layout : enable\n";
    src << "#extension GL_EXT_buffer_reference : enable\n";
    src << "#extension GL_EXT_nonuniform_qualifier : enable\n";
    src << "layout(local_size_x = 1) in;\n";
    src << "\n";

    // Atomic counter for counting passed invocations.
    src << "layout(std140, binding = 0) buffer AcBlock { highp uint ac_numPassed; };\n\n";

    std::vector<const StructType *> namedStructs;
    interface.getNamedStructs(namedStructs);
    for (std::vector<const StructType *>::const_iterator structIter = namedStructs.begin();
         structIter != namedStructs.end(); structIter++)
        src << glu::declare(*structIter) << ";\n";

    {
        for (int blockNdx = 0; blockNdx < interface.getNumBlocks(); blockNdx++)
        {
            const BufferBlock &block = interface.getBlock(blockNdx);
            generateDeclaration(src, block, 1 + blockNdx, usePhysStorageBuffer);
        }

        if (usePhysStorageBuffer)
        {
            src << "layout (push_constant, std430) uniform PC {\n";
            for (int blockNdx = 0; blockNdx < interface.getNumBlocks(); blockNdx++)
            {
                const BufferBlock &block = interface.getBlock(blockNdx);
                if (block.getInstanceName() != DE_NULL)
                {
                    src << "    " << block.getBlockName() << " " << block.getInstanceName();
                    if (block.isArray())
                        src << "[" << block.getArraySize() << "]";
                    src << ";\n";
                }
            }
            src << "};\n";
        }
    }

    // Comparison utilities.
    src << "\n";
    generateCompareFuncs(src, interface);

    src << "\n"
           "void main (void)\n"
           "{\n"
           "    bool allOk = true;\n";

    // Value compare.
    generateCompareSrc(src, "allOk", interface, layout, comparePtrs, matrixLoadFlag);

    src << "    if (allOk)\n"
        << "        ac_numPassed++;\n"
        << "\n";

    // Value write.
    generateWriteSrc(src, interface, layout, writePtrs, matrixStoreFlag);

    src << "}\n";

    return src.str();
}

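// Copies a single buffer variable between two blocks that may use different layouts,
// honoring each side's array stride, top-level array stride and matrix order. The
// destination layout must describe a subset of the source (asserted below).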
void copyBufferVarData(const BufferVarLayoutEntry &dstEntry, const BlockDataPtr &dstBlockPtr,
                       const BufferVarLayoutEntry &srcEntry, const BlockDataPtr &srcBlockPtr)
{
    DE_ASSERT(dstEntry.arraySize <= srcEntry.arraySize);
    DE_ASSERT(dstEntry.topLevelArraySize <= srcEntry.topLevelArraySize);
    DE_ASSERT(dstBlockPtr.lastUnsizedArraySize <= srcBlockPtr.lastUnsizedArraySize);
    DE_ASSERT(dstEntry.type == srcEntry.type);

    uint8_t *const dstBasePtr       = (uint8_t *)dstBlockPtr.ptr + dstEntry.offset;
    const uint8_t *const srcBasePtr = (const uint8_t *)srcBlockPtr.ptr + srcEntry.offset;
    const int scalarSize            = glu::getDataTypeScalarSize(dstEntry.type);
    const bool isMatrix             = glu::isDataTypeMatrix(dstEntry.type);
    glu::DataType scalarType        = glu::getDataTypeScalarType(dstEntry.type);
    const size_t compSize           = getDataTypeByteSize(scalarType);
    const int dstArraySize          = dstEntry.arraySize == 0 ? dstBlockPtr.lastUnsizedArraySize : dstEntry.arraySize;
    const int dstArrayStride        = dstEntry.arrayStride;
    const int dstTopLevelSize =
        dstEntry.topLevelArraySize == 0 ? dstBlockPtr.lastUnsizedArraySize : dstEntry.topLevelArraySize;
    const int dstTopLevelStride = dstEntry.topLevelArrayStride;
    const int srcArraySize      = srcEntry.arraySize == 0 ? srcBlockPtr.lastUnsizedArraySize : srcEntry.arraySize;
    const int srcArrayStride    = srcEntry.arrayStride;
    const int srcTopLevelSize =
        srcEntry.topLevelArraySize == 0 ? srcBlockPtr.lastUnsizedArraySize : srcEntry.topLevelArraySize;
    const int srcTopLevelStride = srcEntry.topLevelArrayStride;

    DE_ASSERT(dstArraySize <= srcArraySize && dstTopLevelSize <= srcTopLevelSize);
    DE_UNREF(srcArraySize && srcTopLevelSize);

    for (int topElemNdx = 0; topElemNdx < dstTopLevelSize; topElemNdx++)
    {
        uint8_t *const dstTopPtr       = dstBasePtr + topElemNdx * dstTopLevelStride;
        const uint8_t *const srcTopPtr = srcBasePtr + topElemNdx * srcTopLevelStride;

        for (int elementNdx = 0; elementNdx < dstArraySize; elementNdx++)
        {
            uint8_t *const dstElemPtr       = dstTopPtr + elementNdx * dstArrayStride;
            const uint8_t *const srcElemPtr = srcTopPtr + elementNdx * srcArrayStride;

            if (isMatrix)
            {
                const int numRows = glu::getDataTypeMatrixNumRows(dstEntry.type);
                const int numCols = glu::getDataTypeMatrixNumColumns(dstEntry.type);

                for (int colNdx = 0; colNdx < numCols; colNdx++)
                {
                    for (int rowNdx = 0; rowNdx < numRows; rowNdx++)
                    {
                        uint8_t *dstCompPtr =
                            dstElemPtr + (dstEntry.isRowMajor ? rowNdx * dstEntry.matrixStride + colNdx * compSize :
                                                                colNdx * dstEntry.matrixStride + rowNdx * compSize);
                        const uint8_t *srcCompPtr =
                            srcElemPtr + (srcEntry.isRowMajor ? rowNdx * srcEntry.matrixStride + colNdx * compSize :
                                                                colNdx * srcEntry.matrixStride + rowNdx * compSize);

                        DE_ASSERT((intptr_t)(srcCompPtr + compSize) - (intptr_t)srcBlockPtr.ptr <=
                                  (intptr_t)srcBlockPtr.size);
                        DE_ASSERT((intptr_t)(dstCompPtr + compSize) - (intptr_t)dstBlockPtr.ptr <=
                                  (intptr_t)dstBlockPtr.size);
                        deMemcpy(dstCompPtr, srcCompPtr, compSize);
                    }
                }
            }
            else
            {
                DE_ASSERT((intptr_t)(srcElemPtr + scalarSize * compSize) - (intptr_t)srcBlockPtr.ptr <=
                          (intptr_t)srcBlockPtr.size);
                DE_ASSERT((intptr_t)(dstElemPtr + scalarSize * compSize) - (intptr_t)dstBlockPtr.ptr <=
                          (intptr_t)dstBlockPtr.size);
                deMemcpy(dstElemPtr, srcElemPtr, scalarSize * compSize);
            }
        }
    }
}

void copyData(const BufferLayout &dstLayout, const vector<BlockDataPtr> &dstBlockPointers,
              const BufferLayout &srcLayout, const vector<BlockDataPtr> &srcBlockPointers)
{
    // \note The source layout is used as the reference in case activeVarIndices happens
    //       to be incorrect in the dstLayout blocks.
    int numBlocks = (int)srcLayout.blocks.size();

    for (int srcBlockNdx = 0; srcBlockNdx < numBlocks; srcBlockNdx++)
    {
        const BlockLayoutEntry &srcBlock = srcLayout.blocks[srcBlockNdx];
        const BlockDataPtr &srcBlockPtr  = srcBlockPointers[srcBlockNdx];
        int dstBlockNdx                  = dstLayout.getBlockIndex(srcBlock.name.c_str());

        if (dstBlockNdx >= 0)
        {
            DE_ASSERT(de::inBounds(dstBlockNdx, 0, (int)dstBlockPointers.size()));

            const BlockDataPtr &dstBlockPtr = dstBlockPointers[dstBlockNdx];

            for (vector<int>::const_iterator srcVarNdxIter = srcBlock.activeVarIndices.begin();
                 srcVarNdxIter != srcBlock.activeVarIndices.end(); srcVarNdxIter++)
            {
                const BufferVarLayoutEntry &srcEntry = srcLayout.bufferVars[*srcVarNdxIter];
                int dstVarNdx                        = dstLayout.getVariableIndex(srcEntry.name.c_str());

                if (dstVarNdx >= 0)
                    copyBufferVarData(dstLayout.bufferVars[dstVarNdx], dstBlockPtr, srcEntry, srcBlockPtr);
            }
        }
    }
}

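// Variables the shader does not write (no ACCESS_WRITE flag) keep their initial
// contents, so the initial data is propagated into the expected-result storage to
// make the final comparison cover the untouched variables as well.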
void copyNonWrittenData(const BufferLayout &layout, const BufferBlock &block, int instanceNdx,
                        const BlockDataPtr &srcBlockPtr, const BlockDataPtr &dstBlockPtr, const BufferVar &bufVar,
                        const glu::SubTypeAccess &accessPath)
{
    const VarType curType = accessPath.getType();

    if (curType.isArrayType())
    {
        const int arraySize = curType.getArraySize() == VarType::UNSIZED_ARRAY ?
                                  block.getLastUnsizedArraySize(instanceNdx) :
                                  curType.getArraySize();

        for (int elemNdx = 0; elemNdx < arraySize; elemNdx++)
            copyNonWrittenData(layout, block, instanceNdx, srcBlockPtr, dstBlockPtr, bufVar,
                               accessPath.element(elemNdx));
    }
    else if (curType.isStructType())
    {
        const int numMembers = curType.getStructPtr()->getNumMembers();

        for (int memberNdx = 0; memberNdx < numMembers; memberNdx++)
            copyNonWrittenData(layout, block, instanceNdx, srcBlockPtr, dstBlockPtr, bufVar,
                               accessPath.member(memberNdx));
    }
    else
    {
        DE_ASSERT(curType.isBasicType());

        const string apiName = getAPIName(block, bufVar, accessPath.getPath());
        const int varNdx     = layout.getVariableIndex(apiName);

        DE_ASSERT(varNdx >= 0);
        {
            const BufferVarLayoutEntry &varLayout = layout.bufferVars[varNdx];
            copyBufferVarData(varLayout, dstBlockPtr, varLayout, srcBlockPtr);
        }
    }
}

void copyNonWrittenData(const ShaderInterface &interface, const BufferLayout &layout,
                        const vector<BlockDataPtr> &srcPtrs, const vector<BlockDataPtr> &dstPtrs)
{
    for (int declNdx = 0; declNdx < interface.getNumBlocks(); declNdx++)
    {
        const BufferBlock &block = interface.getBlock(declNdx);
        const bool isArray       = block.isArray();
        const int numInstances   = isArray ? block.getArraySize() : 1;

        DE_ASSERT(!isArray || block.getInstanceName());

        for (int instanceNdx = 0; instanceNdx < numInstances; instanceNdx++)
        {
            const string instanceName =
                block.getBlockName() + (isArray ? "[" + de::toString(instanceNdx) + "]" : string(""));
            const int blockNdx              = layout.getBlockIndex(instanceName);
            const BlockDataPtr &srcBlockPtr = srcPtrs[blockNdx];
            const BlockDataPtr &dstBlockPtr = dstPtrs[blockNdx];

            for (BufferBlock::const_iterator varIter = block.begin(); varIter != block.end(); varIter++)
            {
                const BufferVar &bufVar = *varIter;

                if (bufVar.getFlags() & ACCESS_WRITE)
                    continue;

                copyNonWrittenData(layout, block, instanceNdx, srcBlockPtr, dstBlockPtr, bufVar,
                                   glu::SubTypeAccess(bufVar.getType()));
            }
        }
    }
}

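// Compares numComps scalar components at ref/res. Floats use the same 0.05 threshold
// as the generated shader-side compare_* functions, booleans compare only truthiness,
// and all other types (including 16-bit floats) are compared bit-exactly.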
bool compareComponents(glu::DataType scalarType, const void *ref, const void *res, int numComps)
{
    if (scalarType == glu::TYPE_FLOAT)
    {
        const float threshold = 0.05f; // Same as used in shaders - should be fine for values being used.

        for (int ndx = 0; ndx < numComps; ndx++)
        {
            const float refVal = *((const float *)ref + ndx);
            const float resVal = *((const float *)res + ndx);

            if (deFloatAbs(resVal - refVal) >= threshold)
                return false;
        }
    }
    else if (scalarType == glu::TYPE_BOOL)
    {
        for (int ndx = 0; ndx < numComps; ndx++)
        {
            const uint32_t refVal = *((const uint32_t *)ref + ndx);
            const uint32_t resVal = *((const uint32_t *)res + ndx);

            if ((refVal != 0) != (resVal != 0))
                return false;
        }
    }
    else if (scalarType == glu::TYPE_INT8 || scalarType == glu::TYPE_UINT8)
    {
        return deMemCmp(ref, res, numComps * sizeof(uint8_t)) == 0;
    }
    else if (scalarType == glu::TYPE_INT16 || scalarType == glu::TYPE_UINT16 || scalarType == glu::TYPE_FLOAT16)
    {
        return deMemCmp(ref, res, numComps * sizeof(uint16_t)) == 0;
    }
    else
    {
        DE_ASSERT(scalarType == glu::TYPE_INT || scalarType == glu::TYPE_UINT);

        return deMemCmp(ref, res, numComps * sizeof(uint32_t)) == 0;
    }

    return true;
}

bool compareBufferVarData(tcu::TestLog &log, const BufferVarLayoutEntry &refEntry, const BlockDataPtr &refBlockPtr,
                          const BufferVarLayoutEntry &resEntry, const BlockDataPtr &resBlockPtr)
{
    DE_ASSERT(resEntry.arraySize <= refEntry.arraySize);
    DE_ASSERT(resEntry.topLevelArraySize <= refEntry.topLevelArraySize);
    DE_ASSERT(resBlockPtr.lastUnsizedArraySize <= refBlockPtr.lastUnsizedArraySize);
    DE_ASSERT(resEntry.type == refEntry.type);

    uint8_t *const resBasePtr       = (uint8_t *)resBlockPtr.ptr + resEntry.offset;
    const uint8_t *const refBasePtr = (const uint8_t *)refBlockPtr.ptr + refEntry.offset;
    const glu::DataType scalarType  = glu::getDataTypeScalarType(refEntry.type);
    const int scalarSize            = glu::getDataTypeScalarSize(resEntry.type);
    const bool isMatrix             = glu::isDataTypeMatrix(resEntry.type);
    const size_t compSize           = getDataTypeByteSize(scalarType);
    const int maxPrints             = 3;
    int numFailed                   = 0;

    const int resArraySize   = resEntry.arraySize == 0 ? resBlockPtr.lastUnsizedArraySize : resEntry.arraySize;
    const int resArrayStride = resEntry.arrayStride;
    const int resTopLevelSize =
        resEntry.topLevelArraySize == 0 ? resBlockPtr.lastUnsizedArraySize : resEntry.topLevelArraySize;
    const int resTopLevelStride = resEntry.topLevelArrayStride;
    const int refArraySize      = refEntry.arraySize == 0 ? refBlockPtr.lastUnsizedArraySize : refEntry.arraySize;
    const int refArrayStride    = refEntry.arrayStride;
    const int refTopLevelSize =
        refEntry.topLevelArraySize == 0 ? refBlockPtr.lastUnsizedArraySize : refEntry.topLevelArraySize;
    const int refTopLevelStride = refEntry.topLevelArrayStride;

    DE_ASSERT(resArraySize <= refArraySize && resTopLevelSize <= refTopLevelSize);
    DE_UNREF(refArraySize && refTopLevelSize);

    for (int topElemNdx = 0; topElemNdx < resTopLevelSize; topElemNdx++)
    {
        uint8_t *const resTopPtr       = resBasePtr + topElemNdx * resTopLevelStride;
        const uint8_t *const refTopPtr = refBasePtr + topElemNdx * refTopLevelStride;

        for (int elementNdx = 0; elementNdx < resArraySize; elementNdx++)
        {
            uint8_t *const resElemPtr       = resTopPtr + elementNdx * resArrayStride;
            const uint8_t *const refElemPtr = refTopPtr + elementNdx * refArrayStride;

            if (isMatrix)
            {
                const int numRows = glu::getDataTypeMatrixNumRows(resEntry.type);
                const int numCols = glu::getDataTypeMatrixNumColumns(resEntry.type);
                bool isOk         = true;

                for (int colNdx = 0; colNdx < numCols; colNdx++)
                {
                    for (int rowNdx = 0; rowNdx < numRows; rowNdx++)
                    {
                        uint8_t *resCompPtr =
                            resElemPtr + (resEntry.isRowMajor ? rowNdx * resEntry.matrixStride + colNdx * compSize :
                                                                colNdx * resEntry.matrixStride + rowNdx * compSize);
                        const uint8_t *refCompPtr =
                            refElemPtr + (refEntry.isRowMajor ? rowNdx * refEntry.matrixStride + colNdx * compSize :
                                                                colNdx * refEntry.matrixStride + rowNdx * compSize);

                        DE_ASSERT((intptr_t)(refCompPtr + compSize) - (intptr_t)refBlockPtr.ptr <=
                                  (intptr_t)refBlockPtr.size);
                        DE_ASSERT((intptr_t)(resCompPtr + compSize) - (intptr_t)resBlockPtr.ptr <=
                                  (intptr_t)resBlockPtr.size);

                        isOk = isOk && compareComponents(scalarType, resCompPtr, refCompPtr, 1);
                    }
                }

                if (!isOk)
                {
                    numFailed += 1;
                    if (numFailed < maxPrints)
                    {
                        std::ostringstream expected, got;
                        generateImmMatrixSrc(expected, refEntry.type, refEntry.matrixStride, refEntry.isRowMajor, false,
                                             -1, refElemPtr);
                        generateImmMatrixSrc(got, resEntry.type, resEntry.matrixStride, resEntry.isRowMajor, false, -1,
                                             resElemPtr);
                        log << TestLog::Message << "ERROR: mismatch in " << refEntry.name << ", top-level ndx "
                            << topElemNdx << ", bottom-level ndx " << elementNdx << ":\n"
                            << "  expected " << expected.str() << "\n"
                            << "  got " << got.str() << TestLog::EndMessage;
                    }
                }
            }
            else
            {
                DE_ASSERT((intptr_t)(refElemPtr + scalarSize * compSize) - (intptr_t)refBlockPtr.ptr <=
                          (intptr_t)refBlockPtr.size);
                DE_ASSERT((intptr_t)(resElemPtr + scalarSize * compSize) - (intptr_t)resBlockPtr.ptr <=
                          (intptr_t)resBlockPtr.size);

                const bool isOk = compareComponents(scalarType, resElemPtr, refElemPtr, scalarSize);

                if (!isOk)
                {
                    numFailed += 1;
                    if (numFailed < maxPrints)
                    {
                        std::ostringstream expected, got;
                        generateImmScalarVectorSrc(expected, refEntry.type, refElemPtr);
                        generateImmScalarVectorSrc(got, resEntry.type, resElemPtr);
                        log << TestLog::Message << "ERROR: mismatch in " << refEntry.name << ", top-level ndx "
                            << topElemNdx << ", bottom-level ndx " << elementNdx << ":\n"
                            << "  expected " << expected.str() << "\n"
                            << "  got " << got.str() << TestLog::EndMessage;
                    }
                }
            }
        }
    }

    if (numFailed >= maxPrints)
        log << TestLog::Message << "... (" << numFailed << " failures for " << refEntry.name << " in total)"
            << TestLog::EndMessage;

    return numFailed == 0;
}

bool compareData(tcu::TestLog &log, const BufferLayout &refLayout, const vector<BlockDataPtr> &refBlockPointers,
                 const BufferLayout &resLayout, const vector<BlockDataPtr> &resBlockPointers)
{
    const int numBlocks = (int)refLayout.blocks.size();
    bool allOk          = true;

    for (int refBlockNdx = 0; refBlockNdx < numBlocks; refBlockNdx++)
    {
        const BlockLayoutEntry &refBlock = refLayout.blocks[refBlockNdx];
        const BlockDataPtr &refBlockPtr  = refBlockPointers[refBlockNdx];
        int resBlockNdx                  = resLayout.getBlockIndex(refBlock.name.c_str());

        if (resBlockNdx >= 0)
        {
            DE_ASSERT(de::inBounds(resBlockNdx, 0, (int)resBlockPointers.size()));

            const BlockDataPtr &resBlockPtr = resBlockPointers[resBlockNdx];

            for (vector<int>::const_iterator refVarNdxIter = refBlock.activeVarIndices.begin();
                 refVarNdxIter != refBlock.activeVarIndices.end(); refVarNdxIter++)
            {
                const BufferVarLayoutEntry &refEntry = refLayout.bufferVars[*refVarNdxIter];
                int resVarNdx                        = resLayout.getVariableIndex(refEntry.name.c_str());

                if (resVarNdx >= 0)
                {
                    const BufferVarLayoutEntry &resEntry = resLayout.bufferVars[resVarNdx];
                    allOk = compareBufferVarData(log, refEntry, refBlockPtr, resEntry, resBlockPtr) && allOk;
                }
            }
        }
    }

    return allOk;
}

string getBlockAPIName(const BufferBlock &block, int instanceNdx)
{
    DE_ASSERT(block.isArray() || instanceNdx == 0);
    return block.getBlockName() + (block.isArray() ? ("[" + de::toString(instanceNdx) + "]") : string());
}

// \note Some implementations don't report block members in the order they are declared.
//       For checking whether the size has to be adjusted by the actual size of some
//       top-level array, we only need to know a) whether there is an unsized top-level
//       array, and b) what the stride of that array is.

static bool hasUnsizedArray(const BufferLayout &layout, const BlockLayoutEntry &entry)
{
    for (vector<int>::const_iterator varNdx = entry.activeVarIndices.begin(); varNdx != entry.activeVarIndices.end();
         ++varNdx)
    {
        if (isUnsizedArray(layout.bufferVars[*varNdx]))
            return true;
    }

    return false;
}

static int getUnsizedArrayStride(const BufferLayout &layout, const BlockLayoutEntry &entry)
{
    for (vector<int>::const_iterator varNdx = entry.activeVarIndices.begin(); varNdx != entry.activeVarIndices.end();
         ++varNdx)
    {
        const BufferVarLayoutEntry &varEntry = layout.bufferVars[*varNdx];

        if (varEntry.arraySize == 0)
            return varEntry.arrayStride;
        else if (varEntry.topLevelArraySize == 0)
            return varEntry.topLevelArrayStride;
    }

    return 0;
}

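// Computes the byte size of every block instance: the static size from the layout
// plus, for blocks ending in an unsized array, the actual element count times the
// array stride. A worked example (hypothetical values): baseSize = 64,
// lastArraySize = 3 and stride = 16 give 64 + 3*16 = 112 bytes.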
vector<int> computeBufferSizes(const ShaderInterface &interface, const BufferLayout &layout)
{
    vector<int> sizes(layout.blocks.size());

    for (int declNdx = 0; declNdx < interface.getNumBlocks(); declNdx++)
    {
        const BufferBlock &block = interface.getBlock(declNdx);
        const bool isArray       = block.isArray();
        const int numInstances   = isArray ? block.getArraySize() : 1;

        for (int instanceNdx = 0; instanceNdx < numInstances; instanceNdx++)
        {
            const string apiName = getBlockAPIName(block, instanceNdx);
            const int blockNdx   = layout.getBlockIndex(apiName);

            if (blockNdx >= 0)
            {
                const BlockLayoutEntry &blockLayout = layout.blocks[blockNdx];
                const int baseSize                  = blockLayout.size;
                const bool isLastUnsized            = hasUnsizedArray(layout, blockLayout);
                const int lastArraySize             = isLastUnsized ? block.getLastUnsizedArraySize(instanceNdx) : 0;
                const int stride                    = isLastUnsized ? getUnsizedArrayStride(layout, blockLayout) : 0;

                sizes[blockNdx] = baseSize + lastArraySize * stride;
            }
        }
    }

    return sizes;
}

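// Inverse of the size computation in computeBufferSizes(): given the final buffer
// size, derive the trailing unsized array's element count as
// lastArraySize = (bufferSize - baseSize) / stride.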
BlockDataPtr getBlockDataPtr(const BufferLayout &layout, const BlockLayoutEntry &blockLayout, void *ptr, int bufferSize)
{
    const bool isLastUnsized = hasUnsizedArray(layout, blockLayout);
    const int baseSize       = blockLayout.size;

    if (isLastUnsized)
    {
        const int lastArrayStride = getUnsizedArrayStride(layout, blockLayout);
        const int lastArraySize   = (bufferSize - baseSize) / (lastArrayStride ? lastArrayStride : 1);

        DE_ASSERT(baseSize + lastArraySize * lastArrayStride == bufferSize);

        return BlockDataPtr(ptr, bufferSize, lastArraySize);
    }
    else
        return BlockDataPtr(ptr, bufferSize, 0);
}

struct Buffer
{
    uint32_t buffer;
    int size;

    Buffer(uint32_t buffer_, int size_) : buffer(buffer_), size(size_)
    {
    }
    Buffer(void) : buffer(0), size(0)
    {
    }
};

struct BlockLocation
{
    int index;
    int offset;
    int size;

    BlockLocation(int index_, int offset_, int size_) : index(index_), offset(offset_), size(size_)
    {
    }
    BlockLocation(void) : index(0), offset(0), size(0)
    {
    }
};

void initRefDataStorage(const ShaderInterface &interface, const BufferLayout &layout, RefDataStorage &storage)
{
    DE_ASSERT(storage.data.empty() && storage.pointers.empty());

    const vector<int> bufferSizes = computeBufferSizes(interface, layout);
    int totalSize                 = 0;
    const int vec4Alignment       = (int)sizeof(uint32_t) * 4;

    for (vector<int>::const_iterator sizeIter = bufferSizes.begin(); sizeIter != bufferSizes.end(); ++sizeIter)
    {
        // Include enough space for alignment of individual blocks
        totalSize += deRoundUp32(*sizeIter, vec4Alignment);
    }

    storage.data.resize(totalSize);

    // Pointers for each block.
    {
        uint8_t *basePtr = storage.data.empty() ? DE_NULL : &storage.data[0];
        int curOffset    = 0;

        DE_ASSERT(bufferSizes.size() == layout.blocks.size());
        DE_ASSERT(totalSize == 0 || basePtr);

        storage.pointers.resize(layout.blocks.size());

        for (int blockNdx = 0; blockNdx < (int)layout.blocks.size(); blockNdx++)
        {
            const BlockLayoutEntry &blockLayout = layout.blocks[blockNdx];
            const int bufferSize                = bufferSizes[blockNdx];

            storage.pointers[blockNdx] = getBlockDataPtr(layout, blockLayout, basePtr + curOffset, bufferSize);

            // Ensure each new block starts fully aligned to avoid unaligned host accesses
            curOffset += deRoundUp32(bufferSize, vec4Alignment);
        }
    }
}

vector<BlockDataPtr> blockLocationsToPtrs(const BufferLayout &layout, const vector<BlockLocation> &blockLocations,
                                          const vector<void *> &bufPtrs)
{
    vector<BlockDataPtr> blockPtrs(blockLocations.size());

    DE_ASSERT(layout.blocks.size() == blockLocations.size());

    for (int blockNdx = 0; blockNdx < (int)layout.blocks.size(); blockNdx++)
    {
        const BlockLayoutEntry &blockLayout = layout.blocks[blockNdx];
        const BlockLocation &location       = blockLocations[blockNdx];

        blockPtrs[blockNdx] =
            getBlockDataPtr(layout, blockLayout, (uint8_t *)bufPtrs[location.index] + location.offset, location.size);
    }

    return blockPtrs;
}

} // namespace

de::MovePtr<vk::Allocation> allocateAndBindMemory(Context &context, vk::VkBuffer buffer, vk::MemoryRequirement memReqs)
{
    const vk::DeviceInterface &vkd         = context.getDeviceInterface();
    const vk::VkMemoryRequirements bufReqs = vk::getBufferMemoryRequirements(vkd, context.getDevice(), buffer);
    de::MovePtr<vk::Allocation> memory     = context.getDefaultAllocator().allocate(bufReqs, memReqs);

    vkd.bindBufferMemory(context.getDevice(), buffer, memory->getMemory(), memory->getOffset());

    return memory;
}

vk::Move<vk::VkBuffer> createBuffer(Context &context, vk::VkDeviceSize bufferSize, vk::VkBufferUsageFlags usageFlags)
{
    const vk::VkDevice vkDevice     = context.getDevice();
    const vk::DeviceInterface &vk   = context.getDeviceInterface();
    const uint32_t queueFamilyIndex = context.getUniversalQueueFamilyIndex();

    const vk::VkBufferCreateInfo bufferInfo = {
        vk::VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
        DE_NULL,                                  // const void* pNext;
        0u,                                       // VkBufferCreateFlags flags;
        bufferSize,                               // VkDeviceSize size;
        usageFlags,                               // VkBufferUsageFlags usage;
        vk::VK_SHARING_MODE_EXCLUSIVE,            // VkSharingMode sharingMode;
        1u,                                       // uint32_t queueFamilyIndexCount;
        &queueFamilyIndex                         // const uint32_t* pQueueFamilyIndices;
    };

    return vk::createBuffer(vk, vkDevice, &bufferInfo);
}

// SSBOLayoutCaseInstance

class SSBOLayoutCaseInstance : public TestInstance
{
public:
    SSBOLayoutCaseInstance(Context &context, SSBOLayoutCase::BufferMode bufferMode, const ShaderInterface &interface,
                           const BufferLayout &refLayout, const RefDataStorage &initialData,
                           const RefDataStorage &writeData, bool usePhysStorageBuffer);
    virtual ~SSBOLayoutCaseInstance(void);
    virtual tcu::TestStatus iterate(void);

private:
    SSBOLayoutCase::BufferMode m_bufferMode;
    const ShaderInterface &m_interface;
    const BufferLayout &m_refLayout;
    const RefDataStorage &m_initialData; // Initial data stored in buffer.
    const RefDataStorage &m_writeData;   // Data written by compute shader.
    const bool m_usePhysStorageBuffer;

    typedef de::SharedPtr<vk::Unique<vk::VkBuffer>> VkBufferSp;
    typedef de::SharedPtr<vk::Allocation> AllocationSp;

    std::vector<VkBufferSp> m_uniformBuffers;
    std::vector<AllocationSp> m_uniformAllocs;
};

SSBOLayoutCaseInstance::SSBOLayoutCaseInstance(Context &context, SSBOLayoutCase::BufferMode bufferMode,
                                               const ShaderInterface &interface, const BufferLayout &refLayout,
                                               const RefDataStorage &initialData, const RefDataStorage &writeData,
                                               bool usePhysStorageBuffer)
    : TestInstance(context)
    , m_bufferMode(bufferMode)
    , m_interface(interface)
    , m_refLayout(refLayout)
    , m_initialData(initialData)
    , m_writeData(writeData)
    , m_usePhysStorageBuffer(usePhysStorageBuffer)
{
}

SSBOLayoutCaseInstance::~SSBOLayoutCaseInstance(void)
{
}

tcu::TestStatus SSBOLayoutCaseInstance::iterate(void)
{
    // todo: add compute stage availability check
    const vk::DeviceInterface &vk   = m_context.getDeviceInterface();
    const vk::VkDevice device       = m_context.getDevice();
    const vk::VkQueue queue         = m_context.getUniversalQueue();
    const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();

    // Create descriptor set
    const uint32_t acBufferSize = 1024;
    vk::Move<vk::VkBuffer> acBuffer(createBuffer(m_context, acBufferSize, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT));
    de::UniquePtr<vk::Allocation> acBufferAlloc(
        allocateAndBindMemory(m_context, *acBuffer, vk::MemoryRequirement::HostVisible));

    deMemset(acBufferAlloc->getHostPtr(), 0, acBufferSize);
    flushMappedMemoryRange(vk, device, acBufferAlloc->getMemory(), acBufferAlloc->getOffset(), acBufferSize);

    vk::DescriptorSetLayoutBuilder setLayoutBuilder;
    vk::DescriptorPoolBuilder poolBuilder;

    setLayoutBuilder.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT);

    int numBlocks         = 0;
    const int numBindings = m_interface.getNumBlocks();
    for (int bindingNdx = 0; bindingNdx < numBindings; bindingNdx++)
    {
        const BufferBlock &block = m_interface.getBlock(bindingNdx);
        if (block.isArray())
        {
            setLayoutBuilder.addArrayBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, block.getArraySize(),
                                             vk::VK_SHADER_STAGE_COMPUTE_BIT);
            numBlocks += block.getArraySize();
        }
        else
        {
            setLayoutBuilder.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT);
            numBlocks += 1;
        }
    }

    poolBuilder.addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, (uint32_t)(1 + numBlocks));

    const vk::Unique<vk::VkDescriptorSetLayout> descriptorSetLayout(setLayoutBuilder.build(vk, device));
    const vk::Unique<vk::VkDescriptorPool> descriptorPool(
        poolBuilder.build(vk, device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));

    const vk::VkDescriptorSetAllocateInfo allocInfo = {
        vk::VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, DE_NULL, *descriptorPool, 1u, &descriptorSetLayout.get(),
    };

    const vk::Unique<vk::VkDescriptorSet> descriptorSet(allocateDescriptorSet(vk, device, &allocInfo));
    const vk::VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*acBuffer, 0ull, acBufferSize);

    vk::DescriptorSetUpdateBuilder setUpdateBuilder;
    std::vector<vk::VkDescriptorBufferInfo> descriptors(numBlocks);

    setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0u),
                                 vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo);

    vector<BlockDataPtr> mappedBlockPtrs;

    vk::VkFlags usageFlags   = vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
    bool memoryDeviceAddress = false;
    if (m_usePhysStorageBuffer)
    {
        usageFlags |= vk::VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
        if (m_context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address"))
            memoryDeviceAddress = true;
    }

    // Upload base buffers
    const std::vector<int> bufferSizes = computeBufferSizes(m_interface, m_refLayout);
    {
        std::vector<void *> mapPtrs;
        std::vector<BlockLocation> blockLocations(numBlocks);

        DE_ASSERT(bufferSizes.size() == m_refLayout.blocks.size());

        if (m_bufferMode == SSBOLayoutCase::BUFFERMODE_PER_BLOCK)
        {
            mapPtrs.resize(numBlocks);
            for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
            {
                const uint32_t bufferSize = bufferSizes[blockNdx];
                DE_ASSERT(bufferSize > 0);

                blockLocations[blockNdx] = BlockLocation(blockNdx, 0, bufferSize);

                vk::Move<vk::VkBuffer> buffer     = createBuffer(m_context, bufferSize, usageFlags);
                de::MovePtr<vk::Allocation> alloc = allocateAndBindMemory(
                    m_context, *buffer,
                    vk::MemoryRequirement::HostVisible |
                        (memoryDeviceAddress ? vk::MemoryRequirement::DeviceAddress : vk::MemoryRequirement::Any));

                descriptors[blockNdx] = makeDescriptorBufferInfo(*buffer, 0ull, bufferSize);

                mapPtrs[blockNdx] = alloc->getHostPtr();

                m_uniformBuffers.push_back(VkBufferSp(new vk::Unique<vk::VkBuffer>(buffer)));
                m_uniformAllocs.push_back(AllocationSp(alloc.release()));
            }
        }
        else
        {
            DE_ASSERT(m_bufferMode == SSBOLayoutCase::BUFFERMODE_SINGLE);

            vk::VkPhysicalDeviceProperties properties;
            m_context.getInstanceInterface().getPhysicalDeviceProperties(m_context.getPhysicalDevice(), &properties);
            const int bindingAlignment = (int)properties.limits.minStorageBufferOffsetAlignment;
            int curOffset              = 0;
            for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
            {
                const int bufferSize = bufferSizes[blockNdx];
                DE_ASSERT(bufferSize > 0);

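                // Round each block's offset up to minStorageBufferOffsetAlignment so the
                // descriptor ranges are legally aligned. For example (hypothetical limit),
                // with a 256-byte alignment a 100-byte block still advances the next
                // block's offset to 256.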
2329                 if (bindingAlignment > 0)
2330                     curOffset = deRoundUp32(curOffset, bindingAlignment);
2331 
2332                 blockLocations[blockNdx] = BlockLocation(0, curOffset, bufferSize);
2333                 curOffset += bufferSize;
2334             }
2335 
2336             const int totalBufferSize         = curOffset;
2337             vk::Move<vk::VkBuffer> buffer     = createBuffer(m_context, totalBufferSize, usageFlags);
2338             de::MovePtr<vk::Allocation> alloc = allocateAndBindMemory(
2339                 m_context, *buffer,
2340                 vk::MemoryRequirement::HostVisible |
2341                     (memoryDeviceAddress ? vk::MemoryRequirement::DeviceAddress : vk::MemoryRequirement::Any));
2342 
2343             mapPtrs.push_back(alloc->getHostPtr());
2344 
2345             for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
2346             {
2347                 const uint32_t bufferSize = bufferSizes[blockNdx];
2348                 const uint32_t offset     = blockLocations[blockNdx].offset;
2349 
2350                 descriptors[blockNdx] = makeDescriptorBufferInfo(*buffer, offset, bufferSize);
2351             }
2352 
2353             m_uniformBuffers.push_back(VkBufferSp(new vk::Unique<vk::VkBuffer>(buffer)));
2354             m_uniformAllocs.push_back(AllocationSp(alloc.release()));
2355         }
2356 
2357         // Update remaining bindings
2358         {
2359             int blockNdx = 0;
2360             for (int bindingNdx = 0; bindingNdx < numBindings; ++bindingNdx)
2361             {
2362                 const BufferBlock &block     = m_interface.getBlock(bindingNdx);
2363                 const int numBlocksInBinding = (block.isArray() ? block.getArraySize() : 1);
2364 
2365                 setUpdateBuilder.writeArray(
2366                     *descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(bindingNdx + 1),
2367                     vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, numBlocksInBinding, &descriptors[blockNdx]);
2368 
2369                 blockNdx += numBlocksInBinding;
2370             }
2371         }
2372 
2373         // Copy the initial data to the storage buffers
2374         {
2375             mappedBlockPtrs = blockLocationsToPtrs(m_refLayout, blockLocations, mapPtrs);
2376             copyData(m_refLayout, mappedBlockPtrs, m_refLayout, m_initialData.pointers);
2377 
2378             for (size_t allocNdx = 0; allocNdx < m_uniformAllocs.size(); allocNdx++)
2379             {
2380                 vk::Allocation *alloc = m_uniformAllocs[allocNdx].get();
2381                 flushMappedMemoryRange(vk, device, alloc->getMemory(), alloc->getOffset(), VK_WHOLE_SIZE);
2382             }
2383         }
2384     }
2385 
2386     std::vector<vk::VkDeviceAddress> gpuAddrs;
2387     // Query the buffer device addresses and push them via push constants
2388     if (m_usePhysStorageBuffer)
2389     {
2390         //const bool useKHR = m_context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address");
2391 
2392         vk::VkBufferDeviceAddressInfo info = {
2393             vk::VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, // VkStructureType sType;
2394             DE_NULL,                                          // const void* pNext;
2395             0,                                                // VkBuffer            buffer
2396         };
2397 
2398         for (uint32_t i = 0; i < descriptors.size(); ++i)
2399         {
2400             info.buffer = descriptors[i].buffer;
2401             vk::VkDeviceAddress addr;
2402             // The core entry point is used here; vkGetBufferDeviceAddressKHR aliases it.
2403             addr = vk.getBufferDeviceAddress(device, &info);
2406             addr += descriptors[i].offset;
2407             gpuAddrs.push_back(addr);
2408         }
2409     }
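    // Note: vkGetBufferDeviceAddress returns the address of the buffer's first
    // byte, so adding the descriptor's byte offset yields a per-block address.
    // The generated shader is expected to interpret these push-constant values
    // as physical-storage-buffer pointers.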
2410 
2411     setUpdateBuilder.update(vk, device);
2412 
2413     const vk::VkPushConstantRange pushConstRange = {
2414         vk::VK_SHADER_STAGE_COMPUTE_BIT,                             // VkShaderStageFlags stageFlags;
2415         0,                                                           // uint32_t offset;
2416         (uint32_t)(sizeof(vk::VkDeviceAddress) * descriptors.size()) // uint32_t size;
2417     };
2418 
2419     // Must fit within the spec-guaranteed minimum for maxPushConstantsSize (128 bytes)
2420     DE_ASSERT(pushConstRange.size <= 128);
2421 
2422     const vk::VkPipelineLayoutCreateInfo pipelineLayoutParams = {
2423         vk::VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // VkStructureType sType;
2424         DE_NULL,                                           // const void* pNext;
2425         (vk::VkPipelineLayoutCreateFlags)0,                // VkPipelineLayoutCreateFlags flags;
2426         1u,                               // uint32_t setLayoutCount;
2427         &*descriptorSetLayout,            // const VkDescriptorSetLayout* pSetLayouts;
2428         m_usePhysStorageBuffer ? 1u : 0u, // uint32_t pushConstantRangeCount;
2429         &pushConstRange,                  // const VkPushConstantRange* pPushConstantRanges;
2430     };
2431     vk::Move<vk::VkPipelineLayout> pipelineLayout(createPipelineLayout(vk, device, &pipelineLayoutParams));
2432 
2433     m_context.getTestContext().touchWatchdogAndDisableIntervalTimeLimit();
2434 
2435     vk::Move<vk::VkShaderModule> shaderModule(
2436         createShaderModule(vk, device, m_context.getBinaryCollection().get("compute"), 0));
2437     const vk::VkPipelineShaderStageCreateInfo pipelineShaderStageParams = {
2438         vk::VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
2439         DE_NULL,                                                 // const void* pNext;
2440         (vk::VkPipelineShaderStageCreateFlags)0,                 // VkPipelineShaderStageCreateFlags flags;
2441         vk::VK_SHADER_STAGE_COMPUTE_BIT, // VkShaderStageFlagBits stage;
2442         *shaderModule,                   // VkShaderModule module;
2443         "main",                          // const char* pName;
2444         DE_NULL,                         // const VkSpecializationInfo* pSpecializationInfo;
2445     };
2446     const vk::VkComputePipelineCreateInfo pipelineCreateInfo = {
2447         vk::VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // VkStructureType sType;
2448         DE_NULL,                                            // const void* pNext;
2449         0,                                                  // VkPipelineCreateFlags flags;
2450         pipelineShaderStageParams,                          // VkPipelineShaderStageCreateInfo stage;
2451         *pipelineLayout,                                    // VkPipelineLayout layout;
2452         DE_NULL,                                            // VkPipeline basePipelineHandle;
2453         0,                                                  // int32_t basePipelineIndex;
2454     };
2455     vk::Move<vk::VkPipeline> pipeline(createComputePipeline(vk, device, DE_NULL, &pipelineCreateInfo));
2456 
2457     m_context.getTestContext().touchWatchdogAndEnableIntervalTimeLimit();
2458 
2459     vk::Move<vk::VkCommandPool> cmdPool(
2460         createCommandPool(vk, device, vk::VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex));
2461     vk::Move<vk::VkCommandBuffer> cmdBuffer(
2462         allocateCommandBuffer(vk, device, *cmdPool, vk::VK_COMMAND_BUFFER_LEVEL_PRIMARY));
2463 
2464     beginCommandBuffer(vk, *cmdBuffer, 0u);
2465 
2466     vk.cmdBindPipeline(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
2467 
2468     if (!gpuAddrs.empty())
2469     {
2470         vk.cmdPushConstants(*cmdBuffer, *pipelineLayout, vk::VK_SHADER_STAGE_COMPUTE_BIT, 0,
2471                             (uint32_t)(sizeof(vk::VkDeviceAddress) * gpuAddrs.size()), &gpuAddrs[0]);
2472     }
2473     vk.cmdBindDescriptorSets(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u,
2474                              &descriptorSet.get(), 0u, DE_NULL);
2475 
2476     vk.cmdDispatch(*cmdBuffer, 1, 1, 1);
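    // A single 1x1x1 workgroup suffices: the generated shader performs the whole
    // read/compare/write sequence once and is expected to increment the
    // ac_numPassed counter exactly once, which the readback below checks.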
2477 
2478     // Add barriers for shader writes to storage buffers before host access
2479     std::vector<vk::VkBufferMemoryBarrier> barriers;
2480     if (m_bufferMode == SSBOLayoutCase::BUFFERMODE_PER_BLOCK)
2481     {
2482         for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
2483         {
2484             const vk::VkBuffer uniformBuffer = m_uniformBuffers[blockNdx].get()->get();
2485 
2486             const vk::VkBufferMemoryBarrier barrier = {vk::VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
2487                                                        DE_NULL,
2488                                                        vk::VK_ACCESS_SHADER_WRITE_BIT,
2489                                                        vk::VK_ACCESS_HOST_READ_BIT,
2490                                                        VK_QUEUE_FAMILY_IGNORED,
2491                                                        VK_QUEUE_FAMILY_IGNORED,
2492                                                        uniformBuffer,
2493                                                        0u,
2494                                                        static_cast<vk::VkDeviceSize>(bufferSizes[blockNdx])};
2495             barriers.push_back(barrier);
2496         }
2497     }
2498     else
2499     {
2500         const vk::VkBuffer uniformBuffer = m_uniformBuffers[0].get()->get();
2501 
2502         vk::VkDeviceSize totalSize = 0;
2503         for (size_t bufferNdx = 0; bufferNdx < bufferSizes.size(); bufferNdx++)
2504             totalSize += bufferSizes[bufferNdx];
2505 
2506         const vk::VkBufferMemoryBarrier barrier = {vk::VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
2507                                                    DE_NULL,
2508                                                    vk::VK_ACCESS_SHADER_WRITE_BIT,
2509                                                    vk::VK_ACCESS_HOST_READ_BIT,
2510                                                    VK_QUEUE_FAMILY_IGNORED,
2511                                                    VK_QUEUE_FAMILY_IGNORED,
2512                                                    uniformBuffer,
2513                                                    0u,
2514                                                    totalSize};
2515         barriers.push_back(barrier);
2516     }
2517     vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT,
2518                           (vk::VkDependencyFlags)0, 0u, DE_NULL, static_cast<uint32_t>(barriers.size()), &barriers[0],
2519                           0u, DE_NULL);
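    // A coarser alternative (illustrative sketch only, not what this test records)
    // would be a single global memory barrier with the same access masks:
    //
    //   const vk::VkMemoryBarrier memBarrier = {vk::VK_STRUCTURE_TYPE_MEMORY_BARRIER, DE_NULL,
    //                                           vk::VK_ACCESS_SHADER_WRITE_BIT, vk::VK_ACCESS_HOST_READ_BIT};
    //   vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
    //                         vk::VK_PIPELINE_STAGE_HOST_BIT, (vk::VkDependencyFlags)0, 1u, &memBarrier,
    //                         0u, DE_NULL, 0u, DE_NULL);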
2520 
2521     endCommandBuffer(vk, *cmdBuffer);
2522 
2523     submitCommandsAndWait(vk, device, queue, cmdBuffer.get());
2524 
2525     // Read back ac_numPassed data
2526     bool counterOk;
2527     {
2528         const int refCount = 1;
2530 
2531         invalidateAlloc(vk, device, *acBufferAlloc);
2532 
2533         const int resCount = *((const int *)acBufferAlloc->getHostPtr());
2534 
2535         counterOk = (refCount == resCount);
2536         if (!counterOk)
2537         {
2538             m_context.getTestContext().getLog() << TestLog::Message << "Error: ac_numPassed = " << resCount
2539                                                 << ", expected " << refCount << TestLog::EndMessage;
2540         }
2541     }
2542 
2543     for (size_t allocNdx = 0; allocNdx < m_uniformAllocs.size(); allocNdx++)
2544     {
2545         vk::Allocation *alloc = m_uniformAllocs[allocNdx].get();
2546         invalidateAlloc(vk, device, *alloc);
2547     }
2548 
2549     // Validate result
2550     const bool compareOk = compareData(m_context.getTestContext().getLog(), m_refLayout, m_writeData.pointers,
2551                                        m_refLayout, mappedBlockPtrs);
2552 
2553     if (compareOk && counterOk)
2554         return tcu::TestStatus::pass("Result comparison and counter values are OK");
2555     else if (!compareOk && counterOk)
2556         return tcu::TestStatus::fail("Result comparison failed");
2557     else if (compareOk && !counterOk)
2558         return tcu::TestStatus::fail("Counter value incorrect");
2559     else
2560         return tcu::TestStatus::fail("Result comparison and counter values are incorrect");
2561 }
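
// A standalone sketch (hypothetical helper, not used by the test) of the
// single-buffer packing strategy in iterate() above: each block offset is
// rounded up to the binding alignment before the block is placed.
static inline std::vector<int> packBlockOffsets(const std::vector<int> &bufferSizes, int bindingAlignment)
{
    std::vector<int> offsets;
    int curOffset = 0;
    for (size_t ndx = 0; ndx < bufferSizes.size(); ++ndx)
    {
        if (bindingAlignment > 0)
            curOffset = deRoundUp32(curOffset, bindingAlignment);
        offsets.push_back(curOffset);
        curOffset += bufferSizes[ndx];
    }
    return offsets;
}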
2562 
2563 // SSBOLayoutCase.
2564 
2565 SSBOLayoutCase::SSBOLayoutCase(tcu::TestContext &testCtx, const char *name, BufferMode bufferMode,
2566                                MatrixLoadFlags matrixLoadFlag, MatrixStoreFlags matrixStoreFlag,
2567                                bool usePhysStorageBuffer)
2568     : TestCase(testCtx, name)
2569     , m_bufferMode(bufferMode)
2570     , m_matrixLoadFlag(matrixLoadFlag)
2571     , m_matrixStoreFlag(matrixStoreFlag)
2572     , m_usePhysStorageBuffer(usePhysStorageBuffer)
2573 {
2574 }
2575 
2576 SSBOLayoutCase::~SSBOLayoutCase(void)
2577 {
2578 }
2579 
2580 void SSBOLayoutCase::initPrograms(vk::SourceCollections &programCollection) const
2581 {
2582     DE_ASSERT(!m_computeShaderSrc.empty());
2583 
2584     // Valid scalar layouts are a superset of valid relaxed layouts, so check for scalar layout first.
2585     if (usesScalarLayout(m_interface))
2586     {
2587         programCollection.glslSources.add("compute")
2588             << glu::ComputeSource(m_computeShaderSrc)
2589             << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_0,
2590                                       vk::ShaderBuildOptions::FLAG_ALLOW_SCALAR_OFFSETS);
2591     }
2592     else if (usesRelaxedLayout(m_interface))
2593     {
2594         programCollection.glslSources.add("compute")
2595             << glu::ComputeSource(m_computeShaderSrc)
2596             << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_0,
2597                                       vk::ShaderBuildOptions::FLAG_ALLOW_RELAXED_OFFSETS);
2598     }
2599     else
2600         programCollection.glslSources.add("compute") << glu::ComputeSource(m_computeShaderSrc);
2601 }
2602 
2603 TestInstance *SSBOLayoutCase::createInstance(Context &context) const
2604 {
2605     return new SSBOLayoutCaseInstance(context, m_bufferMode, m_interface, m_refLayout, m_initialData, m_writeData,
2606                                       m_usePhysStorageBuffer);
2607 }
2608 
2609 void SSBOLayoutCase::checkSupport(Context &context) const
2610 {
2611     if (!context.isDeviceFunctionalitySupported("VK_KHR_relaxed_block_layout") && usesRelaxedLayout(m_interface))
2612         TCU_THROW(NotSupportedError, "VK_KHR_relaxed_block_layout not supported");
2613     if (!context.get16BitStorageFeatures().storageBuffer16BitAccess && uses16BitStorage(m_interface))
2614         TCU_THROW(NotSupportedError, "storageBuffer16BitAccess not supported");
2615     if (!context.get8BitStorageFeatures().storageBuffer8BitAccess && uses8BitStorage(m_interface))
2616         TCU_THROW(NotSupportedError, "storageBuffer8BitAccess not supported");
2617     if (!context.getScalarBlockLayoutFeatures().scalarBlockLayout && usesScalarLayout(m_interface))
2618         TCU_THROW(NotSupportedError, "scalarBlockLayout not supported");
2619     if (m_usePhysStorageBuffer && !context.isBufferDeviceAddressSupported())
2620         TCU_THROW(NotSupportedError, "Physical storage buffer pointers not supported");
2621     if (usesDescriptorIndexing(m_interface) &&
2622         (!context.getDescriptorIndexingFeatures().shaderStorageBufferArrayNonUniformIndexing ||
2623          !context.getDescriptorIndexingFeatures().runtimeDescriptorArray))
2624         TCU_THROW(NotSupportedError, "Descriptor indexing over storage buffer not supported");
2625 
2626     const vk::VkPhysicalDeviceProperties &properties = context.getDeviceProperties();
2627     // The shader defines N+1 storage buffers: N to operate on, plus one more that stores the number of passed cases.
2628     uint32_t blockCount = 1u;
2629     for (int32_t blockIdx = 0; blockIdx < m_interface.getNumBlocks(); blockIdx++)
2630     {
2631         const int arraySize = m_interface.getBlock(blockIdx).getArraySize();
2632         blockCount += arraySize ? (uint32_t)arraySize : 1u;
2633     }
2634 
2635     if (properties.limits.maxPerStageDescriptorStorageBuffers < blockCount)
2636         TCU_THROW(NotSupportedError,
2637                   "Storage buffer descriptor count exceeds the maximum supported by the driver");
2638 }
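
// Hypothetical helper mirroring the loop in checkSupport() above: every array
// element of a block consumes one storage-buffer descriptor, and the pass
// counter accounts for the extra one.
static inline uint32_t countStorageBufferDescriptors(const ShaderInterface &interface)
{
    uint32_t blockCount = 1u; // the ac_numPassed counter buffer
    for (int blockNdx = 0; blockNdx < interface.getNumBlocks(); blockNdx++)
    {
        const int arraySize = interface.getBlock(blockNdx).getArraySize();
        blockCount += arraySize ? (uint32_t)arraySize : 1u;
    }
    return blockCount;
}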
2639 
2640 void SSBOLayoutCase::delayedInit(void)
2641 {
2642     computeReferenceLayout(m_refLayout, m_interface);
2643     initRefDataStorage(m_interface, m_refLayout, m_initialData);
2644     initRefDataStorage(m_interface, m_refLayout, m_writeData);
2645     generateValues(m_refLayout, m_initialData.pointers, deStringHash(getName()) ^ 0xad2f7214);
2646     generateValues(m_refLayout, m_writeData.pointers, deStringHash(getName()) ^ 0x25ca4e7);
2647     copyNonWrittenData(m_interface, m_refLayout, m_initialData.pointers, m_writeData.pointers);
2648 
2649     m_computeShaderSrc = generateComputeShader(m_interface, m_refLayout, m_initialData.pointers, m_writeData.pointers,
2650                                                m_matrixLoadFlag, m_matrixStoreFlag, m_usePhysStorageBuffer);
2651 }
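
// Hypothetical illustration of the seeding above: two distinct salts make the
// initial and written data sets differ deterministically for the same test name.
static inline uint32_t dataSeed(const char *testName, uint32_t salt)
{
    return deStringHash(testName) ^ salt;
}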
2652 
2653 } // namespace ssbo
2654 } // namespace vkt
2655