1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2015 The Khronos Group Inc.
6 * Copyright (c) 2015 Samsung Electronics Co., Ltd.
7 * Copyright (c) 2016 The Android Open Source Project
8 *
9 * Licensed under the Apache License, Version 2.0 (the "License");
10 * you may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
20 *
21 *//*!
22 * \file
23 * \brief SSBO layout case.
24 *//*--------------------------------------------------------------------*/
25
26 #include "tcuFloat.hpp"
27 #include "deInt32.h"
28 #include "deMath.h"
29 #include "deMemory.h"
30 #include "deRandom.hpp"
31 #include "deSharedPtr.hpp"
32 #include "deString.h"
33 #include "deStringUtil.hpp"
34 #include "gluContextInfo.hpp"
35 #include "gluShaderProgram.hpp"
36 #include "gluShaderUtil.hpp"
37 #include "gluVarType.hpp"
38 #include "gluVarTypeUtil.hpp"
39 #include "tcuTestLog.hpp"
40 #include "vktSSBOLayoutCase.hpp"
41
42 #include "vkBuilderUtil.hpp"
43 #include "vkMemUtil.hpp"
44 #include "vkPrograms.hpp"
45 #include "vkQueryUtil.hpp"
46 #include "vkRef.hpp"
47 #include "vkRefUtil.hpp"
48 #include "vkTypeUtil.hpp"
49 #include "vkCmdUtil.hpp"
50
51 #include "util/vktTypeComparisonUtil.hpp"
52
53 namespace vkt
54 {
55 namespace ssbo
56 {
57
58 using glu::StructMember;
59 using glu::StructType;
60 using glu::VarType;
61 using std::string;
62 using std::vector;
63 using tcu::TestLog;
64
65 struct LayoutFlagsFmt
66 {
67 uint32_t flags;
68 LayoutFlagsFmt(uint32_t flags_) : flags(flags_)
69 {
70 }
71 };
72
73 std::ostream &operator<<(std::ostream &str, const LayoutFlagsFmt &fmt)
74 {
75 static const struct
76 {
77 uint32_t bit;
78 const char *token;
79 } bitDesc[] = {{LAYOUT_STD140, "std140"},
80 {LAYOUT_STD430, "std430"},
81 {LAYOUT_SCALAR, "scalar"},
82 {LAYOUT_ROW_MAJOR, "row_major"},
83 {LAYOUT_COLUMN_MAJOR, "column_major"}};
84
85 uint32_t remBits = fmt.flags;
86 for (int descNdx = 0; descNdx < DE_LENGTH_OF_ARRAY(bitDesc); descNdx++)
87 {
88 if (remBits & bitDesc[descNdx].bit)
89 {
90 if (remBits != fmt.flags)
91 str << ", ";
92 str << bitDesc[descNdx].token;
93 remBits &= ~bitDesc[descNdx].bit;
94 }
95 }
96 DE_ASSERT(remBits == 0);
97 return str;
98 }
99
100 // BufferVar implementation.
101
102 BufferVar::BufferVar(const char *name, const VarType &type, uint32_t flags)
103 : m_name(name)
104 , m_type(type)
105 , m_flags(flags)
106 , m_offset(~0u)
107 {
108 }
109
110 // BufferBlock implementation.
111
112 BufferBlock::BufferBlock(const char *blockName) : m_blockName(blockName), m_arraySize(-1), m_flags(0)
113 {
114 setArraySize(0);
115 }
116
117 void BufferBlock::setArraySize(int arraySize)
118 {
119 DE_ASSERT(arraySize >= 0);
120 m_lastUnsizedArraySizes.resize(arraySize == 0 ? 1 : arraySize, 0);
121 m_arraySize = arraySize;
122 }
123
124 std::ostream &operator<<(std::ostream &stream, const BlockLayoutEntry &entry)
125 {
126 stream << entry.name << " { name = " << entry.name << ", size = " << entry.size << ", activeVarIndices = [";
127
128 for (vector<int>::const_iterator i = entry.activeVarIndices.begin(); i != entry.activeVarIndices.end(); i++)
129 {
130 if (i != entry.activeVarIndices.begin())
131 stream << ", ";
132 stream << *i;
133 }
134
135 stream << "] }";
136 return stream;
137 }
138
139 static bool isUnsizedArray(const BufferVarLayoutEntry &entry)
140 {
141 DE_ASSERT(entry.arraySize != 0 || entry.topLevelArraySize != 0);
142 return entry.arraySize == 0 || entry.topLevelArraySize == 0;
143 }
144
145 std::ostream &operator<<(std::ostream &stream, const BufferVarLayoutEntry &entry)
146 {
147 stream << entry.name << " { type = " << glu::getDataTypeName(entry.type) << ", blockNdx = " << entry.blockNdx
148 << ", offset = " << entry.offset << ", arraySize = " << entry.arraySize
149 << ", arrayStride = " << entry.arrayStride << ", matrixStride = " << entry.matrixStride
150 << ", topLevelArraySize = " << entry.topLevelArraySize
151 << ", topLevelArrayStride = " << entry.topLevelArrayStride
152 << ", isRowMajor = " << (entry.isRowMajor ? "true" : "false") << " }";
153 return stream;
154 }
155
156 // \todo [2012-01-24 pyry] Speed up lookups using hash.
157
158 int BufferLayout::getVariableIndex(const string &name) const
159 {
160 for (int ndx = 0; ndx < (int)bufferVars.size(); ndx++)
161 {
162 if (bufferVars[ndx].name == name)
163 return ndx;
164 }
165 return -1;
166 }
167
168 int BufferLayout::getBlockIndex(const string &name) const
169 {
170 for (int ndx = 0; ndx < (int)blocks.size(); ndx++)
171 {
172 if (blocks[ndx].name == name)
173 return ndx;
174 }
175 return -1;
176 }
177
178 // ShaderInterface implementation.
179
180 ShaderInterface::ShaderInterface(void)
181 {
182 }
183
184 ShaderInterface::~ShaderInterface(void)
185 {
186 for (std::vector<StructType *>::iterator i = m_structs.begin(); i != m_structs.end(); i++)
187 delete *i;
188
189 for (std::vector<BufferBlock *>::iterator i = m_bufferBlocks.begin(); i != m_bufferBlocks.end(); i++)
190 delete *i;
191 }
192
193 StructType &ShaderInterface::allocStruct(const char *name)
194 {
195 m_structs.reserve(m_structs.size() + 1);
196 m_structs.push_back(new StructType(name));
197 return *m_structs.back();
198 }
199
200 struct StructNameEquals
201 {
202 std::string name;
203
204 StructNameEquals(const char *name_) : name(name_)
205 {
206 }
207
208 bool operator()(const StructType *type) const
209 {
210 return type->getTypeName() && name == type->getTypeName();
211 }
212 };
213
214 const StructType *ShaderInterface::findStruct(const char *name) const
215 {
216 std::vector<StructType *>::const_iterator pos =
217 std::find_if(m_structs.begin(), m_structs.end(), StructNameEquals(name));
218 return pos != m_structs.end() ? *pos : DE_NULL;
219 }
220
221 void ShaderInterface::getNamedStructs(std::vector<const StructType *> &structs) const
222 {
223 for (std::vector<StructType *>::const_iterator i = m_structs.begin(); i != m_structs.end(); i++)
224 {
225 if ((*i)->getTypeName() != DE_NULL)
226 structs.push_back(*i);
227 }
228 }
229
230 BufferBlock &ShaderInterface::allocBlock(const char *name)
231 {
232 m_bufferBlocks.reserve(m_bufferBlocks.size() + 1);
233 m_bufferBlocks.push_back(new BufferBlock(name));
234 return *m_bufferBlocks.back();
235 }
236
237 namespace // Utilities
238 {
239 // Layout computation.
240
241 int getDataTypeByteSize(glu::DataType type)
242 {
243 if (deInRange32(type, glu::TYPE_UINT8, glu::TYPE_UINT8_VEC4) ||
244 deInRange32(type, glu::TYPE_INT8, glu::TYPE_INT8_VEC4))
245 {
246 return glu::getDataTypeScalarSize(type) * (int)sizeof(uint8_t);
247 }
248 else if (deInRange32(type, glu::TYPE_UINT16, glu::TYPE_UINT16_VEC4) ||
249 deInRange32(type, glu::TYPE_INT16, glu::TYPE_INT16_VEC4) ||
250 deInRange32(type, glu::TYPE_FLOAT16, glu::TYPE_FLOAT16_VEC4))
251 {
252 return glu::getDataTypeScalarSize(type) * (int)sizeof(uint16_t);
253 }
254 else
255 {
256 return glu::getDataTypeScalarSize(type) * (int)sizeof(uint32_t);
257 }
258 }
259
260 int getDataTypeByteAlignment(glu::DataType type)
261 {
262 switch (type)
263 {
264 case glu::TYPE_FLOAT:
265 case glu::TYPE_INT:
266 case glu::TYPE_UINT:
267 case glu::TYPE_BOOL:
268 return 1 * (int)sizeof(uint32_t);
269
270 case glu::TYPE_FLOAT_VEC2:
271 case glu::TYPE_INT_VEC2:
272 case glu::TYPE_UINT_VEC2:
273 case glu::TYPE_BOOL_VEC2:
274 return 2 * (int)sizeof(uint32_t);
275
276 case glu::TYPE_FLOAT_VEC3:
277 case glu::TYPE_INT_VEC3:
278 case glu::TYPE_UINT_VEC3:
279 case glu::TYPE_BOOL_VEC3: // Fall-through to vec4
280
281 case glu::TYPE_FLOAT_VEC4:
282 case glu::TYPE_INT_VEC4:
283 case glu::TYPE_UINT_VEC4:
284 case glu::TYPE_BOOL_VEC4:
285 return 4 * (int)sizeof(uint32_t);
286
287 case glu::TYPE_UINT8:
288 case glu::TYPE_INT8:
289 return 1 * (int)sizeof(uint8_t);
290
291 case glu::TYPE_UINT8_VEC2:
292 case glu::TYPE_INT8_VEC2:
293 return 2 * (int)sizeof(uint8_t);
294
295 case glu::TYPE_UINT8_VEC3:
296 case glu::TYPE_INT8_VEC3: // Fall-through to vec4
297
298 case glu::TYPE_UINT8_VEC4:
299 case glu::TYPE_INT8_VEC4:
300 return 4 * (int)sizeof(uint8_t);
301
302 case glu::TYPE_UINT16:
303 case glu::TYPE_INT16:
304 case glu::TYPE_FLOAT16:
305 return 1 * (int)sizeof(uint16_t);
306
307 case glu::TYPE_UINT16_VEC2:
308 case glu::TYPE_INT16_VEC2:
309 case glu::TYPE_FLOAT16_VEC2:
310 return 2 * (int)sizeof(uint16_t);
311
312 case glu::TYPE_UINT16_VEC3:
313 case glu::TYPE_INT16_VEC3:
314 case glu::TYPE_FLOAT16_VEC3: // Fall-through to vec4
315
316 case glu::TYPE_UINT16_VEC4:
317 case glu::TYPE_INT16_VEC4:
318 case glu::TYPE_FLOAT16_VEC4:
319 return 4 * (int)sizeof(uint16_t);
320
321 default:
322 DE_ASSERT(false);
323 return 0;
324 }
325 }
326
327 int computeStd140BaseAlignment(const VarType &type, uint32_t layoutFlags)
328 {
329 const int vec4Alignment = (int)sizeof(uint32_t) * 4;
330
331 if (type.isBasicType())
332 {
333 glu::DataType basicType = type.getBasicType();
334
335 if (glu::isDataTypeMatrix(basicType))
336 {
337 const bool isRowMajor = !!(layoutFlags & LAYOUT_ROW_MAJOR);
338 const int vecSize =
339 isRowMajor ? glu::getDataTypeMatrixNumColumns(basicType) : glu::getDataTypeMatrixNumRows(basicType);
340 const int vecAlign = deAlign32(getDataTypeByteAlignment(glu::getDataTypeFloatVec(vecSize)), vec4Alignment);
341
342 return vecAlign;
343 }
344 else
345 return getDataTypeByteAlignment(basicType);
346 }
347 else if (type.isArrayType())
348 {
349 int elemAlignment = computeStd140BaseAlignment(type.getElementType(), layoutFlags);
350
351 // Round up to alignment of vec4
352 return deAlign32(elemAlignment, vec4Alignment);
353 }
354 else
355 {
356 DE_ASSERT(type.isStructType());
357
358 int maxBaseAlignment = 0;
359
360 for (StructType::ConstIterator memberIter = type.getStructPtr()->begin();
361 memberIter != type.getStructPtr()->end(); memberIter++)
362 maxBaseAlignment =
363 de::max(maxBaseAlignment, computeStd140BaseAlignment(memberIter->getType(), layoutFlags));
364
365 return deAlign32(maxBaseAlignment, vec4Alignment);
366 }
367 }
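
// Illustrative example of the std140 rules above (numbers worked out by hand, not
// quoted from the spec): for members "float a; vec3 b; float c[2];" the reference
// layout would place a at offset 0, b at offset 16 (a vec3 aligns like a vec4) and
// c at offset 32 with an array stride of 16, since std140 rounds the element
// alignment of an array up to that of vec4.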
368
369 int computeStd430BaseAlignment(const VarType &type, uint32_t layoutFlags)
370 {
371 // Otherwise identical to std140, except that the alignment of structures and arrays
372 // is not rounded up to the alignment of vec4.
373
374 if (type.isBasicType())
375 {
376 glu::DataType basicType = type.getBasicType();
377
378 if (glu::isDataTypeMatrix(basicType))
379 {
380 const bool isRowMajor = !!(layoutFlags & LAYOUT_ROW_MAJOR);
381 const int vecSize =
382 isRowMajor ? glu::getDataTypeMatrixNumColumns(basicType) : glu::getDataTypeMatrixNumRows(basicType);
383 const int vecAlign = getDataTypeByteAlignment(glu::getDataTypeFloatVec(vecSize));
384 return vecAlign;
385 }
386 else
387 return getDataTypeByteAlignment(basicType);
388 }
389 else if (type.isArrayType())
390 {
391 return computeStd430BaseAlignment(type.getElementType(), layoutFlags);
392 }
393 else
394 {
395 DE_ASSERT(type.isStructType());
396
397 int maxBaseAlignment = 0;
398
399 for (StructType::ConstIterator memberIter = type.getStructPtr()->begin();
400 memberIter != type.getStructPtr()->end(); memberIter++)
401 maxBaseAlignment =
402 de::max(maxBaseAlignment, computeStd430BaseAlignment(memberIter->getType(), layoutFlags));
403
404 return maxBaseAlignment;
405 }
406 }
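
// Under std430 the same "float a; vec3 b; float c[2];" example packs tighter:
// a at offset 0, b at offset 16, and c at offset 28 with an array stride of 4,
// because array and struct alignments are no longer rounded up to vec4.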
407
408 int computeRelaxedBlockBaseAlignment(const VarType &type, uint32_t layoutFlags)
409 {
410 if (type.isBasicType())
411 {
412 glu::DataType basicType = type.getBasicType();
413
414 if (glu::isDataTypeVector(basicType))
415 return getDataTypeByteAlignment(glu::getDataTypeScalarType(basicType));
416
417 if (glu::isDataTypeMatrix(basicType))
418 {
419 const bool isRowMajor = !!(layoutFlags & LAYOUT_ROW_MAJOR);
420 const int vecSize =
421 isRowMajor ? glu::getDataTypeMatrixNumColumns(basicType) : glu::getDataTypeMatrixNumRows(basicType);
422 const int vecAlign = getDataTypeByteAlignment(glu::getDataTypeFloatVec(vecSize));
423 return vecAlign;
424 }
425 else
426 return getDataTypeByteAlignment(basicType);
427 }
428 else if (type.isArrayType())
429 return computeStd430BaseAlignment(type.getElementType(), layoutFlags);
430 else
431 {
432 DE_ASSERT(type.isStructType());
433
434 int maxBaseAlignment = 0;
435 for (StructType::ConstIterator memberIter = type.getStructPtr()->begin();
436 memberIter != type.getStructPtr()->end(); memberIter++)
437 maxBaseAlignment =
438 de::max(maxBaseAlignment, computeRelaxedBlockBaseAlignment(memberIter->getType(), layoutFlags));
439
440 return maxBaseAlignment;
441 }
442 }
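
// Relaxed block layout (VK_KHR_relaxed_block_layout) lets vectors align to their
// scalar component type, with the additional restriction that a vector must not
// improperly straddle a 16-byte boundary; that straddle adjustment is applied in
// computeReferenceLayout() below rather than here.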
443
444 int computeScalarBlockAlignment(const VarType &type, uint32_t layoutFlags)
445 {
446 if (type.isBasicType())
447 {
448 return getDataTypeByteAlignment(glu::getDataTypeScalarType(type.getBasicType()));
449 }
450 else if (type.isArrayType())
451 return computeScalarBlockAlignment(type.getElementType(), layoutFlags);
452 else
453 {
454 DE_ASSERT(type.isStructType());
455
456 int maxBaseAlignment = 0;
457 for (StructType::ConstIterator memberIter = type.getStructPtr()->begin();
458 memberIter != type.getStructPtr()->end(); memberIter++)
459 maxBaseAlignment =
460 de::max(maxBaseAlignment, computeScalarBlockAlignment(memberIter->getType(), layoutFlags));
461
462 return maxBaseAlignment;
463 }
464 }
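
// Scalar block layout (VK_EXT_scalar_block_layout) aligns every composite purely to
// its scalar component, so e.g. "vec3 v; float f;" occupies 16 contiguous bytes with
// no padding between the members.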
465
466 inline uint32_t mergeLayoutFlags(uint32_t prevFlags, uint32_t newFlags)
467 {
468 const uint32_t packingMask = LAYOUT_STD430 | LAYOUT_STD140 | LAYOUT_RELAXED | LAYOUT_SCALAR;
469 const uint32_t matrixMask = LAYOUT_ROW_MAJOR | LAYOUT_COLUMN_MAJOR;
470
471 uint32_t mergedFlags = 0;
472
473 mergedFlags |= ((newFlags & packingMask) ? newFlags : prevFlags) & packingMask;
474 mergedFlags |= ((newFlags & matrixMask) ? newFlags : prevFlags) & matrixMask;
475
476 return mergedFlags;
477 }
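
// Example: mergeLayoutFlags(LAYOUT_STD140 | LAYOUT_COLUMN_MAJOR, LAYOUT_ROW_MAJOR)
// yields LAYOUT_STD140 | LAYOUT_ROW_MAJOR, i.e. a per-variable qualifier overrides
// only the category (packing or matrix order) it actually specifies.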
478
479 //! Appends all child elements to layout, returns value that should be appended to offset.
480 int computeReferenceLayout(BufferLayout &layout, int curBlockNdx, int baseOffset, const std::string &curPrefix,
481 const VarType &type, uint32_t layoutFlags)
482 {
483 // Reference layout uses std430 rules by default. std140 rules are
484 // chosen only for blocks that have std140 layout.
485 const int baseAlignment = (layoutFlags & LAYOUT_SCALAR) != 0 ? computeScalarBlockAlignment(type, layoutFlags) :
486 (layoutFlags & LAYOUT_STD140) != 0 ? computeStd140BaseAlignment(type, layoutFlags) :
487 (layoutFlags & LAYOUT_RELAXED) != 0 ?
488 computeRelaxedBlockBaseAlignment(type, layoutFlags) :
489 computeStd430BaseAlignment(type, layoutFlags);
490 int curOffset = deAlign32(baseOffset, baseAlignment);
491 const int topLevelArraySize = 1; // Default values
492 const int topLevelArrayStride = 0;
493
494 if (type.isBasicType())
495 {
496 const glu::DataType basicType = type.getBasicType();
497 BufferVarLayoutEntry entry;
498
499 entry.name = curPrefix;
500 entry.type = basicType;
501 entry.arraySize = 1;
502 entry.arrayStride = 0;
503 entry.matrixStride = 0;
504 entry.topLevelArraySize = topLevelArraySize;
505 entry.topLevelArrayStride = topLevelArrayStride;
506 entry.blockNdx = curBlockNdx;
507
508 if (glu::isDataTypeMatrix(basicType))
509 {
510 // Array of vectors as specified in rules 5 & 7.
511 const bool isRowMajor = !!(layoutFlags & LAYOUT_ROW_MAJOR);
512 const int vecSize =
513 isRowMajor ? glu::getDataTypeMatrixNumColumns(basicType) : glu::getDataTypeMatrixNumRows(basicType);
514 const glu::DataType vecType = glu::getDataTypeFloatVec(vecSize);
515 const int numVecs =
516 isRowMajor ? glu::getDataTypeMatrixNumRows(basicType) : glu::getDataTypeMatrixNumColumns(basicType);
517 const int vecStride = (layoutFlags & LAYOUT_SCALAR) ? getDataTypeByteSize(vecType) : baseAlignment;
518
519 entry.offset = curOffset;
520 entry.matrixStride = vecStride;
521 entry.isRowMajor = isRowMajor;
522
523 curOffset += numVecs * entry.matrixStride;
524 }
525 else
526 {
527 if (!(layoutFlags & LAYOUT_SCALAR) && (layoutFlags & LAYOUT_RELAXED) && glu::isDataTypeVector(basicType) &&
528 (getDataTypeByteSize(basicType) <= 16 ?
529 curOffset / 16 != (curOffset + getDataTypeByteSize(basicType) - 1) / 16 :
530 curOffset % 16 != 0))
531 curOffset = deIntRoundToPow2(curOffset, 16);
532
533 // Scalar or vector.
534 entry.offset = curOffset;
535
536 curOffset += getDataTypeByteSize(basicType);
537 }
538
539 layout.bufferVars.push_back(entry);
540 }
541 else if (type.isArrayType())
542 {
543 const VarType &elemType = type.getElementType();
544
545 if (elemType.isBasicType() && !glu::isDataTypeMatrix(elemType.getBasicType()))
546 {
547 // Array of scalars or vectors.
548 const glu::DataType elemBasicType = elemType.getBasicType();
549 const int stride = (layoutFlags & LAYOUT_SCALAR) ? getDataTypeByteSize(elemBasicType) : baseAlignment;
550 BufferVarLayoutEntry entry;
551
552 entry.name = curPrefix + "[0]"; // Array variables are always postfixed with [0]
553 entry.type = elemBasicType;
554 entry.blockNdx = curBlockNdx;
555 entry.offset = curOffset;
556 entry.arraySize = type.getArraySize();
557 entry.arrayStride = stride;
558 entry.matrixStride = 0;
559 entry.topLevelArraySize = topLevelArraySize;
560 entry.topLevelArrayStride = topLevelArrayStride;
561
562 curOffset += stride * type.getArraySize();
563
564 layout.bufferVars.push_back(entry);
565 }
566 else if (elemType.isBasicType() && glu::isDataTypeMatrix(elemType.getBasicType()))
567 {
568 // Array of matrices.
569 const glu::DataType elemBasicType = elemType.getBasicType();
570 const bool isRowMajor = !!(layoutFlags & LAYOUT_ROW_MAJOR);
571 const int vecSize = isRowMajor ? glu::getDataTypeMatrixNumColumns(elemBasicType) :
572 glu::getDataTypeMatrixNumRows(elemBasicType);
573 const glu::DataType vecType = glu::getDataTypeFloatVec(vecSize);
574 const int numVecs = isRowMajor ? glu::getDataTypeMatrixNumRows(elemBasicType) :
575 glu::getDataTypeMatrixNumColumns(elemBasicType);
576 const int vecStride = (layoutFlags & LAYOUT_SCALAR) ? getDataTypeByteSize(vecType) : baseAlignment;
577 BufferVarLayoutEntry entry;
578
579 entry.name = curPrefix + "[0]"; // Array variables are always postfixed with [0]
580 entry.type = elemBasicType;
581 entry.blockNdx = curBlockNdx;
582 entry.offset = curOffset;
583 entry.arraySize = type.getArraySize();
584 entry.arrayStride = vecStride * numVecs;
585 entry.matrixStride = vecStride;
586 entry.isRowMajor = isRowMajor;
587 entry.topLevelArraySize = topLevelArraySize;
588 entry.topLevelArrayStride = topLevelArrayStride;
589
590 curOffset += entry.arrayStride * type.getArraySize();
591
592 layout.bufferVars.push_back(entry);
593 }
594 else
595 {
596 DE_ASSERT(elemType.isStructType() || elemType.isArrayType());
597
598 for (int elemNdx = 0; elemNdx < type.getArraySize(); elemNdx++)
599 curOffset += computeReferenceLayout(layout, curBlockNdx, curOffset,
600 curPrefix + "[" + de::toString(elemNdx) + "]",
601 type.getElementType(), layoutFlags);
602 }
603 }
604 else
605 {
606 DE_ASSERT(type.isStructType());
607
608 for (StructType::ConstIterator memberIter = type.getStructPtr()->begin();
609 memberIter != type.getStructPtr()->end(); memberIter++)
610 curOffset += computeReferenceLayout(layout, curBlockNdx, curOffset, curPrefix + "." + memberIter->getName(),
611 memberIter->getType(), layoutFlags);
612
613 if (!(layoutFlags & LAYOUT_SCALAR))
614 curOffset = deAlign32(curOffset, baseAlignment);
615 }
616
617 return curOffset - baseOffset;
618 }
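
// Example of the matrix path above: a column_major mat3 under std140 is treated as
// three vec3 columns padded to vec4 alignment, so the entry gets matrixStride 16 and
// curOffset advances by numVecs * matrixStride = 48 bytes.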
619
620 //! Appends all child elements to layout, returns offset increment.
621 int computeReferenceLayout(BufferLayout &layout, int curBlockNdx, const std::string &blockPrefix, int baseOffset,
622 const BufferVar &bufVar, uint32_t blockLayoutFlags)
623 {
624 const VarType &varType = bufVar.getType();
625 const uint32_t combinedFlags = mergeLayoutFlags(blockLayoutFlags, bufVar.getFlags());
626
627 if (varType.isArrayType())
628 {
629 // Top-level arrays need special care.
630 const int topLevelArraySize = varType.getArraySize() == VarType::UNSIZED_ARRAY ? 0 : varType.getArraySize();
631 const string prefix = blockPrefix + bufVar.getName() + "[0]";
632 const bool isStd140 = (blockLayoutFlags & LAYOUT_STD140) != 0;
633 const int vec4Align = (int)sizeof(uint32_t) * 4;
634 const int baseAlignment =
635 (blockLayoutFlags & LAYOUT_SCALAR) != 0 ? computeScalarBlockAlignment(varType, combinedFlags) :
636 isStd140 ? computeStd140BaseAlignment(varType, combinedFlags) :
637 (blockLayoutFlags & LAYOUT_RELAXED) != 0 ? computeRelaxedBlockBaseAlignment(varType, combinedFlags) :
638 computeStd430BaseAlignment(varType, combinedFlags);
639 int curOffset = deAlign32(baseOffset, baseAlignment);
640 const VarType &elemType = varType.getElementType();
641
642 if (elemType.isBasicType() && !glu::isDataTypeMatrix(elemType.getBasicType()))
643 {
644 // Array of scalars or vectors.
645 const glu::DataType elemBasicType = elemType.getBasicType();
646 const int elemBaseAlign = getDataTypeByteAlignment(elemBasicType);
647 const int stride = (blockLayoutFlags & LAYOUT_SCALAR) ? getDataTypeByteSize(elemBasicType) :
648 isStd140 ? deAlign32(elemBaseAlign, vec4Align) :
649 elemBaseAlign;
650
651 BufferVarLayoutEntry entry;
652
653 entry.name = prefix;
654 entry.topLevelArraySize = 1;
655 entry.topLevelArrayStride = 0;
656 entry.type = elemBasicType;
657 entry.blockNdx = curBlockNdx;
658 entry.offset = curOffset;
659 entry.arraySize = topLevelArraySize;
660 entry.arrayStride = stride;
661 entry.matrixStride = 0;
662
663 layout.bufferVars.push_back(entry);
664
665 curOffset += stride * topLevelArraySize;
666 }
667 else if (elemType.isBasicType() && glu::isDataTypeMatrix(elemType.getBasicType()))
668 {
669 // Array of matrices.
670 const glu::DataType elemBasicType = elemType.getBasicType();
671 const bool isRowMajor = !!(combinedFlags & LAYOUT_ROW_MAJOR);
672 const int vecSize = isRowMajor ? glu::getDataTypeMatrixNumColumns(elemBasicType) :
673 glu::getDataTypeMatrixNumRows(elemBasicType);
674 const int numVecs = isRowMajor ? glu::getDataTypeMatrixNumRows(elemBasicType) :
675 glu::getDataTypeMatrixNumColumns(elemBasicType);
676 const glu::DataType vecType = glu::getDataTypeFloatVec(vecSize);
677 const int vecBaseAlign = getDataTypeByteAlignment(vecType);
678 const int stride = (blockLayoutFlags & LAYOUT_SCALAR) ? getDataTypeByteSize(vecType) :
679 isStd140 ? deAlign32(vecBaseAlign, vec4Align) :
680 vecBaseAlign;
681
682 BufferVarLayoutEntry entry;
683
684 entry.name = prefix;
685 entry.topLevelArraySize = 1;
686 entry.topLevelArrayStride = 0;
687 entry.type = elemBasicType;
688 entry.blockNdx = curBlockNdx;
689 entry.offset = curOffset;
690 entry.arraySize = topLevelArraySize;
691 entry.arrayStride = stride * numVecs;
692 entry.matrixStride = stride;
693 entry.isRowMajor = isRowMajor;
694
695 layout.bufferVars.push_back(entry);
696
697 curOffset += entry.arrayStride * topLevelArraySize;
698 }
699 else
700 {
701 DE_ASSERT(elemType.isStructType() || elemType.isArrayType());
702
703 // Struct base alignment is not added multiple times as curOffset supplied to computeReferenceLayout
704 // was already aligned correctly. Thus computeReferenceLayout should not add any extra padding
705 // before struct. Padding after struct will be added as it should.
706 //
707 // Stride could be computed prior to creating child elements, but it would essentially require running
708 // the layout computation twice. Instead we fix stride to child elements afterwards.
709
710 const int firstChildNdx = (int)layout.bufferVars.size();
711
712 const int size = computeReferenceLayout(layout, curBlockNdx, deAlign32(curOffset, baseAlignment), prefix,
713 varType.getElementType(), combinedFlags);
714 const int stride = deAlign32(size, baseAlignment);
715
716 for (int childNdx = firstChildNdx; childNdx < (int)layout.bufferVars.size(); childNdx++)
717 {
718 layout.bufferVars[childNdx].topLevelArraySize = topLevelArraySize;
719 layout.bufferVars[childNdx].topLevelArrayStride = stride;
720 }
721
722 if (topLevelArraySize != 0)
723 curOffset += stride * (topLevelArraySize - 1) + size;
724 }
725
726 return curOffset - baseOffset;
727 }
728 else
729 return computeReferenceLayout(layout, curBlockNdx, baseOffset, blockPrefix + bufVar.getName(), varType,
730 combinedFlags);
731 }
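
// Note on the struct/array branch above: for a top-level array such as "S s[4];",
// the members of S are each reported once with topLevelArraySize = 4 (0 for an
// unsized array) and topLevelArrayStride equal to the aligned size of one element,
// while the per-member offsets describe element zero only.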
732
733 void computeReferenceLayout(BufferLayout &layout, ShaderInterface &interface)
734 {
735 int numBlocks = interface.getNumBlocks();
736
737 for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
738 {
739 BufferBlock &block = interface.getBlock(blockNdx);
740 bool hasInstanceName = block.getInstanceName() != DE_NULL;
741 std::string blockPrefix = hasInstanceName ? (std::string(block.getBlockName()) + ".") : std::string("");
742 int curOffset = 0;
743 int activeBlockNdx = (int)layout.blocks.size();
744 int firstVarNdx = (int)layout.bufferVars.size();
745
746 size_t oldSize = layout.bufferVars.size();
747 for (BufferBlock::iterator varIter = block.begin(); varIter != block.end(); varIter++)
748 {
749 BufferVar &bufVar = *varIter;
750 curOffset +=
751 computeReferenceLayout(layout, activeBlockNdx, blockPrefix, curOffset, bufVar, block.getFlags());
752 if (block.getFlags() & LAYOUT_RELAXED)
753 {
754 DE_ASSERT(!(layout.bufferVars.size() <= oldSize));
755 bufVar.setOffset(layout.bufferVars[oldSize].offset);
756 }
757 oldSize = layout.bufferVars.size();
758 }
759
760 int varIndicesEnd = (int)layout.bufferVars.size();
761 int blockSize = curOffset;
762 int numInstances = block.isArray() ? block.getArraySize() : 1;
763
764 // Create block layout entries for each instance.
765 for (int instanceNdx = 0; instanceNdx < numInstances; instanceNdx++)
766 {
767 // Allocate entry for instance.
768 layout.blocks.push_back(BlockLayoutEntry());
769 BlockLayoutEntry &blockEntry = layout.blocks.back();
770
771 blockEntry.name = block.getBlockName();
772 blockEntry.size = blockSize;
773
774 // Compute active variable set for block.
775 for (int varNdx = firstVarNdx; varNdx < varIndicesEnd; varNdx++)
776 blockEntry.activeVarIndices.push_back(varNdx);
777
778 if (block.isArray())
779 blockEntry.name += "[" + de::toString(instanceNdx) + "]";
780 }
781 }
782 }
783
784 // Value generator.
785
786 void generateValue(const BufferVarLayoutEntry &entry, int unsizedArraySize, void *basePtr, de::Random &rnd)
787 {
788 const glu::DataType scalarType = glu::getDataTypeScalarType(entry.type);
789 const int scalarSize = glu::getDataTypeScalarSize(entry.type);
790 const int arraySize = entry.arraySize == 0 ? unsizedArraySize : entry.arraySize;
791 const int arrayStride = entry.arrayStride;
792 const int topLevelSize = entry.topLevelArraySize == 0 ? unsizedArraySize : entry.topLevelArraySize;
793 const int topLevelStride = entry.topLevelArrayStride;
794 const bool isMatrix = glu::isDataTypeMatrix(entry.type);
795 const int numVecs = isMatrix ? (entry.isRowMajor ? glu::getDataTypeMatrixNumRows(entry.type) :
796 glu::getDataTypeMatrixNumColumns(entry.type)) :
797 1;
798 const int vecSize = scalarSize / numVecs;
799 const size_t compSize = getDataTypeByteSize(scalarType);
800
801 DE_ASSERT(scalarSize % numVecs == 0);
802 DE_ASSERT(topLevelSize >= 0);
803 DE_ASSERT(arraySize >= 0);
804
805 for (int topElemNdx = 0; topElemNdx < topLevelSize; topElemNdx++)
806 {
807 uint8_t *const topElemPtr = (uint8_t *)basePtr + entry.offset + topElemNdx * topLevelStride;
808
809 for (int elemNdx = 0; elemNdx < arraySize; elemNdx++)
810 {
811 uint8_t *const elemPtr = topElemPtr + elemNdx * arrayStride;
812
813 for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
814 {
815 uint8_t *const vecPtr = elemPtr + (isMatrix ? vecNdx * entry.matrixStride : 0);
816
817 for (int compNdx = 0; compNdx < vecSize; compNdx++)
818 {
819 uint8_t *const compPtr = vecPtr + compSize * compNdx;
820
821 switch (scalarType)
822 {
823 case glu::TYPE_FLOAT:
824 *((float *)compPtr) = (float)rnd.getInt(-9, 9);
825 break;
826 case glu::TYPE_INT:
827 *((int *)compPtr) = rnd.getInt(-9, 9);
828 break;
829 case glu::TYPE_UINT:
830 *((uint32_t *)compPtr) = (uint32_t)rnd.getInt(0, 9);
831 break;
832 case glu::TYPE_INT8:
833 *((int8_t *)compPtr) = (int8_t)rnd.getInt(-9, 9);
834 break;
835 case glu::TYPE_UINT8:
836 *((uint8_t *)compPtr) = (uint8_t)rnd.getInt(0, 9);
837 break;
838 case glu::TYPE_INT16:
839 *((int16_t *)compPtr) = (int16_t)rnd.getInt(-9, 9);
840 break;
841 case glu::TYPE_UINT16:
842 *((uint16_t *)compPtr) = (uint16_t)rnd.getInt(0, 9);
843 break;
844 case glu::TYPE_FLOAT16:
845 *((tcu::float16_t *)compPtr) = tcu::Float16((float)rnd.getInt(-9, 9)).bits();
846 break;
847 // \note Random bit pattern is used for true values. Spec states that all non-zero values are
848 // interpreted as true but some implementations fail this.
849 case glu::TYPE_BOOL:
850 *((uint32_t *)compPtr) = rnd.getBool() ? rnd.getUint32() | 1u : 0u;
851 break;
852 default:
853 DE_ASSERT(false);
854 }
855 }
856 }
857 }
858 }
859 }
860
861 void generateValues(const BufferLayout &layout, const vector<BlockDataPtr> &blockPointers, uint32_t seed)
862 {
863 de::Random rnd(seed);
864 const int numBlocks = (int)layout.blocks.size();
865
866 DE_ASSERT(numBlocks == (int)blockPointers.size());
867
868 for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
869 {
870 const BlockLayoutEntry &blockLayout = layout.blocks[blockNdx];
871 const BlockDataPtr &blockPtr = blockPointers[blockNdx];
872 const int numEntries = (int)layout.blocks[blockNdx].activeVarIndices.size();
873
874 for (int entryNdx = 0; entryNdx < numEntries; entryNdx++)
875 {
876 const int varNdx = blockLayout.activeVarIndices[entryNdx];
877 const BufferVarLayoutEntry &varEntry = layout.bufferVars[varNdx];
878
879 generateValue(varEntry, blockPtr.lastUnsizedArraySize, blockPtr.ptr, rnd);
880 }
881 }
882 }
883
884 // Shader generator.
885
886 void collectUniqueBasicTypes(std::set<glu::DataType> &basicTypes, const BufferBlock &bufferBlock)
887 {
888 for (BufferBlock::const_iterator iter = bufferBlock.begin(); iter != bufferBlock.end(); ++iter)
889 vkt::typecomputil::collectUniqueBasicTypes(basicTypes, iter->getType());
890 }
891
892 void collectUniqueBasicTypes(std::set<glu::DataType> &basicTypes, const ShaderInterface &interface)
893 {
894 for (int ndx = 0; ndx < interface.getNumBlocks(); ++ndx)
895 collectUniqueBasicTypes(basicTypes, interface.getBlock(ndx));
896 }
897
898 void generateCompareFuncs(std::ostream &str, const ShaderInterface &interface)
899 {
900 std::set<glu::DataType> types;
901 std::set<glu::DataType> compareFuncs;
902
903 // Collect unique basic types
904 collectUniqueBasicTypes(types, interface);
905
906 // Set of compare functions required
907 for (std::set<glu::DataType>::const_iterator iter = types.begin(); iter != types.end(); ++iter)
908 {
909 vkt::typecomputil::getCompareDependencies(compareFuncs, *iter);
910 }
911
912 for (int type = 0; type < glu::TYPE_LAST; ++type)
913 {
914 if (compareFuncs.find(glu::DataType(type)) != compareFuncs.end())
915 str << vkt::typecomputil::getCompareFuncForType(glu::DataType(type));
916 }
917 }
918
919 bool usesRelaxedLayout(const ShaderInterface &interface)
920 {
921 // If any of the blocks has the LAYOUT_RELAXED flag
922 for (int ndx = 0; ndx < interface.getNumBlocks(); ++ndx)
923 {
924 if (interface.getBlock(ndx).getFlags() & LAYOUT_RELAXED)
925 return true;
926 }
927 return false;
928 }
929
930 bool uses16BitStorage(const ShaderInterface &interface)
931 {
932 // If any of the blocks has the LAYOUT_16BIT_STORAGE flag
933 for (int ndx = 0; ndx < interface.getNumBlocks(); ++ndx)
934 {
935 if (interface.getBlock(ndx).getFlags() & LAYOUT_16BIT_STORAGE)
936 return true;
937 }
938 return false;
939 }
940
941 bool uses8BitStorage(const ShaderInterface &interface)
942 {
943 // If any of the blocks has the LAYOUT_8BIT_STORAGE flag
944 for (int ndx = 0; ndx < interface.getNumBlocks(); ++ndx)
945 {
946 if (interface.getBlock(ndx).getFlags() & LAYOUT_8BIT_STORAGE)
947 return true;
948 }
949 return false;
950 }
951
952 bool usesScalarLayout(const ShaderInterface &interface)
953 {
954 // If any of the blocks has the LAYOUT_SCALAR flag
955 for (int ndx = 0; ndx < interface.getNumBlocks(); ++ndx)
956 {
957 if (interface.getBlock(ndx).getFlags() & LAYOUT_SCALAR)
958 return true;
959 }
960 return false;
961 }
962
963 bool usesDescriptorIndexing(const ShaderInterface &interface)
964 {
965 // If any of the blocks has the LAYOUT_DESCRIPTOR_INDEXING flag
966 for (int ndx = 0; ndx < interface.getNumBlocks(); ++ndx)
967 {
968 if (interface.getBlock(ndx).getFlags() & LAYOUT_DESCRIPTOR_INDEXING)
969 return true;
970 }
971 return false;
972 }
973
974 struct Indent
975 {
976 int level;
977 Indent(int level_) : level(level_)
978 {
979 }
980 };
981
982 std::ostream &operator<<(std::ostream &str, const Indent &indent)
983 {
984 for (int i = 0; i < indent.level; i++)
985 str << "\t";
986 return str;
987 }
988
989 void generateDeclaration(std::ostream &src, const BufferVar &bufferVar, int indentLevel)
990 {
991 // \todo [pyry] Qualifiers
992 if ((bufferVar.getFlags() & LAYOUT_MASK) != 0)
993 src << "layout(" << LayoutFlagsFmt(bufferVar.getFlags() & LAYOUT_MASK) << ") ";
994 else if (bufferVar.getOffset() != ~0u)
995 src << "layout(offset = " << bufferVar.getOffset() << ") ";
996
997 src << glu::declare(bufferVar.getType(), bufferVar.getName(), indentLevel);
998 }
999
1000 void generateDeclaration(std::ostream &src, const BufferBlock &block, int bindingPoint, bool usePhysStorageBuffer)
1001 {
1002 src << "layout(";
1003 if ((block.getFlags() & LAYOUT_MASK) != 0)
1004 src << LayoutFlagsFmt(block.getFlags() & LAYOUT_MASK) << ", ";
1005
1006 if (usePhysStorageBuffer)
1007 src << "buffer_reference";
1008 else
1009 src << "binding = " << bindingPoint;
1010
1011 src << ") ";
1012
1013 bool readonly = true;
1014 for (BufferBlock::const_iterator varIter = block.begin(); varIter != block.end(); varIter++)
1015 {
1016 const BufferVar &bufVar = *varIter;
1017 if (bufVar.getFlags() & ACCESS_WRITE)
1018 {
1019 readonly = false;
1020 break;
1021 }
1022 }
1023 if (readonly)
1024 src << "readonly ";
1025
1026 src << "buffer " << block.getBlockName();
1027 src << "\n{\n";
1028
1029 for (BufferBlock::const_iterator varIter = block.begin(); varIter != block.end(); varIter++)
1030 {
1031 src << Indent(1);
1032
1033 generateDeclaration(src, *varIter, 1 /* indent level */);
1034 src << ";\n";
1035 }
1036
1037 src << "}";
1038
1039 if (!usePhysStorageBuffer)
1040 {
1041 if (block.getInstanceName() != DE_NULL)
1042 {
1043 src << " " << block.getInstanceName();
1044 if (block.getFlags() & LAYOUT_DESCRIPTOR_INDEXING)
1045 src << "[]";
1046 else if (block.isArray())
1047 src << "[" << block.getArraySize() << "]";
1048 }
1049 else
1050 DE_ASSERT(!block.isArray());
1051 }
1052
1053 src << ";\n";
1054 }
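
// For illustration, a block with an std140 layout flag, a single read-only member
// and an instance name would be emitted roughly as follows (names are made up here):
//
//   layout(std140, binding = 1) readonly buffer Block
//   {
//       vec4 v;
//   } block;
//
// "readonly" is dropped as soon as any member carries ACCESS_WRITE, and the binding
// is replaced by "buffer_reference" when physical storage buffers are in use.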
1055
1056 void generateImmMatrixSrc(std::ostream &src, glu::DataType basicType, int matrixStride, bool isRowMajor, bool singleCol,
1057 int colNumber, const void *valuePtr)
1058 {
1059 DE_ASSERT(glu::isDataTypeMatrix(basicType));
1060
1061 const int compSize = sizeof(uint32_t);
1062 const int numRows = glu::getDataTypeMatrixNumRows(basicType);
1063 const int numCols = glu::getDataTypeMatrixNumColumns(basicType);
1064
1065 src << glu::getDataTypeName(singleCol ? glu::getDataTypeMatrixColumnType(basicType) : basicType) << "(";
1066
1067 // Constructed in column-wise order.
1068 bool firstElem = true;
1069 for (int colNdx = 0; colNdx < numCols; colNdx++)
1070 {
1071 if (singleCol && colNdx != colNumber)
1072 continue;
1073
1074 for (int rowNdx = 0; rowNdx < numRows; rowNdx++)
1075 {
1076 const uint8_t *compPtr =
1077 (const uint8_t *)valuePtr +
1078 (isRowMajor ? rowNdx * matrixStride + colNdx * compSize : colNdx * matrixStride + rowNdx * compSize);
1079
1080 if (!firstElem)
1081 src << ", ";
1082
1083 src << de::floatToString(*((const float *)compPtr), 1);
1084 firstElem = false;
1085 }
1086 }
1087
1088 src << ")";
1089 }
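
// Example output of the routine above for a mat2 whose columns hold (1, 2) and
// (3, 4): with singleCol == false it prints "mat2(1.0, 2.0, 3.0, 4.0)", and with
// singleCol == true and colNumber == 1 it prints just "vec2(3.0, 4.0)".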
1090
1091 void generateImmMatrixSrc(std::ostream &src, glu::DataType basicType, int matrixStride, bool isRowMajor,
1092 const void *valuePtr, const char *resultVar, const char *typeName, const string shaderName)
1093 {
1094 const int compSize = sizeof(uint32_t);
1095 const int numRows = glu::getDataTypeMatrixNumRows(basicType);
1096 const int numCols = glu::getDataTypeMatrixNumColumns(basicType);
1097
1098 typeName = "float";
1099 for (int colNdex = 0; colNdex < numCols; colNdex++)
1100 {
1101 for (int rowNdex = 0; rowNdex < numRows; rowNdex++)
1102 {
1103 src << "\t" << resultVar << " = compare_" << typeName << "(" << shaderName << "[" << colNdex << "]["
1104 << rowNdex << "], ";
1105 const uint8_t *compPtr =
1106 (const uint8_t *)valuePtr + (isRowMajor ? rowNdex * matrixStride + colNdex * compSize :
1107 colNdex * matrixStride + rowNdex * compSize);
1108
1109 src << de::floatToString(*((const float *)compPtr), 1);
1110 src << ") && " << resultVar << ";\n";
1111 }
1112 }
1113
1114 typeName = "vec";
1115 for (int colNdex = 0; colNdex < numCols; colNdex++)
1116 {
1117 src << "\t" << resultVar << " = compare_" << typeName << numRows << "(" << shaderName << "[" << colNdex << "], "
1118 << typeName << numRows << "(";
1119 for (int rowNdex = 0; rowNdex < numRows; rowNdex++)
1120 {
1121 const uint8_t *compPtr =
1122 (const uint8_t *)valuePtr + (isRowMajor ? (rowNdex * matrixStride + colNdex * compSize) :
1123 (colNdex * matrixStride + rowNdex * compSize));
1124 src << de::floatToString(*((const float *)compPtr), 1);
1125
1126 if (rowNdex < numRows - 1)
1127 src << ", ";
1128 }
1129 src << ")) && " << resultVar << ";\n";
1130 }
1131 }
1132
1133 void generateImmScalarVectorSrc(std::ostream &src, glu::DataType basicType, const void *valuePtr)
1134 {
1135 DE_ASSERT(glu::isDataTypeFloatOrVec(basicType) || glu::isDataTypeIntOrIVec(basicType) ||
1136 glu::isDataTypeUintOrUVec(basicType) || glu::isDataTypeBoolOrBVec(basicType) ||
1137 glu::isDataTypeExplicitPrecision(basicType));
1138
1139 const glu::DataType scalarType = glu::getDataTypeScalarType(basicType);
1140 const int scalarSize = glu::getDataTypeScalarSize(basicType);
1141 const size_t compSize = getDataTypeByteSize(scalarType);
1142
1143 if (scalarSize > 1)
1144 src << glu::getDataTypeName(vkt::typecomputil::getPromoteType(basicType)) << "(";
1145
1146 for (int scalarNdx = 0; scalarNdx < scalarSize; scalarNdx++)
1147 {
1148 const uint8_t *compPtr = (const uint8_t *)valuePtr + scalarNdx * compSize;
1149
1150 if (scalarNdx > 0)
1151 src << ", ";
1152
1153 switch (scalarType)
1154 {
1155 case glu::TYPE_FLOAT16:
1156 src << de::floatToString(tcu::Float16(*((const tcu::float16_t *)compPtr)).asFloat(), 1);
1157 break;
1158 case glu::TYPE_FLOAT:
1159 src << de::floatToString(*((const float *)compPtr), 1);
1160 break;
1161 case glu::TYPE_INT8:
1162 src << (uint32_t) * ((const int8_t *)compPtr);
1163 break;
1164 case glu::TYPE_INT16:
1165 src << *((const int16_t *)compPtr);
1166 break;
1167 case glu::TYPE_INT:
1168 src << *((const int *)compPtr);
1169 break;
1170 case glu::TYPE_UINT8:
1171 src << (uint32_t) * ((const uint8_t *)compPtr) << "u";
1172 break;
1173 case glu::TYPE_UINT16:
1174 src << *((const uint16_t *)compPtr) << "u";
1175 break;
1176 case glu::TYPE_UINT:
1177 src << *((const uint32_t *)compPtr) << "u";
1178 break;
1179 case glu::TYPE_BOOL:
1180 src << (*((const uint32_t *)compPtr) != 0u ? "true" : "false");
1181 break;
1182 default:
1183 DE_ASSERT(false);
1184 }
1185 }
1186
1187 if (scalarSize > 1)
1188 src << ")";
1189 }
1190
1191 string getAPIName(const BufferBlock &block, const BufferVar &var, const glu::TypeComponentVector &accessPath)
1192 {
1193 std::ostringstream name;
1194
1195 if (block.getInstanceName())
1196 name << block.getBlockName() << ".";
1197
1198 name << var.getName();
1199
1200 for (glu::TypeComponentVector::const_iterator pathComp = accessPath.begin(); pathComp != accessPath.end();
1201 pathComp++)
1202 {
1203 if (pathComp->type == glu::VarTypeComponent::STRUCT_MEMBER)
1204 {
1205 const VarType curType = glu::getVarType(var.getType(), accessPath.begin(), pathComp);
1206 const StructType *structPtr = curType.getStructPtr();
1207
1208 name << "." << structPtr->getMember(pathComp->index).getName();
1209 }
1210 else if (pathComp->type == glu::VarTypeComponent::ARRAY_ELEMENT)
1211 {
1212 if (pathComp == accessPath.begin() || (pathComp + 1) == accessPath.end())
1213 name << "[0]"; // Top- / bottom-level array
1214 else
1215 name << "[" << pathComp->index << "]";
1216 }
1217 else
1218 DE_ASSERT(false);
1219 }
1220
1221 return name.str();
1222 }
1223
1224 string getShaderName(const BufferBlock &block, int instanceNdx, const BufferVar &var,
1225 const glu::TypeComponentVector &accessPath)
1226 {
1227 std::ostringstream name;
1228
1229 if (block.getInstanceName())
1230 {
1231 name << block.getInstanceName();
1232
1233 if (block.getFlags() & LAYOUT_DESCRIPTOR_INDEXING)
1234 name << "[nonuniformEXT(" << instanceNdx << ")]";
1235 else if (block.isArray())
1236 name << "[" << instanceNdx << "]";
1237
1238 name << ".";
1239 }
1240 else
1241 DE_ASSERT(instanceNdx == 0);
1242
1243 name << var.getName();
1244
1245 for (glu::TypeComponentVector::const_iterator pathComp = accessPath.begin(); pathComp != accessPath.end();
1246 pathComp++)
1247 {
1248 if (pathComp->type == glu::VarTypeComponent::STRUCT_MEMBER)
1249 {
1250 const VarType curType = glu::getVarType(var.getType(), accessPath.begin(), pathComp);
1251 const StructType *structPtr = curType.getStructPtr();
1252
1253 name << "." << structPtr->getMember(pathComp->index).getName();
1254 }
1255 else if (pathComp->type == glu::VarTypeComponent::ARRAY_ELEMENT)
1256 name << "[" << pathComp->index << "]";
1257 else
1258 DE_ASSERT(false);
1259 }
1260
1261 return name.str();
1262 }
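
// The two name helpers differ in how arrays are printed. Assuming a block named
// "Block" with instance name "block" declared as an array, and a member "S s[];",
// the API-side name of s[3].m is "Block.s[0].m" (top- and bottom-level arrays
// collapse to [0]), while the shader-side name for instance 1 is "block[1].s[3].m",
// or "block[nonuniformEXT(1)].s[3].m" when descriptor indexing is enabled.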
1263
1264 int computeOffset(const BufferVarLayoutEntry &varLayout, const glu::TypeComponentVector &accessPath)
1265 {
1266 const int topLevelNdx = (accessPath.size() > 1 && accessPath.front().type == glu::VarTypeComponent::ARRAY_ELEMENT) ?
1267 accessPath.front().index :
1268 0;
1269 const int bottomLevelNdx = (!accessPath.empty() && accessPath.back().type == glu::VarTypeComponent::ARRAY_ELEMENT) ?
1270 accessPath.back().index :
1271 0;
1272
1273 return varLayout.offset + varLayout.topLevelArrayStride * topLevelNdx + varLayout.arrayStride * bottomLevelNdx;
1274 }
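
// In other words, the returned byte offset is
//   offset + topLevelArrayStride * outerIndex + arrayStride * innerIndex
// where outerIndex comes from a leading array component of the access path and
// innerIndex from a trailing one, each defaulting to 0 when absent.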
1275
1276 void generateCompareSrc(std::ostream &src, const char *resultVar, const BufferLayout &bufferLayout,
1277 const BufferBlock &block, int instanceNdx, const BlockDataPtr &blockPtr,
1278 const BufferVar &bufVar, const glu::SubTypeAccess &accessPath, MatrixLoadFlags matrixLoadFlag,
1279 int &compareLimit)
1280 {
1281 const VarType curType = accessPath.getType();
1282
1283 // If the limit on the number of compare operations has been reached, skip the remaining compares.
1284 if (compareLimit < 1)
1285 return;
1286
1287 if (curType.isArrayType())
1288 {
1289 const int arraySize = curType.getArraySize() == VarType::UNSIZED_ARRAY ?
1290 block.getLastUnsizedArraySize(instanceNdx) :
1291 curType.getArraySize();
1292
1293 for (int elemNdx = 0; elemNdx < arraySize; elemNdx++)
1294 generateCompareSrc(src, resultVar, bufferLayout, block, instanceNdx, blockPtr, bufVar,
1295 accessPath.element(elemNdx), LOAD_FULL_MATRIX, compareLimit);
1296 }
1297 else if (curType.isStructType())
1298 {
1299 const int numMembers = curType.getStructPtr()->getNumMembers();
1300
1301 for (int memberNdx = 0; memberNdx < numMembers; memberNdx++)
1302 generateCompareSrc(src, resultVar, bufferLayout, block, instanceNdx, blockPtr, bufVar,
1303 accessPath.member(memberNdx), LOAD_FULL_MATRIX, compareLimit);
1304 }
1305 else
1306 {
1307 DE_ASSERT(curType.isBasicType());
1308 compareLimit--;
1309
1310 const string apiName = getAPIName(block, bufVar, accessPath.getPath());
1311 const int varNdx = bufferLayout.getVariableIndex(apiName);
1312
1313 DE_ASSERT(varNdx >= 0);
1314 {
1315 const BufferVarLayoutEntry &varLayout = bufferLayout.bufferVars[varNdx];
1316 const string shaderName = getShaderName(block, instanceNdx, bufVar, accessPath.getPath());
1317 const glu::DataType basicType = curType.getBasicType();
1318 const bool isMatrix = glu::isDataTypeMatrix(basicType);
1319 const char *typeName = glu::getDataTypeName(basicType);
1320 const void *valuePtr = (const uint8_t *)blockPtr.ptr + computeOffset(varLayout, accessPath.getPath());
1321
1322 if (isMatrix)
1323 {
1324 if (matrixLoadFlag == LOAD_MATRIX_COMPONENTS)
1325 generateImmMatrixSrc(src, basicType, varLayout.matrixStride, varLayout.isRowMajor, valuePtr,
1326 resultVar, typeName, shaderName);
1327 else
1328 {
1329 src << "\t" << resultVar << " = compare_" << typeName << "(" << shaderName << ", ";
1330 generateImmMatrixSrc(src, basicType, varLayout.matrixStride, varLayout.isRowMajor, false, -1,
1331 valuePtr);
1332 src << ") && " << resultVar << ";\n";
1333 }
1334 }
1335 else
1336 {
1337 const char *castName = "";
1338 glu::DataType promoteType = vkt::typecomputil::getPromoteType(basicType);
1339 if (basicType != promoteType)
1340 castName = glu::getDataTypeName(promoteType);
1341
1342 src << "\t" << resultVar << " = compare_" << typeName << "(" << castName << "(" << shaderName << "), ";
1343 generateImmScalarVectorSrc(src, basicType, valuePtr);
1344 src << ") && " << resultVar << ";\n";
1345 }
1346 }
1347 }
1348 }
1349
1350 void generateCompareSrc(std::ostream &src, const char *resultVar, const ShaderInterface &interface,
1351 const BufferLayout &layout, const vector<BlockDataPtr> &blockPointers,
1352 MatrixLoadFlags matrixLoadFlag)
1353 {
1354 // Limit the number of compare operations; some generated tests would otherwise emit
1355 // a very large number of compares, which leads to slow compile times and, in turn,
1356 // to tests being skipped on slower platforms.
1357 int compareLimit = 130;
1358
1359 for (int declNdx = 0; declNdx < interface.getNumBlocks(); declNdx++)
1360 {
1361 const BufferBlock &block = interface.getBlock(declNdx);
1362 const bool isArray = block.isArray();
1363 const int numInstances = isArray ? block.getArraySize() : 1;
1364
1365 DE_ASSERT(!isArray || block.getInstanceName());
1366
1367 for (int instanceNdx = 0; instanceNdx < numInstances; instanceNdx++)
1368 {
1369 const string instanceName =
1370 block.getBlockName() + (isArray ? "[" + de::toString(instanceNdx) + "]" : string(""));
1371 const int blockNdx = layout.getBlockIndex(instanceName);
1372 const BlockDataPtr &blockPtr = blockPointers[blockNdx];
1373
1374 for (BufferBlock::const_iterator varIter = block.begin(); varIter != block.end(); varIter++)
1375 {
1376 const BufferVar &bufVar = *varIter;
1377
1378 if ((bufVar.getFlags() & ACCESS_READ) == 0)
1379 continue; // Don't read from that variable.
1380
1381 generateCompareSrc(src, resultVar, layout, block, instanceNdx, blockPtr, bufVar,
1382 glu::SubTypeAccess(bufVar.getType()), matrixLoadFlag, compareLimit);
1383 }
1384 }
1385 }
1386 }
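
// Each basic-type comparison above expands to one GLSL statement of the form
//   allOk = compare_float((block.v), 5.0) && allOk;
// (with a promote-type cast around the left-hand side for 8/16-bit types), so
// compareLimit roughly bounds the number of such statements emitted per shader.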
1387
1388 // \todo [2013-10-14 pyry] Almost identical to generateCompareSrc - unify?
1389
1390 void generateWriteSrc(std::ostream &src, const BufferLayout &bufferLayout, const BufferBlock &block, int instanceNdx,
1391 const BlockDataPtr &blockPtr, const BufferVar &bufVar, const glu::SubTypeAccess &accessPath,
1392 MatrixStoreFlags matrixStoreFlag)
1393 {
1394 const VarType curType = accessPath.getType();
1395
1396 if (curType.isArrayType())
1397 {
1398 const int arraySize = curType.getArraySize() == VarType::UNSIZED_ARRAY ?
1399 block.getLastUnsizedArraySize(instanceNdx) :
1400 curType.getArraySize();
1401
1402 for (int elemNdx = 0; elemNdx < arraySize; elemNdx++)
1403 generateWriteSrc(src, bufferLayout, block, instanceNdx, blockPtr, bufVar, accessPath.element(elemNdx),
1404 matrixStoreFlag);
1405 }
1406 else if (curType.isStructType())
1407 {
1408 const int numMembers = curType.getStructPtr()->getNumMembers();
1409
1410 for (int memberNdx = 0; memberNdx < numMembers; memberNdx++)
1411 generateWriteSrc(src, bufferLayout, block, instanceNdx, blockPtr, bufVar, accessPath.member(memberNdx),
1412 matrixStoreFlag);
1413 }
1414 else
1415 {
1416 DE_ASSERT(curType.isBasicType());
1417
1418 const string apiName = getAPIName(block, bufVar, accessPath.getPath());
1419 const int varNdx = bufferLayout.getVariableIndex(apiName);
1420
1421 DE_ASSERT(varNdx >= 0);
1422 {
1423 const BufferVarLayoutEntry &varLayout = bufferLayout.bufferVars[varNdx];
1424 const string shaderName = getShaderName(block, instanceNdx, bufVar, accessPath.getPath());
1425 const glu::DataType basicType = curType.getBasicType();
1426 const bool isMatrix = glu::isDataTypeMatrix(basicType);
1427 const void *valuePtr = (const uint8_t *)blockPtr.ptr + computeOffset(varLayout, accessPath.getPath());
1428
1429 const char *castName = "";
1430 glu::DataType promoteType = vkt::typecomputil::getPromoteType(basicType);
1431 if (basicType != promoteType)
1432 castName = glu::getDataTypeName((!isMatrix || matrixStoreFlag == STORE_FULL_MATRIX) ?
1433 basicType :
1434 glu::getDataTypeMatrixColumnType(basicType));
1435
1436 if (isMatrix)
1437 {
1438 switch (matrixStoreFlag)
1439 {
1440 case STORE_FULL_MATRIX:
1441 {
1442 src << "\t" << shaderName << " = " << castName << "(";
1443 generateImmMatrixSrc(src, basicType, varLayout.matrixStride, varLayout.isRowMajor, false, -1,
1444 valuePtr);
1445 src << ");\n";
1446 break;
1447 }
1448 case STORE_MATRIX_COLUMNS:
1449 {
1450 int numCols = glu::getDataTypeMatrixNumColumns(basicType);
1451 for (int colIdx = 0; colIdx < numCols; ++colIdx)
1452 {
1453 src << "\t" << shaderName << "[" << colIdx << "]"
1454 << " = " << castName << "(";
1455 generateImmMatrixSrc(src, basicType, varLayout.matrixStride, varLayout.isRowMajor, true, colIdx,
1456 valuePtr);
1457 src << ");\n";
1458 }
1459 break;
1460 }
1461 default:
1462 DE_ASSERT(false);
1463 break;
1464 }
1465 }
1466 else
1467 {
1468 src << "\t" << shaderName << " = " << castName << "(";
1469 generateImmScalarVectorSrc(src, basicType, valuePtr);
1470 src << ");\n";
1471 }
1472 }
1473 }
1474 }
1475
1476 void generateWriteSrc(std::ostream &src, const ShaderInterface &interface, const BufferLayout &layout,
1477 const vector<BlockDataPtr> &blockPointers, MatrixStoreFlags matrixStoreFlag)
1478 {
1479 for (int declNdx = 0; declNdx < interface.getNumBlocks(); declNdx++)
1480 {
1481 const BufferBlock &block = interface.getBlock(declNdx);
1482 const bool isArray = block.isArray();
1483 const int numInstances = isArray ? block.getArraySize() : 1;
1484
1485 DE_ASSERT(!isArray || block.getInstanceName());
1486
1487 for (int instanceNdx = 0; instanceNdx < numInstances; instanceNdx++)
1488 {
1489 const string instanceName =
1490 block.getBlockName() + (isArray ? "[" + de::toString(instanceNdx) + "]" : string(""));
1491 const int blockNdx = layout.getBlockIndex(instanceName);
1492 const BlockDataPtr &blockPtr = blockPointers[blockNdx];
1493
1494 for (BufferBlock::const_iterator varIter = block.begin(); varIter != block.end(); varIter++)
1495 {
1496 const BufferVar &bufVar = *varIter;
1497
1498 if ((bufVar.getFlags() & ACCESS_WRITE) == 0)
1499 continue; // Don't write to that variable.
1500
1501 generateWriteSrc(src, layout, block, instanceNdx, blockPtr, bufVar,
1502 glu::SubTypeAccess(bufVar.getType()), matrixStoreFlag);
1503 }
1504 }
1505 }
1506 }
1507
1508 string generateComputeShader(const ShaderInterface &interface, const BufferLayout &layout,
1509 const vector<BlockDataPtr> &comparePtrs, const vector<BlockDataPtr> &writePtrs,
1510 MatrixLoadFlags matrixLoadFlag, MatrixStoreFlags matrixStoreFlag,
1511 bool usePhysStorageBuffer)
1512 {
1513 std::ostringstream src;
1514
1515 if (uses16BitStorage(interface) || uses8BitStorage(interface) || usesRelaxedLayout(interface) ||
1516 usesScalarLayout(interface) || usesDescriptorIndexing(interface))
1517 {
1518 src << "#version 450\n";
1519 }
1520 else
1521 src << "#version 310 es\n";
1522
1523 src << "#extension GL_EXT_shader_16bit_storage : enable\n";
1524 src << "#extension GL_EXT_shader_8bit_storage : enable\n";
1525 src << "#extension GL_EXT_scalar_block_layout : enable\n";
1526 src << "#extension GL_EXT_buffer_reference : enable\n";
1527 src << "#extension GL_EXT_nonuniform_qualifier : enable\n";
1528 src << "layout(local_size_x = 1) in;\n";
1529 src << "\n";
1530
1531 // Atomic counter for counting passed invocations.
1532 src << "layout(std140, binding = 0) buffer AcBlock { highp uint ac_numPassed; };\n\n";
1533
1534 std::vector<const StructType *> namedStructs;
1535 interface.getNamedStructs(namedStructs);
1536 for (std::vector<const StructType *>::const_iterator structIter = namedStructs.begin();
1537 structIter != namedStructs.end(); structIter++)
1538 src << glu::declare(*structIter) << ";\n";
1539
1540 {
1541 for (int blockNdx = 0; blockNdx < interface.getNumBlocks(); blockNdx++)
1542 {
1543 const BufferBlock &block = interface.getBlock(blockNdx);
1544 generateDeclaration(src, block, 1 + blockNdx, usePhysStorageBuffer);
1545 }
1546
1547 if (usePhysStorageBuffer)
1548 {
1549 src << "layout (push_constant, std430) uniform PC {\n";
1550 for (int blockNdx = 0; blockNdx < interface.getNumBlocks(); blockNdx++)
1551 {
1552 const BufferBlock &block = interface.getBlock(blockNdx);
1553 if (block.getInstanceName() != DE_NULL)
1554 {
1555 src << " " << block.getBlockName() << " " << block.getInstanceName();
1556 if (block.isArray())
1557 src << "[" << block.getArraySize() << "]";
1558 src << ";\n";
1559 }
1560 }
1561 src << "};\n";
1562 }
1563 }
1564
1565 // Comparison utilities.
1566 src << "\n";
1567 generateCompareFuncs(src, interface);
1568
1569 src << "\n"
1570 "void main (void)\n"
1571 "{\n"
1572 " bool allOk = true;\n";
1573
1574 // Value compare.
1575 generateCompareSrc(src, "allOk", interface, layout, comparePtrs, matrixLoadFlag);
1576
1577 src << " if (allOk)\n"
1578 << " ac_numPassed++;\n"
1579 << "\n";
1580
1581 // Value write.
1582 generateWriteSrc(src, interface, layout, writePtrs, matrixStoreFlag);
1583
1584 src << "}\n";
1585
1586 return src.str();
1587 }
1588
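// Copy a single buffer variable between block storages, honoring the array strides, matrix strides
// and row/column-major order of both layouts.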
1589 void copyBufferVarData(const BufferVarLayoutEntry &dstEntry, const BlockDataPtr &dstBlockPtr,
1590 const BufferVarLayoutEntry &srcEntry, const BlockDataPtr &srcBlockPtr)
1591 {
1592 DE_ASSERT(dstEntry.arraySize <= srcEntry.arraySize);
1593 DE_ASSERT(dstEntry.topLevelArraySize <= srcEntry.topLevelArraySize);
1594 DE_ASSERT(dstBlockPtr.lastUnsizedArraySize <= srcBlockPtr.lastUnsizedArraySize);
1595 DE_ASSERT(dstEntry.type == srcEntry.type);
1596
1597 uint8_t *const dstBasePtr = (uint8_t *)dstBlockPtr.ptr + dstEntry.offset;
1598 const uint8_t *const srcBasePtr = (const uint8_t *)srcBlockPtr.ptr + srcEntry.offset;
1599 const int scalarSize = glu::getDataTypeScalarSize(dstEntry.type);
1600 const bool isMatrix = glu::isDataTypeMatrix(dstEntry.type);
1601 glu::DataType scalarType = glu::getDataTypeScalarType(dstEntry.type);
1602 const size_t compSize = getDataTypeByteSize(scalarType);
1603 const int dstArraySize = dstEntry.arraySize == 0 ? dstBlockPtr.lastUnsizedArraySize : dstEntry.arraySize;
1604 const int dstArrayStride = dstEntry.arrayStride;
1605 const int dstTopLevelSize =
1606 dstEntry.topLevelArraySize == 0 ? dstBlockPtr.lastUnsizedArraySize : dstEntry.topLevelArraySize;
1607 const int dstTopLevelStride = dstEntry.topLevelArrayStride;
1608 const int srcArraySize = srcEntry.arraySize == 0 ? srcBlockPtr.lastUnsizedArraySize : srcEntry.arraySize;
1609 const int srcArrayStride = srcEntry.arrayStride;
1610 const int srcTopLevelSize =
1611 srcEntry.topLevelArraySize == 0 ? srcBlockPtr.lastUnsizedArraySize : srcEntry.topLevelArraySize;
1612 const int srcTopLevelStride = srcEntry.topLevelArrayStride;
1613
1614 DE_ASSERT(dstArraySize <= srcArraySize && dstTopLevelSize <= srcTopLevelSize);
1615 DE_UNREF(srcArraySize && srcTopLevelSize);
1616
1617 for (int topElemNdx = 0; topElemNdx < dstTopLevelSize; topElemNdx++)
1618 {
1619 uint8_t *const dstTopPtr = dstBasePtr + topElemNdx * dstTopLevelStride;
1620 const uint8_t *const srcTopPtr = srcBasePtr + topElemNdx * srcTopLevelStride;
1621
1622 for (int elementNdx = 0; elementNdx < dstArraySize; elementNdx++)
1623 {
1624 uint8_t *const dstElemPtr = dstTopPtr + elementNdx * dstArrayStride;
1625 const uint8_t *const srcElemPtr = srcTopPtr + elementNdx * srcArrayStride;
1626
1627 if (isMatrix)
1628 {
1629 const int numRows = glu::getDataTypeMatrixNumRows(dstEntry.type);
1630 const int numCols = glu::getDataTypeMatrixNumColumns(dstEntry.type);
1631
1632 for (int colNdx = 0; colNdx < numCols; colNdx++)
1633 {
1634 for (int rowNdx = 0; rowNdx < numRows; rowNdx++)
1635 {
1636 uint8_t *dstCompPtr =
1637 dstElemPtr + (dstEntry.isRowMajor ? rowNdx * dstEntry.matrixStride + colNdx * compSize :
1638 colNdx * dstEntry.matrixStride + rowNdx * compSize);
1639 const uint8_t *srcCompPtr =
1640 srcElemPtr + (srcEntry.isRowMajor ? rowNdx * srcEntry.matrixStride + colNdx * compSize :
1641 colNdx * srcEntry.matrixStride + rowNdx * compSize);
1642
1643 DE_ASSERT((intptr_t)(srcCompPtr + compSize) - (intptr_t)srcBlockPtr.ptr <=
1644 (intptr_t)srcBlockPtr.size);
1645 DE_ASSERT((intptr_t)(dstCompPtr + compSize) - (intptr_t)dstBlockPtr.ptr <=
1646 (intptr_t)dstBlockPtr.size);
1647 deMemcpy(dstCompPtr, srcCompPtr, compSize);
1648 }
1649 }
1650 }
1651 else
1652 {
1653 DE_ASSERT((intptr_t)(srcElemPtr + scalarSize * compSize) - (intptr_t)srcBlockPtr.ptr <=
1654 (intptr_t)srcBlockPtr.size);
1655 DE_ASSERT((intptr_t)(dstElemPtr + scalarSize * compSize) - (intptr_t)dstBlockPtr.ptr <=
1656 (intptr_t)dstBlockPtr.size);
1657 deMemcpy(dstElemPtr, srcElemPtr, scalarSize * compSize);
1658 }
1659 }
1660 }
1661 }
1662
1663 void copyData(const BufferLayout &dstLayout, const vector<BlockDataPtr> &dstBlockPointers,
1664 const BufferLayout &srcLayout, const vector<BlockDataPtr> &srcBlockPointers)
1665 {
1666 // \note Src layout is used as the reference in case activeVarIndices happens to be incorrect in dstLayout blocks.
1667 int numBlocks = (int)srcLayout.blocks.size();
1668
1669 for (int srcBlockNdx = 0; srcBlockNdx < numBlocks; srcBlockNdx++)
1670 {
1671 const BlockLayoutEntry &srcBlock = srcLayout.blocks[srcBlockNdx];
1672 const BlockDataPtr &srcBlockPtr = srcBlockPointers[srcBlockNdx];
1673 int dstBlockNdx = dstLayout.getBlockIndex(srcBlock.name.c_str());
1674
1675 if (dstBlockNdx >= 0)
1676 {
1677 DE_ASSERT(de::inBounds(dstBlockNdx, 0, (int)dstBlockPointers.size()));
1678
1679 const BlockDataPtr &dstBlockPtr = dstBlockPointers[dstBlockNdx];
1680
1681 for (vector<int>::const_iterator srcVarNdxIter = srcBlock.activeVarIndices.begin();
1682 srcVarNdxIter != srcBlock.activeVarIndices.end(); srcVarNdxIter++)
1683 {
1684 const BufferVarLayoutEntry &srcEntry = srcLayout.bufferVars[*srcVarNdxIter];
1685 int dstVarNdx = dstLayout.getVariableIndex(srcEntry.name.c_str());
1686
1687 if (dstVarNdx >= 0)
1688 copyBufferVarData(dstLayout.bufferVars[dstVarNdx], dstBlockPtr, srcEntry, srcBlockPtr);
1689 }
1690 }
1691 }
1692 }
1693
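// Recursively copy one variable (descending into arrays and structs) from the source block storage
// to the destination block storage.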
1694 void copyNonWrittenData(const BufferLayout &layout, const BufferBlock &block, int instanceNdx,
1695 const BlockDataPtr &srcBlockPtr, const BlockDataPtr &dstBlockPtr, const BufferVar &bufVar,
1696 const glu::SubTypeAccess &accessPath)
1697 {
1698 const VarType curType = accessPath.getType();
1699
1700 if (curType.isArrayType())
1701 {
1702 const int arraySize = curType.getArraySize() == VarType::UNSIZED_ARRAY ?
1703 block.getLastUnsizedArraySize(instanceNdx) :
1704 curType.getArraySize();
1705
1706 for (int elemNdx = 0; elemNdx < arraySize; elemNdx++)
1707 copyNonWrittenData(layout, block, instanceNdx, srcBlockPtr, dstBlockPtr, bufVar,
1708 accessPath.element(elemNdx));
1709 }
1710 else if (curType.isStructType())
1711 {
1712 const int numMembers = curType.getStructPtr()->getNumMembers();
1713
1714 for (int memberNdx = 0; memberNdx < numMembers; memberNdx++)
1715 copyNonWrittenData(layout, block, instanceNdx, srcBlockPtr, dstBlockPtr, bufVar,
1716 accessPath.member(memberNdx));
1717 }
1718 else
1719 {
1720 DE_ASSERT(curType.isBasicType());
1721
1722 const string apiName = getAPIName(block, bufVar, accessPath.getPath());
1723 const int varNdx = layout.getVariableIndex(apiName);
1724
1725 DE_ASSERT(varNdx >= 0);
1726 {
1727 const BufferVarLayoutEntry &varLayout = layout.bufferVars[varNdx];
1728 copyBufferVarData(varLayout, dstBlockPtr, varLayout, srcBlockPtr);
1729 }
1730 }
1731 }
1732
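// Copy variables that the shader does not write (no ACCESS_WRITE flag) from srcPtrs to dstPtrs so
// that their expected contents stay equal to the initial data.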
1733 void copyNonWrittenData(const ShaderInterface &interface, const BufferLayout &layout,
1734 const vector<BlockDataPtr> &srcPtrs, const vector<BlockDataPtr> &dstPtrs)
1735 {
1736 for (int declNdx = 0; declNdx < interface.getNumBlocks(); declNdx++)
1737 {
1738 const BufferBlock &block = interface.getBlock(declNdx);
1739 const bool isArray = block.isArray();
1740 const int numInstances = isArray ? block.getArraySize() : 1;
1741
1742 DE_ASSERT(!isArray || block.getInstanceName());
1743
1744 for (int instanceNdx = 0; instanceNdx < numInstances; instanceNdx++)
1745 {
1746 const string instanceName =
1747 block.getBlockName() + (isArray ? "[" + de::toString(instanceNdx) + "]" : string(""));
1748 const int blockNdx = layout.getBlockIndex(instanceName);
1749 const BlockDataPtr &srcBlockPtr = srcPtrs[blockNdx];
1750 const BlockDataPtr &dstBlockPtr = dstPtrs[blockNdx];
1751
1752 for (BufferBlock::const_iterator varIter = block.begin(); varIter != block.end(); varIter++)
1753 {
1754 const BufferVar &bufVar = *varIter;
1755
1756 if (bufVar.getFlags() & ACCESS_WRITE)
1757 continue;
1758
1759 copyNonWrittenData(layout, block, instanceNdx, srcBlockPtr, dstBlockPtr, bufVar,
1760 glu::SubTypeAccess(bufVar.getType()));
1761 }
1762 }
1763 }
1764 }
1765
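// Compare numComps scalar components: floats within a fixed threshold, booleans by truth value,
// other scalar types bytewise.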
1766 bool compareComponents(glu::DataType scalarType, const void *ref, const void *res, int numComps)
1767 {
1768 if (scalarType == glu::TYPE_FLOAT)
1769 {
1770 const float threshold = 0.05f; // Same as used in shaders - should be fine for values being used.
1771
1772 for (int ndx = 0; ndx < numComps; ndx++)
1773 {
1774 const float refVal = *((const float *)ref + ndx);
1775 const float resVal = *((const float *)res + ndx);
1776
1777 if (deFloatAbs(resVal - refVal) >= threshold)
1778 return false;
1779 }
1780 }
1781 else if (scalarType == glu::TYPE_BOOL)
1782 {
1783 for (int ndx = 0; ndx < numComps; ndx++)
1784 {
1785 const uint32_t refVal = *((const uint32_t *)ref + ndx);
1786 const uint32_t resVal = *((const uint32_t *)res + ndx);
1787
1788 if ((refVal != 0) != (resVal != 0))
1789 return false;
1790 }
1791 }
1792 else if (scalarType == glu::TYPE_INT8 || scalarType == glu::TYPE_UINT8)
1793 {
1794 return deMemCmp(ref, res, numComps * sizeof(uint8_t)) == 0;
1795 }
1796 else if (scalarType == glu::TYPE_INT16 || scalarType == glu::TYPE_UINT16 || scalarType == glu::TYPE_FLOAT16)
1797 {
1798 return deMemCmp(ref, res, numComps * sizeof(uint16_t)) == 0;
1799 }
1800 else
1801 {
1802 DE_ASSERT(scalarType == glu::TYPE_INT || scalarType == glu::TYPE_UINT);
1803
1804 return deMemCmp(ref, res, numComps * sizeof(uint32_t)) == 0;
1805 }
1806
1807 return true;
1808 }
1809
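// Compare one buffer variable between reference and result storage, logging at most a few mismatching elements.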
1810 bool compareBufferVarData(tcu::TestLog &log, const BufferVarLayoutEntry &refEntry, const BlockDataPtr &refBlockPtr,
1811 const BufferVarLayoutEntry &resEntry, const BlockDataPtr &resBlockPtr)
1812 {
1813 DE_ASSERT(resEntry.arraySize <= refEntry.arraySize);
1814 DE_ASSERT(resEntry.topLevelArraySize <= refEntry.topLevelArraySize);
1815 DE_ASSERT(resBlockPtr.lastUnsizedArraySize <= refBlockPtr.lastUnsizedArraySize);
1816 DE_ASSERT(resEntry.type == refEntry.type);
1817
1818 uint8_t *const resBasePtr = (uint8_t *)resBlockPtr.ptr + resEntry.offset;
1819 const uint8_t *const refBasePtr = (const uint8_t *)refBlockPtr.ptr + refEntry.offset;
1820 const glu::DataType scalarType = glu::getDataTypeScalarType(refEntry.type);
1821 const int scalarSize = glu::getDataTypeScalarSize(resEntry.type);
1822 const bool isMatrix = glu::isDataTypeMatrix(resEntry.type);
1823 const size_t compSize = getDataTypeByteSize(scalarType);
1824 const int maxPrints = 3;
1825 int numFailed = 0;
1826
1827 const int resArraySize = resEntry.arraySize == 0 ? resBlockPtr.lastUnsizedArraySize : resEntry.arraySize;
1828 const int resArrayStride = resEntry.arrayStride;
1829 const int resTopLevelSize =
1830 resEntry.topLevelArraySize == 0 ? resBlockPtr.lastUnsizedArraySize : resEntry.topLevelArraySize;
1831 const int resTopLevelStride = resEntry.topLevelArrayStride;
1832 const int refArraySize = refEntry.arraySize == 0 ? refBlockPtr.lastUnsizedArraySize : refEntry.arraySize;
1833 const int refArrayStride = refEntry.arrayStride;
1834 const int refTopLevelSize =
1835 refEntry.topLevelArraySize == 0 ? refBlockPtr.lastUnsizedArraySize : refEntry.topLevelArraySize;
1836 const int refTopLevelStride = refEntry.topLevelArrayStride;
1837
1838 DE_ASSERT(resArraySize <= refArraySize && resTopLevelSize <= refTopLevelSize);
1839 DE_UNREF(refArraySize && refTopLevelSize);
1840
1841 for (int topElemNdx = 0; topElemNdx < resTopLevelSize; topElemNdx++)
1842 {
1843 uint8_t *const resTopPtr = resBasePtr + topElemNdx * resTopLevelStride;
1844 const uint8_t *const refTopPtr = refBasePtr + topElemNdx * refTopLevelStride;
1845
1846 for (int elementNdx = 0; elementNdx < resArraySize; elementNdx++)
1847 {
1848 uint8_t *const resElemPtr = resTopPtr + elementNdx * resArrayStride;
1849 const uint8_t *const refElemPtr = refTopPtr + elementNdx * refArrayStride;
1850
1851 if (isMatrix)
1852 {
1853 const int numRows = glu::getDataTypeMatrixNumRows(resEntry.type);
1854 const int numCols = glu::getDataTypeMatrixNumColumns(resEntry.type);
1855 bool isOk = true;
1856
1857 for (int colNdx = 0; colNdx < numCols; colNdx++)
1858 {
1859 for (int rowNdx = 0; rowNdx < numRows; rowNdx++)
1860 {
1861 uint8_t *resCompPtr =
1862 resElemPtr + (resEntry.isRowMajor ? rowNdx * resEntry.matrixStride + colNdx * compSize :
1863 colNdx * resEntry.matrixStride + rowNdx * compSize);
1864 const uint8_t *refCompPtr =
1865 refElemPtr + (refEntry.isRowMajor ? rowNdx * refEntry.matrixStride + colNdx * compSize :
1866 colNdx * refEntry.matrixStride + rowNdx * compSize);
1867
1868 DE_ASSERT((intptr_t)(refCompPtr + compSize) - (intptr_t)refBlockPtr.ptr <=
1869 (intptr_t)refBlockPtr.size);
1870 DE_ASSERT((intptr_t)(resCompPtr + compSize) - (intptr_t)resBlockPtr.ptr <=
1871 (intptr_t)resBlockPtr.size);
1872
1873 isOk = isOk && compareComponents(scalarType, resCompPtr, refCompPtr, 1);
1874 }
1875 }
1876
1877 if (!isOk)
1878 {
1879 numFailed += 1;
1880 if (numFailed < maxPrints)
1881 {
1882 std::ostringstream expected, got;
1883 generateImmMatrixSrc(expected, refEntry.type, refEntry.matrixStride, refEntry.isRowMajor, false,
1884 -1, refElemPtr);
1885 generateImmMatrixSrc(got, resEntry.type, resEntry.matrixStride, resEntry.isRowMajor, false, -1,
1886 resElemPtr);
1887 log << TestLog::Message << "ERROR: mismatch in " << refEntry.name << ", top-level ndx "
1888 << topElemNdx << ", bottom-level ndx " << elementNdx << ":\n"
1889 << " expected " << expected.str() << "\n"
1890 << " got " << got.str() << TestLog::EndMessage;
1891 }
1892 }
1893 }
1894 else
1895 {
1896 DE_ASSERT((intptr_t)(refElemPtr + scalarSize * compSize) - (intptr_t)refBlockPtr.ptr <=
1897 (intptr_t)refBlockPtr.size);
1898 DE_ASSERT((intptr_t)(resElemPtr + scalarSize * compSize) - (intptr_t)resBlockPtr.ptr <=
1899 (intptr_t)resBlockPtr.size);
1900
1901 const bool isOk = compareComponents(scalarType, resElemPtr, refElemPtr, scalarSize);
1902
1903 if (!isOk)
1904 {
1905 numFailed += 1;
1906 if (numFailed < maxPrints)
1907 {
1908 std::ostringstream expected, got;
1909 generateImmScalarVectorSrc(expected, refEntry.type, refElemPtr);
1910 generateImmScalarVectorSrc(got, resEntry.type, resElemPtr);
1911 log << TestLog::Message << "ERROR: mismatch in " << refEntry.name << ", top-level ndx "
1912 << topElemNdx << ", bottom-level ndx " << elementNdx << ":\n"
1913 << " expected " << expected.str() << "\n"
1914 << " got " << got.str() << TestLog::EndMessage;
1915 }
1916 }
1917 }
1918 }
1919 }
1920
1921 if (numFailed >= maxPrints)
1922 log << TestLog::Message << "... (" << numFailed << " failures for " << refEntry.name << " in total)"
1923 << TestLog::EndMessage;
1924
1925 return numFailed == 0;
1926 }
1927
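// Compare all active variables of every block present in both layouts; returns false if any variable mismatches.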
1928 bool compareData(tcu::TestLog &log, const BufferLayout &refLayout, const vector<BlockDataPtr> &refBlockPointers,
1929 const BufferLayout &resLayout, const vector<BlockDataPtr> &resBlockPointers)
1930 {
1931 const int numBlocks = (int)refLayout.blocks.size();
1932 bool allOk = true;
1933
1934 for (int refBlockNdx = 0; refBlockNdx < numBlocks; refBlockNdx++)
1935 {
1936 const BlockLayoutEntry &refBlock = refLayout.blocks[refBlockNdx];
1937 const BlockDataPtr &refBlockPtr = refBlockPointers[refBlockNdx];
1938 int resBlockNdx = resLayout.getBlockIndex(refBlock.name.c_str());
1939
1940 if (resBlockNdx >= 0)
1941 {
1942 DE_ASSERT(de::inBounds(resBlockNdx, 0, (int)resBlockPointers.size()));
1943
1944 const BlockDataPtr &resBlockPtr = resBlockPointers[resBlockNdx];
1945
1946 for (vector<int>::const_iterator refVarNdxIter = refBlock.activeVarIndices.begin();
1947 refVarNdxIter != refBlock.activeVarIndices.end(); refVarNdxIter++)
1948 {
1949 const BufferVarLayoutEntry &refEntry = refLayout.bufferVars[*refVarNdxIter];
1950 int resVarNdx = resLayout.getVariableIndex(refEntry.name.c_str());
1951
1952 if (resVarNdx >= 0)
1953 {
1954 const BufferVarLayoutEntry &resEntry = resLayout.bufferVars[resVarNdx];
1955 allOk = compareBufferVarData(log, refEntry, refBlockPtr, resEntry, resBlockPtr) && allOk;
1956 }
1957 }
1958 }
1959 }
1960
1961 return allOk;
1962 }
1963
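// Return the API name of a block instance, e.g. "Block" or "Block[2]" for arrayed blocks.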
1964 string getBlockAPIName(const BufferBlock &block, int instanceNdx)
1965 {
1966 DE_ASSERT(block.isArray() || instanceNdx == 0);
1967 return block.getBlockName() + (block.isArray() ? ("[" + de::toString(instanceNdx) + "]") : string());
1968 }
1969
1970 // \note Some implementations don't report block members in the order they are declared.
1971 //       For checking whether the size has to be adjusted by the actual size of some top-level array,
1972 //       we only need to know a) whether there is an unsized top-level array, and b)
1973 //       what the stride of that array is.
1974
1975 static bool hasUnsizedArray(const BufferLayout &layout, const BlockLayoutEntry &entry)
1976 {
1977 for (vector<int>::const_iterator varNdx = entry.activeVarIndices.begin(); varNdx != entry.activeVarIndices.end();
1978 ++varNdx)
1979 {
1980 if (isUnsizedArray(layout.bufferVars[*varNdx]))
1981 return true;
1982 }
1983
1984 return false;
1985 }
1986
1987 static int getUnsizedArrayStride(const BufferLayout &layout, const BlockLayoutEntry &entry)
1988 {
1989 for (vector<int>::const_iterator varNdx = entry.activeVarIndices.begin(); varNdx != entry.activeVarIndices.end();
1990 ++varNdx)
1991 {
1992 const BufferVarLayoutEntry &varEntry = layout.bufferVars[*varNdx];
1993
1994 if (varEntry.arraySize == 0)
1995 return varEntry.arrayStride;
1996 else if (varEntry.topLevelArraySize == 0)
1997 return varEntry.topLevelArrayStride;
1998 }
1999
2000 return 0;
2001 }
2002
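// Compute the byte size of each block instance, adding room for the actual length of a trailing unsized array.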
2003 vector<int> computeBufferSizes(const ShaderInterface &interface, const BufferLayout &layout)
2004 {
2005 vector<int> sizes(layout.blocks.size());
2006
2007 for (int declNdx = 0; declNdx < interface.getNumBlocks(); declNdx++)
2008 {
2009 const BufferBlock &block = interface.getBlock(declNdx);
2010 const bool isArray = block.isArray();
2011 const int numInstances = isArray ? block.getArraySize() : 1;
2012
2013 for (int instanceNdx = 0; instanceNdx < numInstances; instanceNdx++)
2014 {
2015 const string apiName = getBlockAPIName(block, instanceNdx);
2016 const int blockNdx = layout.getBlockIndex(apiName);
2017
2018 if (blockNdx >= 0)
2019 {
2020 const BlockLayoutEntry &blockLayout = layout.blocks[blockNdx];
2021 const int baseSize = blockLayout.size;
2022 const bool isLastUnsized = hasUnsizedArray(layout, blockLayout);
2023 const int lastArraySize = isLastUnsized ? block.getLastUnsizedArraySize(instanceNdx) : 0;
2024 const int stride = isLastUnsized ? getUnsizedArrayStride(layout, blockLayout) : 0;
2025
2026 sizes[blockNdx] = baseSize + lastArraySize * stride;
2027 }
2028 }
2029 }
2030
2031 return sizes;
2032 }
2033
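// Wrap a raw buffer range into a BlockDataPtr, deriving the unsized-array length from the space left
// after the fixed-size part of the block.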
2034 BlockDataPtr getBlockDataPtr(const BufferLayout &layout, const BlockLayoutEntry &blockLayout, void *ptr, int bufferSize)
2035 {
2036 const bool isLastUnsized = hasUnsizedArray(layout, blockLayout);
2037 const int baseSize = blockLayout.size;
2038
2039 if (isLastUnsized)
2040 {
2041 const int lastArrayStride = getUnsizedArrayStride(layout, blockLayout);
2042 const int lastArraySize = (bufferSize - baseSize) / (lastArrayStride ? lastArrayStride : 1);
2043
2044 DE_ASSERT(baseSize + lastArraySize * lastArrayStride == bufferSize);
2045
2046 return BlockDataPtr(ptr, bufferSize, lastArraySize);
2047 }
2048 else
2049 return BlockDataPtr(ptr, bufferSize, 0);
2050 }
2051
2052 struct Buffer
2053 {
2054 uint32_t buffer;
2055 int size;
2056
2057 Buffer(uint32_t buffer_, int size_) : buffer(buffer_), size(size_)
2058 {
2059 }
2060 Buffer(void) : buffer(0), size(0)
2061 {
2062 }
2063 };
2064
2065 struct BlockLocation
2066 {
2067 int index;
2068 int offset;
2069 int size;
2070
2071 BlockLocation(int index_, int offset_, int size_) : index(index_), offset(offset_), size(size_)
2072 {
2073 }
2074 BlockLocation(void) : index(0), offset(0), size(0)
2075 {
2076 }
2077 };
2078
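// Allocate host-side reference storage for all blocks and fill in per-block data pointers;
// each block is padded to 16-byte (vec4) alignment.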
2079 void initRefDataStorage(const ShaderInterface &interface, const BufferLayout &layout, RefDataStorage &storage)
2080 {
2081 DE_ASSERT(storage.data.empty() && storage.pointers.empty());
2082
2083 const vector<int> bufferSizes = computeBufferSizes(interface, layout);
2084 int totalSize = 0;
2085 const int vec4Alignment = (int)sizeof(uint32_t) * 4;
2086
2087 for (vector<int>::const_iterator sizeIter = bufferSizes.begin(); sizeIter != bufferSizes.end(); ++sizeIter)
2088 {
2089 // Include enough space for alignment of individual blocks
2090 totalSize += deRoundUp32(*sizeIter, vec4Alignment);
2091 }
2092
2093 storage.data.resize(totalSize);
2094
2095 // Pointers for each block.
2096 {
2097 uint8_t *basePtr = storage.data.empty() ? DE_NULL : &storage.data[0];
2098 int curOffset = 0;
2099
2100 DE_ASSERT(bufferSizes.size() == layout.blocks.size());
2101 DE_ASSERT(totalSize == 0 || basePtr);
2102
2103 storage.pointers.resize(layout.blocks.size());
2104
2105 for (int blockNdx = 0; blockNdx < (int)layout.blocks.size(); blockNdx++)
2106 {
2107 const BlockLayoutEntry &blockLayout = layout.blocks[blockNdx];
2108 const int bufferSize = bufferSizes[blockNdx];
2109
2110 storage.pointers[blockNdx] = getBlockDataPtr(layout, blockLayout, basePtr + curOffset, bufferSize);
2111
2112 // Ensure each new block starts fully aligned to avoid unaligned host accesses
2113 curOffset += deRoundUp32(bufferSize, vec4Alignment);
2114 }
2115 }
2116 }
2117
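// Convert block locations (buffer index, offset, size) into BlockDataPtrs using the mapped buffer pointers.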
2118 vector<BlockDataPtr> blockLocationsToPtrs(const BufferLayout &layout, const vector<BlockLocation> &blockLocations,
2119 const vector<void *> &bufPtrs)
2120 {
2121 vector<BlockDataPtr> blockPtrs(blockLocations.size());
2122
2123 DE_ASSERT(layout.blocks.size() == blockLocations.size());
2124
2125 for (int blockNdx = 0; blockNdx < (int)layout.blocks.size(); blockNdx++)
2126 {
2127 const BlockLayoutEntry &blockLayout = layout.blocks[blockNdx];
2128 const BlockLocation &location = blockLocations[blockNdx];
2129
2130 blockPtrs[blockNdx] =
2131 getBlockDataPtr(layout, blockLayout, (uint8_t *)bufPtrs[location.index] + location.offset, location.size);
2132 }
2133
2134 return blockPtrs;
2135 }
2136
2137 } // namespace
2138
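// Allocate memory that satisfies the buffer's requirements and bind it to the buffer.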
2139 de::MovePtr<vk::Allocation> allocateAndBindMemory(Context &context, vk::VkBuffer buffer, vk::MemoryRequirement memReqs)
2140 {
2141 const vk::DeviceInterface &vkd = context.getDeviceInterface();
2142 const vk::VkMemoryRequirements bufReqs = vk::getBufferMemoryRequirements(vkd, context.getDevice(), buffer);
2143 de::MovePtr<vk::Allocation> memory = context.getDefaultAllocator().allocate(bufReqs, memReqs);
2144
2145 vkd.bindBufferMemory(context.getDevice(), buffer, memory->getMemory(), memory->getOffset());
2146
2147 return memory;
2148 }
2149
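// Create an exclusive-sharing buffer of the given size and usage for the universal queue family.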
2150 vk::Move<vk::VkBuffer> createBuffer(Context &context, vk::VkDeviceSize bufferSize, vk::VkBufferUsageFlags usageFlags)
2151 {
2152 const vk::VkDevice vkDevice = context.getDevice();
2153 const vk::DeviceInterface &vk = context.getDeviceInterface();
2154 const uint32_t queueFamilyIndex = context.getUniversalQueueFamilyIndex();
2155
2156 const vk::VkBufferCreateInfo bufferInfo = {
2157 vk::VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
2158 DE_NULL, // const void* pNext;
2159 0u, // VkBufferCreateFlags flags;
2160 bufferSize, // VkDeviceSize size;
2161 usageFlags, // VkBufferUsageFlags usage;
2162 vk::VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
2163 1u, // uint32_t queueFamilyIndexCount;
2164 &queueFamilyIndex // const uint32_t* pQueueFamilyIndices;
2165 };
2166
2167 return vk::createBuffer(vk, vkDevice, &bufferInfo);
2168 }
2169
2170 // SSBOLayoutCaseInstance
2171
2172 class SSBOLayoutCaseInstance : public TestInstance
2173 {
2174 public:
2175 SSBOLayoutCaseInstance(Context &context, SSBOLayoutCase::BufferMode bufferMode, const ShaderInterface &interface,
2176 const BufferLayout &refLayout, const RefDataStorage &initialData,
2177 const RefDataStorage &writeData, bool usePhysStorageBuffer);
2178 virtual ~SSBOLayoutCaseInstance(void);
2179 virtual tcu::TestStatus iterate(void);
2180
2181 private:
2182 SSBOLayoutCase::BufferMode m_bufferMode;
2183 const ShaderInterface &m_interface;
2184 const BufferLayout &m_refLayout;
2185 const RefDataStorage &m_initialData; // Initial data stored in buffer.
2186 const RefDataStorage &m_writeData; // Data written by compute shader.
2187 const bool m_usePhysStorageBuffer;
2188
2189 typedef de::SharedPtr<vk::Unique<vk::VkBuffer>> VkBufferSp;
2190 typedef de::SharedPtr<vk::Allocation> AllocationSp;
2191
2192 std::vector<VkBufferSp> m_uniformBuffers;
2193 std::vector<AllocationSp> m_uniformAllocs;
2194 };
2195
2196 SSBOLayoutCaseInstance::SSBOLayoutCaseInstance(Context &context, SSBOLayoutCase::BufferMode bufferMode,
2197 const ShaderInterface &interface, const BufferLayout &refLayout,
2198 const RefDataStorage &initialData, const RefDataStorage &writeData,
2199 bool usePhysStorageBuffer)
2200 : TestInstance(context)
2201 , m_bufferMode(bufferMode)
2202 , m_interface(interface)
2203 , m_refLayout(refLayout)
2204 , m_initialData(initialData)
2205 , m_writeData(writeData)
2206 , m_usePhysStorageBuffer(usePhysStorageBuffer)
2207 {
2208 }
2209
2210 SSBOLayoutCaseInstance::~SSBOLayoutCaseInstance(void)
2211 {
2212 }
2213
2214 tcu::TestStatus SSBOLayoutCaseInstance::iterate(void)
2215 {
2216 // todo: add compute stage availability check
2217 const vk::DeviceInterface &vk = m_context.getDeviceInterface();
2218 const vk::VkDevice device = m_context.getDevice();
2219 const vk::VkQueue queue = m_context.getUniversalQueue();
2220 const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
2221
2222 // Create descriptor set
2223 const uint32_t acBufferSize = 1024;
2224 vk::Move<vk::VkBuffer> acBuffer(createBuffer(m_context, acBufferSize, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT));
2225 de::UniquePtr<vk::Allocation> acBufferAlloc(
2226 allocateAndBindMemory(m_context, *acBuffer, vk::MemoryRequirement::HostVisible));
2227
2228 deMemset(acBufferAlloc->getHostPtr(), 0, acBufferSize);
2229 flushMappedMemoryRange(vk, device, acBufferAlloc->getMemory(), acBufferAlloc->getOffset(), acBufferSize);
2230
2231 vk::DescriptorSetLayoutBuilder setLayoutBuilder;
2232 vk::DescriptorPoolBuilder poolBuilder;
2233
2234 setLayoutBuilder.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT);
2235
2236 int numBlocks = 0;
2237 const int numBindings = m_interface.getNumBlocks();
2238 for (int bindingNdx = 0; bindingNdx < numBindings; bindingNdx++)
2239 {
2240 const BufferBlock &block = m_interface.getBlock(bindingNdx);
2241 if (block.isArray())
2242 {
2243 setLayoutBuilder.addArrayBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, block.getArraySize(),
2244 vk::VK_SHADER_STAGE_COMPUTE_BIT);
2245 numBlocks += block.getArraySize();
2246 }
2247 else
2248 {
2249 setLayoutBuilder.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT);
2250 numBlocks += 1;
2251 }
2252 }
2253
2254 poolBuilder.addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, (uint32_t)(1 + numBlocks));
2255
2256 const vk::Unique<vk::VkDescriptorSetLayout> descriptorSetLayout(setLayoutBuilder.build(vk, device));
2257 const vk::Unique<vk::VkDescriptorPool> descriptorPool(
2258 poolBuilder.build(vk, device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
2259
2260 const vk::VkDescriptorSetAllocateInfo allocInfo = {
2261 vk::VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, DE_NULL, *descriptorPool, 1u, &descriptorSetLayout.get(),
2262 };
2263
2264 const vk::Unique<vk::VkDescriptorSet> descriptorSet(allocateDescriptorSet(vk, device, &allocInfo));
2265 const vk::VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*acBuffer, 0ull, acBufferSize);
2266
2267 vk::DescriptorSetUpdateBuilder setUpdateBuilder;
2268 std::vector<vk::VkDescriptorBufferInfo> descriptors(numBlocks);
2269
2270 setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0u),
2271 vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo);
2272
2273 vector<BlockDataPtr> mappedBlockPtrs;
2274
2275 vk::VkFlags usageFlags = vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
2276 bool memoryDeviceAddress = false;
2277 if (m_usePhysStorageBuffer)
2278 {
2279 usageFlags |= vk::VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
2280 if (m_context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address"))
2281 memoryDeviceAddress = true;
2282 }
2283
2284 // Upload base buffers
2285 const std::vector<int> bufferSizes = computeBufferSizes(m_interface, m_refLayout);
2286 {
2287 std::vector<void *> mapPtrs;
2288 std::vector<BlockLocation> blockLocations(numBlocks);
2289
2290 DE_ASSERT(bufferSizes.size() == m_refLayout.blocks.size());
2291
2292 if (m_bufferMode == SSBOLayoutCase::BUFFERMODE_PER_BLOCK)
2293 {
2294 mapPtrs.resize(numBlocks);
2295 for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
2296 {
2297 const uint32_t bufferSize = bufferSizes[blockNdx];
2298 DE_ASSERT(bufferSize > 0);
2299
2300 blockLocations[blockNdx] = BlockLocation(blockNdx, 0, bufferSize);
2301
2302 vk::Move<vk::VkBuffer> buffer = createBuffer(m_context, bufferSize, usageFlags);
2303 de::MovePtr<vk::Allocation> alloc = allocateAndBindMemory(
2304 m_context, *buffer,
2305 vk::MemoryRequirement::HostVisible |
2306 (memoryDeviceAddress ? vk::MemoryRequirement::DeviceAddress : vk::MemoryRequirement::Any));
2307
2308 descriptors[blockNdx] = makeDescriptorBufferInfo(*buffer, 0ull, bufferSize);
2309
2310 mapPtrs[blockNdx] = alloc->getHostPtr();
2311
2312 m_uniformBuffers.push_back(VkBufferSp(new vk::Unique<vk::VkBuffer>(buffer)));
2313 m_uniformAllocs.push_back(AllocationSp(alloc.release()));
2314 }
2315 }
2316 else
2317 {
2318 DE_ASSERT(m_bufferMode == SSBOLayoutCase::BUFFERMODE_SINGLE);
2319
2320 vk::VkPhysicalDeviceProperties properties;
2321 m_context.getInstanceInterface().getPhysicalDeviceProperties(m_context.getPhysicalDevice(), &properties);
2322 const int bindingAlignment = (int)properties.limits.minStorageBufferOffsetAlignment;
2323 int curOffset = 0;
2324 for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
2325 {
2326 const int bufferSize = bufferSizes[blockNdx];
2327 DE_ASSERT(bufferSize > 0);
2328
2329 if (bindingAlignment > 0)
2330 curOffset = deRoundUp32(curOffset, bindingAlignment);
2331
2332 blockLocations[blockNdx] = BlockLocation(0, curOffset, bufferSize);
2333 curOffset += bufferSize;
2334 }
2335
2336 const int totalBufferSize = curOffset;
2337 vk::Move<vk::VkBuffer> buffer = createBuffer(m_context, totalBufferSize, usageFlags);
2338 de::MovePtr<vk::Allocation> alloc = allocateAndBindMemory(
2339 m_context, *buffer,
2340 vk::MemoryRequirement::HostVisible |
2341 (memoryDeviceAddress ? vk::MemoryRequirement::DeviceAddress : vk::MemoryRequirement::Any));
2342
2343 mapPtrs.push_back(alloc->getHostPtr());
2344
2345 for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
2346 {
2347 const uint32_t bufferSize = bufferSizes[blockNdx];
2348 const uint32_t offset = blockLocations[blockNdx].offset;
2349
2350 descriptors[blockNdx] = makeDescriptorBufferInfo(*buffer, offset, bufferSize);
2351 }
2352
2353 m_uniformBuffers.push_back(VkBufferSp(new vk::Unique<vk::VkBuffer>(buffer)));
2354 m_uniformAllocs.push_back(AllocationSp(alloc.release()));
2355 }
2356
2357 // Update remaining bindings
2358 {
2359 int blockNdx = 0;
2360 for (int bindingNdx = 0; bindingNdx < numBindings; ++bindingNdx)
2361 {
2362 const BufferBlock &block = m_interface.getBlock(bindingNdx);
2363 const int numBlocksInBinding = (block.isArray() ? block.getArraySize() : 1);
2364
2365 setUpdateBuilder.writeArray(
2366 *descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(bindingNdx + 1),
2367 vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, numBlocksInBinding, &descriptors[blockNdx]);
2368
2369 blockNdx += numBlocksInBinding;
2370 }
2371 }
2372
2373 // Copy the initial data to the storage buffers
2374 {
2375 mappedBlockPtrs = blockLocationsToPtrs(m_refLayout, blockLocations, mapPtrs);
2376 copyData(m_refLayout, mappedBlockPtrs, m_refLayout, m_initialData.pointers);
2377
2378 for (size_t allocNdx = 0; allocNdx < m_uniformAllocs.size(); allocNdx++)
2379 {
2380 vk::Allocation *alloc = m_uniformAllocs[allocNdx].get();
2381 flushMappedMemoryRange(vk, device, alloc->getMemory(), alloc->getOffset(), VK_WHOLE_SIZE);
2382 }
2383 }
2384 }
2385
2386 std::vector<vk::VkDeviceAddress> gpuAddrs;
2387 // Query the buffer device addresses and push them via push constants
2388 if (m_usePhysStorageBuffer)
2389 {
2390 //const bool useKHR = m_context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address");
2391
2392 vk::VkBufferDeviceAddressInfo info = {
2393 vk::VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, // VkStructureType sType;
2394 DE_NULL, // const void* pNext;
2395 0, // VkBuffer buffer
2396 };
2397
2398 for (uint32_t i = 0; i < descriptors.size(); ++i)
2399 {
2400 info.buffer = descriptors[i].buffer;
2401 vk::VkDeviceAddress addr;
2402 //if (useKHR)
2403 addr = vk.getBufferDeviceAddress(device, &info);
2404 //else
2405 // addr = vk.getBufferDeviceAddressEXT(device, &info);
2406 addr += descriptors[i].offset;
2407 gpuAddrs.push_back(addr);
2408 }
2409 }
2410
2411 setUpdateBuilder.update(vk, device);
2412
2413 const vk::VkPushConstantRange pushConstRange = {
2414 vk::VK_SHADER_STAGE_COMPUTE_BIT, // VkShaderStageFlags stageFlags
2415 0, // uint32_t offset
2416 (uint32_t)(sizeof(vk::VkDeviceAddress) * descriptors.size()) // uint32_t size
2417 };
2418
2419 // Must fit within the spec-guaranteed minimum maxPushConstantsSize (128 bytes).
2420 DE_ASSERT(pushConstRange.size <= 128);
2421
2422 const vk::VkPipelineLayoutCreateInfo pipelineLayoutParams = {
2423 vk::VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // VkStructureType sType;
2424 DE_NULL, // const void* pNext;
2425 (vk::VkPipelineLayoutCreateFlags)0, // VkPipelineLayoutCreateFlags flags;
2426 1u, // uint32_t setLayoutCount;
2427 &*descriptorSetLayout, // const VkDescriptorSetLayout* pSetLayouts;
2428 m_usePhysStorageBuffer ? 1u : 0u, // uint32_t pushConstantRangeCount;
2429 &pushConstRange, // const VkPushConstantRange* pPushConstantRanges;
2430 };
2431 vk::Move<vk::VkPipelineLayout> pipelineLayout(createPipelineLayout(vk, device, &pipelineLayoutParams));
2432
2433 m_context.getTestContext().touchWatchdogAndDisableIntervalTimeLimit();
2434
2435 vk::Move<vk::VkShaderModule> shaderModule(
2436 createShaderModule(vk, device, m_context.getBinaryCollection().get("compute"), 0));
2437 const vk::VkPipelineShaderStageCreateInfo pipelineShaderStageParams = {
2438 vk::VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
2439 DE_NULL, // const void* pNext;
2440 (vk::VkPipelineShaderStageCreateFlags)0, // VkPipelineShaderStageCreateFlags flags;
2441 vk::VK_SHADER_STAGE_COMPUTE_BIT, // VkShaderStageFlagBits stage;
2442 *shaderModule, // VkShaderModule module;
2443 "main", // const char* pName;
2444 DE_NULL, // const VkSpecializationInfo* pSpecializationInfo;
2445 };
2446 const vk::VkComputePipelineCreateInfo pipelineCreateInfo = {
2447 vk::VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // VkStructureType sType;
2448 DE_NULL, // const void* pNext;
2449 0, // VkPipelineCreateFlags flags;
2450 pipelineShaderStageParams, // VkPipelineShaderStageCreateInfo stage;
2451 *pipelineLayout, // VkPipelineLayout layout;
2452 DE_NULL, // VkPipeline basePipelineHandle;
2453 0, // int32_t basePipelineIndex;
2454 };
2455 vk::Move<vk::VkPipeline> pipeline(createComputePipeline(vk, device, DE_NULL, &pipelineCreateInfo));
2456
2457 m_context.getTestContext().touchWatchdogAndEnableIntervalTimeLimit();
2458
2459 vk::Move<vk::VkCommandPool> cmdPool(
2460 createCommandPool(vk, device, vk::VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex));
2461 vk::Move<vk::VkCommandBuffer> cmdBuffer(
2462 allocateCommandBuffer(vk, device, *cmdPool, vk::VK_COMMAND_BUFFER_LEVEL_PRIMARY));
2463
2464 beginCommandBuffer(vk, *cmdBuffer, 0u);
2465
2466 vk.cmdBindPipeline(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
2467
2468 if (gpuAddrs.size())
2469 {
2470 vk.cmdPushConstants(*cmdBuffer, *pipelineLayout, vk::VK_SHADER_STAGE_COMPUTE_BIT, 0,
2471 (uint32_t)(sizeof(vk::VkDeviceAddress) * gpuAddrs.size()), &gpuAddrs[0]);
2472 }
2473 vk.cmdBindDescriptorSets(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u,
2474 &descriptorSet.get(), 0u, DE_NULL);
2475
2476 vk.cmdDispatch(*cmdBuffer, 1, 1, 1);
2477
2478 // Add barriers for shader writes to storage buffers before host access
2479 std::vector<vk::VkBufferMemoryBarrier> barriers;
2480 if (m_bufferMode == SSBOLayoutCase::BUFFERMODE_PER_BLOCK)
2481 {
2482 for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
2483 {
2484 const vk::VkBuffer uniformBuffer = m_uniformBuffers[blockNdx].get()->get();
2485
2486 const vk::VkBufferMemoryBarrier barrier = {vk::VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
2487 DE_NULL,
2488 vk::VK_ACCESS_SHADER_WRITE_BIT,
2489 vk::VK_ACCESS_HOST_READ_BIT,
2490 VK_QUEUE_FAMILY_IGNORED,
2491 VK_QUEUE_FAMILY_IGNORED,
2492 uniformBuffer,
2493 0u,
2494 static_cast<vk::VkDeviceSize>(bufferSizes[blockNdx])};
2495 barriers.push_back(barrier);
2496 }
2497 }
2498 else
2499 {
2500 const vk::VkBuffer uniformBuffer = m_uniformBuffers[0].get()->get();
2501
2502 vk::VkDeviceSize totalSize = 0;
2503 for (size_t bufferNdx = 0; bufferNdx < bufferSizes.size(); bufferNdx++)
2504 totalSize += bufferSizes[bufferNdx];
2505
2506 const vk::VkBufferMemoryBarrier barrier = {vk::VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
2507 DE_NULL,
2508 vk::VK_ACCESS_SHADER_WRITE_BIT,
2509 vk::VK_ACCESS_HOST_READ_BIT,
2510 VK_QUEUE_FAMILY_IGNORED,
2511 VK_QUEUE_FAMILY_IGNORED,
2512 uniformBuffer,
2513 0u,
2514 totalSize};
2515 barriers.push_back(barrier);
2516 }
2517 vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT,
2518 (vk::VkDependencyFlags)0, 0u, DE_NULL, static_cast<uint32_t>(barriers.size()), &barriers[0],
2519 0u, DE_NULL);
2520
2521 endCommandBuffer(vk, *cmdBuffer);
2522
2523 submitCommandsAndWait(vk, device, queue, cmdBuffer.get());
2524
2525 // Read back ac_numPassed data
2526 bool counterOk;
2527 {
2528 const int refCount = 1;
2529 int resCount = 0;
2530
2531 invalidateAlloc(vk, device, *acBufferAlloc);
2532
2533 resCount = *((const int *)acBufferAlloc->getHostPtr());
2534
2535 counterOk = (refCount == resCount);
2536 if (!counterOk)
2537 {
2538 m_context.getTestContext().getLog() << TestLog::Message << "Error: ac_numPassed = " << resCount
2539 << ", expected " << refCount << TestLog::EndMessage;
2540 }
2541 }
2542
2543 for (size_t allocNdx = 0; allocNdx < m_uniformAllocs.size(); allocNdx++)
2544 {
2545 vk::Allocation *alloc = m_uniformAllocs[allocNdx].get();
2546 invalidateAlloc(vk, device, *alloc);
2547 }
2548
2549 // Validate result
2550 const bool compareOk = compareData(m_context.getTestContext().getLog(), m_refLayout, m_writeData.pointers,
2551 m_refLayout, mappedBlockPtrs);
2552
2553 if (compareOk && counterOk)
2554 return tcu::TestStatus::pass("Result comparison and counter values are OK");
2555 else if (!compareOk && counterOk)
2556 return tcu::TestStatus::fail("Result comparison failed");
2557 else if (compareOk && !counterOk)
2558 return tcu::TestStatus::fail("Counter value incorrect");
2559 else
2560 return tcu::TestStatus::fail("Result comparison and counter values are incorrect");
2561 }
2562
2563 // SSBOLayoutCase.
2564
2565 SSBOLayoutCase::SSBOLayoutCase(tcu::TestContext &testCtx, const char *name, BufferMode bufferMode,
2566 MatrixLoadFlags matrixLoadFlag, MatrixStoreFlags matrixStoreFlag,
2567 bool usePhysStorageBuffer)
2568 : TestCase(testCtx, name)
2569 , m_bufferMode(bufferMode)
2570 , m_matrixLoadFlag(matrixLoadFlag)
2571 , m_matrixStoreFlag(matrixStoreFlag)
2572 , m_usePhysStorageBuffer(usePhysStorageBuffer)
2573 {
2574 }
2575
2576 SSBOLayoutCase::~SSBOLayoutCase(void)
2577 {
2578 }
2579
2580 void SSBOLayoutCase::initPrograms(vk::SourceCollections &programCollection) const
2581 {
2582 DE_ASSERT(!m_computeShaderSrc.empty());
2583
2584 // Valid scalar layouts are a superset of valid relaxed layouts. So check scalar layout first.
2585 if (usesScalarLayout(m_interface))
2586 {
2587 programCollection.glslSources.add("compute")
2588 << glu::ComputeSource(m_computeShaderSrc)
2589 << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_0,
2590 vk::ShaderBuildOptions::FLAG_ALLOW_SCALAR_OFFSETS);
2591 }
2592 else if (usesRelaxedLayout(m_interface))
2593 {
2594 programCollection.glslSources.add("compute")
2595 << glu::ComputeSource(m_computeShaderSrc)
2596 << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_0,
2597 vk::ShaderBuildOptions::FLAG_ALLOW_RELAXED_OFFSETS);
2598 }
2599 else
2600 programCollection.glslSources.add("compute") << glu::ComputeSource(m_computeShaderSrc);
2601 }
2602
2603 TestInstance *SSBOLayoutCase::createInstance(Context &context) const
2604 {
2605 return new SSBOLayoutCaseInstance(context, m_bufferMode, m_interface, m_refLayout, m_initialData, m_writeData,
2606 m_usePhysStorageBuffer);
2607 }
2608
2609 void SSBOLayoutCase::checkSupport(Context &context) const
2610 {
2611 if (!context.isDeviceFunctionalitySupported("VK_KHR_relaxed_block_layout") && usesRelaxedLayout(m_interface))
2612 TCU_THROW(NotSupportedError, "VK_KHR_relaxed_block_layout not supported");
2613 if (!context.get16BitStorageFeatures().storageBuffer16BitAccess && uses16BitStorage(m_interface))
2614 TCU_THROW(NotSupportedError, "storageBuffer16BitAccess not supported");
2615 if (!context.get8BitStorageFeatures().storageBuffer8BitAccess && uses8BitStorage(m_interface))
2616 TCU_THROW(NotSupportedError, "storageBuffer8BitAccess not supported");
2617 if (!context.getScalarBlockLayoutFeatures().scalarBlockLayout && usesScalarLayout(m_interface))
2618 TCU_THROW(NotSupportedError, "scalarBlockLayout not supported");
2619 if (m_usePhysStorageBuffer && !context.isBufferDeviceAddressSupported())
2620 TCU_THROW(NotSupportedError, "Physical storage buffer pointers not supported");
2621 if (usesDescriptorIndexing(m_interface) &&
2622 (!context.getDescriptorIndexingFeatures().shaderStorageBufferArrayNonUniformIndexing ||
2623 !context.getDescriptorIndexingFeatures().runtimeDescriptorArray))
2624 TCU_THROW(NotSupportedError, "Descriptor indexing over storage buffer not supported");
2625
2626 const vk::VkPhysicalDeviceProperties &properties = context.getDeviceProperties();
2627 // Shader defines N+1 storage buffers: N to operate and one more to store the number of cases passed.
2628 uint32_t blockCount = 1u;
2629 for (int32_t blockIdx = 0u; blockIdx < m_interface.getNumBlocks(); blockIdx++)
2630 {
2631 blockCount +=
2632 m_interface.getBlock(blockIdx).getArraySize() ? m_interface.getBlock(blockIdx).getArraySize() : 1u;
2633 }
2634
2635 if (properties.limits.maxPerStageDescriptorStorageBuffers < blockCount)
2636 TCU_THROW(NotSupportedError,
2637 "Descriptor set storage buffers count higher than the maximum supported by the driver");
2638 }
2639
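// Compute the reference layout, generate initial and expected write data, and build the compute shader source.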
2640 void SSBOLayoutCase::delayedInit(void)
2641 {
2642 computeReferenceLayout(m_refLayout, m_interface);
2643 initRefDataStorage(m_interface, m_refLayout, m_initialData);
2644 initRefDataStorage(m_interface, m_refLayout, m_writeData);
2645 generateValues(m_refLayout, m_initialData.pointers, deStringHash(getName()) ^ 0xad2f7214);
2646 generateValues(m_refLayout, m_writeData.pointers, deStringHash(getName()) ^ 0x25ca4e7);
2647 copyNonWrittenData(m_interface, m_refLayout, m_initialData.pointers, m_writeData.pointers);
2648
2649 m_computeShaderSrc = generateComputeShader(m_interface, m_refLayout, m_initialData.pointers, m_writeData.pointers,
2650 m_matrixLoadFlag, m_matrixStoreFlag, m_usePhysStorageBuffer);
2651 }
2652
2653 } // namespace ssbo
2654 } // namespace vkt
2655