/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2015 The Khronos Group Inc.
 * Copyright (c) 2015 Samsung Electronics Co., Ltd.
 * Copyright (c) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Vulkan ShaderExecutor
 *//*--------------------------------------------------------------------*/

#include "vktShaderExecutor.hpp"

#include "vkMemUtil.hpp"
#include "vkRef.hpp"
#include "vkPrograms.hpp"
#include "vkRefUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkObjUtil.hpp"
#include "vkBarrierUtil.hpp"

#include "gluShaderUtil.hpp"

#include "tcuVector.hpp"
#include "tcuTestLog.hpp"
#include "tcuTextureUtil.hpp"

#include "deUniquePtr.hpp"
#include "deStringUtil.hpp"
#include "deSharedPtr.hpp"
#include "deFloat16.h"

#include <map>
#include <sstream>
#include <iostream>

using std::vector;
using namespace vk;

namespace vkt
{
namespace shaderexecutor
{
namespace
{

enum
{
    DEFAULT_RENDER_WIDTH  = 100,
    DEFAULT_RENDER_HEIGHT = 100,
};

// Common typedefs

typedef de::SharedPtr<Unique<VkImage>> VkImageSp;
typedef de::SharedPtr<Unique<VkImageView>> VkImageViewSp;
typedef de::SharedPtr<Unique<VkBuffer>> VkBufferSp;
typedef de::SharedPtr<Allocation> AllocationSp;

static VkFormat getAttributeFormat(const glu::DataType dataType);

// Shader utilities

static VkClearValue getDefaultClearColor(void)
{
    return makeClearValueColorF32(0.125f, 0.25f, 0.5f, 1.0f);
}

static std::string generateEmptyFragmentSource(void)
{
    std::ostringstream src;

    src << "#version 450\n"
           "layout(location=0) out highp vec4 o_color;\n";

    src << "void main (void)\n{\n";
    src << " o_color = vec4(0.0);\n";
    src << "}\n";

    return src.str();
}

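// Rewrites each float-typed output value into its "packed_" counterpart: packFloat2x16()
// packs the 16-bit result together with a -1.0 sentinel in the high half, and
// uintBitsToFloat() reinterprets the packed bits so the value can be carried through a
// 32-bit float-typed interface variable without conversion.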
void packFloat16Bit(std::ostream &src, const std::vector<Symbol> &outputs)
{
    for (vector<Symbol>::const_iterator symIter = outputs.begin(); symIter != outputs.end(); ++symIter)
    {
        if (glu::isDataTypeFloatType(symIter->varType.getBasicType()))
        {
            if (glu::isDataTypeVector(symIter->varType.getBasicType()))
            {
                for (int i = 0; i < glu::getDataTypeScalarSize(symIter->varType.getBasicType()); i++)
                {
                    src << "\tpacked_" << symIter->name << "[" << i << "] = uintBitsToFloat(packFloat2x16(f16vec2("
                        << symIter->name << "[" << i << "], -1.0)));\n";
                }
            }
            else if (glu::isDataTypeMatrix(symIter->varType.getBasicType()))
            {
                int maxRow = 0;
                int maxCol = 0;
                switch (symIter->varType.getBasicType())
                {
                case glu::TYPE_FLOAT_MAT2:
                    maxRow = maxCol = 2;
                    break;
                case glu::TYPE_FLOAT_MAT2X3:
                    maxRow = 2;
                    maxCol = 3;
                    break;
                case glu::TYPE_FLOAT_MAT2X4:
                    maxRow = 2;
                    maxCol = 4;
                    break;
                case glu::TYPE_FLOAT_MAT3X2:
                    maxRow = 3;
                    maxCol = 2;
                    break;
                case glu::TYPE_FLOAT_MAT3:
                    maxRow = maxCol = 3;
                    break;
                case glu::TYPE_FLOAT_MAT3X4:
                    maxRow = 3;
                    maxCol = 4;
                    break;
                case glu::TYPE_FLOAT_MAT4X2:
                    maxRow = 4;
                    maxCol = 2;
                    break;
                case glu::TYPE_FLOAT_MAT4X3:
                    maxRow = 4;
                    maxCol = 3;
                    break;
                case glu::TYPE_FLOAT_MAT4:
                    maxRow = maxCol = 4;
                    break;
                default:
                    DE_ASSERT(false);
                    break;
                }

                for (int i = 0; i < maxRow; i++)
                    for (int j = 0; j < maxCol; j++)
                    {
                        src << "\tpacked_" << symIter->name << "[" << i << "][" << j
                            << "] = uintBitsToFloat(packFloat2x16(f16vec2(" << symIter->name << "[" << i << "][" << j
                            << "], -1.0)));\n";
                    }
            }
            else
            {
                src << "\tpacked_" << symIter->name << " = uintBitsToFloat(packFloat2x16(f16vec2(" << symIter->name
                    << ", -1.0)));\n";
            }
        }
    }
}

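// Vertex shader used when the operation itself runs in a later stage: it only forwards the
// user inputs to the next stage, using flat interpolation so values reach that stage
// unchanged.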
static std::string generatePassthroughVertexShader(const ShaderSpec &shaderSpec, const char *inputPrefix,
                                                   const char *outputPrefix)
{
    std::ostringstream src;
    int location = 0;

    src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";

    if (!shaderSpec.globalDeclarations.empty())
        src << shaderSpec.globalDeclarations << "\n";

    src << "layout(location = " << location << ") in highp vec4 a_position;\n";

    for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
    {
        location++;
        src << "layout(location = " << location << ") in " << glu::declare(input->varType, inputPrefix + input->name)
            << ";\n"
            << "layout(location = " << location - 1 << ") flat out "
            << glu::declare(input->varType, outputPrefix + input->name) << ";\n";
    }

    src << "\nvoid main (void)\n{\n"
        << " gl_Position = a_position;\n"
        << " gl_PointSize = 1.0;\n";

    for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
        src << "\t" << outputPrefix << input->name << " = " << inputPrefix << input->name << ";\n";

    src << "}\n";

    return src.str();
}

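// Vertex shader variant that executes the operation itself. Boolean outputs are declared as
// (vectors of) int on the interface, since GLSL does not allow bool vertex outputs.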
static std::string generateVertexShader(const ShaderSpec &shaderSpec, const std::string &inputPrefix,
                                        const std::string &outputPrefix)
{
    DE_ASSERT(!inputPrefix.empty() && !outputPrefix.empty());

    std::ostringstream src;

    src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";

    if (!shaderSpec.globalDeclarations.empty())
        src << shaderSpec.globalDeclarations << "\n";

    src << "layout(location = 0) in highp vec4 a_position;\n";

    int locationNumber = 1;
    for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end();
         ++input, ++locationNumber)
    {
        src << "layout(location = " << locationNumber << ") in "
            << glu::declare(input->varType, inputPrefix + input->name) << ";\n";
    }

    locationNumber = 0;
    for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end();
         ++output, ++locationNumber)
    {
        DE_ASSERT(output->varType.isBasicType());

        if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
        {
            const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType());
            const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
            const glu::VarType intType(intBaseType, glu::PRECISION_HIGHP);

            src << "layout(location = " << locationNumber << ") flat out "
                << glu::declare(intType, outputPrefix + output->name) << ";\n";
        }
        else
            src << "layout(location = " << locationNumber << ") flat out "
                << glu::declare(output->varType, outputPrefix + output->name) << ";\n";
    }

    src << "\n"
        << "void main (void)\n"
        << "{\n"
        << " gl_Position = a_position;\n"
        << " gl_PointSize = 1.0;\n";

    // Declare & fetch local input variables
    for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
    {
        if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(input->varType.getBasicType()))
        {
            const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(input->varType.getBasicType()));
            src << "\t" << tname << " " << input->name << " = " << tname << "(" << inputPrefix << input->name << ");\n";
        }
        else
            src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << ";\n";
    }

    // Declare local output variables
    for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end();
         ++output)
    {
        if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
        {
            const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(output->varType.getBasicType()));
            src << "\t" << tname << " " << output->name << ";\n";
            const char *tname2 = glu::getDataTypeName(output->varType.getBasicType());
            src << "\t" << tname2 << " "
                << "packed_" << output->name << ";\n";
        }
        else
            src << "\t" << glu::declare(output->varType, output->name) << ";\n";
    }

    // Operation - indented to correct level.
    {
        std::istringstream opSrc(shaderSpec.source);
        std::string line;

        while (std::getline(opSrc, line))
            src << "\t" << line << "\n";
    }

    if (shaderSpec.packFloat16Bit)
        packFloat16Bit(src, shaderSpec.outputs);

    // Assignments to outputs.
    for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end();
         ++output)
    {
        if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
        {
            src << "\t" << outputPrefix << output->name << " = packed_" << output->name << ";\n";
        }
        else
        {
            if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
            {
                const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType());
                const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;

                src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "("
                    << output->name << ");\n";
            }
            else
                src << "\t" << outputPrefix << output->name << " = " << output->name << ";\n";
        }
    }

    src << "}\n";

    return src.str();
}

struct FragmentOutputLayout
{
    std::vector<const Symbol *> locationSymbols; //! Symbols by location
    std::map<std::string, int> locationMap;      //! Map from symbol name to start location
};

static void generateFragShaderOutputDecl(std::ostream &src, const ShaderSpec &shaderSpec, bool useIntOutputs,
                                         const std::map<std::string, int> &outLocationMap,
                                         const std::string &outputPrefix)
{
    for (int outNdx = 0; outNdx < (int)shaderSpec.outputs.size(); ++outNdx)
    {
        const Symbol &output = shaderSpec.outputs[outNdx];
        const int location = de::lookup(outLocationMap, output.name);
        const std::string outVarName = outputPrefix + output.name;
        glu::VariableDeclaration decl(output.varType, outVarName, glu::STORAGE_OUT, glu::INTERPOLATION_LAST,
                                      glu::Layout(location));

        TCU_CHECK_INTERNAL(output.varType.isBasicType());

        if (useIntOutputs && glu::isDataTypeFloatOrVec(output.varType.getBasicType()))
        {
            const int vecSize = glu::getDataTypeScalarSize(output.varType.getBasicType());
            const glu::DataType uintBasicType = vecSize > 1 ? glu::getDataTypeUintVec(vecSize) : glu::TYPE_UINT;
            const glu::VarType uintType(uintBasicType, glu::PRECISION_HIGHP);

            decl.varType = uintType;
            src << decl << ";\n";
        }
        else if (glu::isDataTypeBoolOrBVec(output.varType.getBasicType()))
        {
            const int vecSize = glu::getDataTypeScalarSize(output.varType.getBasicType());
            const glu::DataType intBasicType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
            const glu::VarType intType(intBasicType, glu::PRECISION_HIGHP);

            decl.varType = intType;
            src << decl << ";\n";
        }
        else if (glu::isDataTypeMatrix(output.varType.getBasicType()))
        {
            const int vecSize = glu::getDataTypeMatrixNumRows(output.varType.getBasicType());
            const int numVecs = glu::getDataTypeMatrixNumColumns(output.varType.getBasicType());
            const glu::DataType uintBasicType = glu::getDataTypeUintVec(vecSize);
            const glu::VarType uintType(uintBasicType, glu::PRECISION_HIGHP);

            decl.varType = uintType;
            for (int vecNdx = 0; vecNdx < numVecs; ++vecNdx)
            {
                decl.name = outVarName + "_" + de::toString(vecNdx);
                decl.layout.location = location + vecNdx;
                src << decl << ";\n";
            }
        }
        else
            src << decl << ";\n";
    }
}

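// Emits the copies from the shader-local result variables to the fragment outputs. With
// useIntOutputs, float results are written through floatBitsToUint() into uint attachments,
// preserving the exact bit pattern across the color write.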
static void generateFragShaderOutAssign(std::ostream &src, const ShaderSpec &shaderSpec, bool useIntOutputs,
                                        const std::string &valuePrefix, const std::string &outputPrefix,
                                        const bool isInput16Bit = false)
{
    if (isInput16Bit)
        packFloat16Bit(src, shaderSpec.outputs);

    for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end();
         ++output)
    {
        const std::string packPrefix =
            (isInput16Bit && glu::isDataTypeFloatType(output->varType.getBasicType())) ? "packed_" : "";

        if (useIntOutputs && glu::isDataTypeFloatOrVec(output->varType.getBasicType()))
            src << " o_" << output->name << " = floatBitsToUint(" << valuePrefix << output->name << ");\n";
        else if (glu::isDataTypeMatrix(output->varType.getBasicType()))
        {
            const int numVecs = glu::getDataTypeMatrixNumColumns(output->varType.getBasicType());

            for (int vecNdx = 0; vecNdx < numVecs; ++vecNdx)
                if (useIntOutputs)
                    src << "\t" << outputPrefix << output->name << "_" << vecNdx << " = floatBitsToUint(" << valuePrefix
                        << output->name << "[" << vecNdx << "]);\n";
                else
                    src << "\t" << outputPrefix << output->name << "_" << vecNdx << " = " << packPrefix << valuePrefix
                        << output->name << "[" << vecNdx << "];\n";
        }
        else if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
        {
            const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType());
            const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;

            src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "("
                << valuePrefix << output->name << ");\n";
        }
        else
            src << "\t" << outputPrefix << output->name << " = " << packPrefix << valuePrefix << output->name << ";\n";
    }
}

static std::string generatePassthroughFragmentShader(const ShaderSpec &shaderSpec, bool useIntOutputs,
                                                     const std::map<std::string, int> &outLocationMap,
                                                     const std::string &inputPrefix, const std::string &outputPrefix)
{
    std::ostringstream src;

    src << "#version 450\n";

    if (!shaderSpec.globalDeclarations.empty())
        src << shaderSpec.globalDeclarations << "\n";

    int locationNumber = 0;
    for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end();
         ++output, ++locationNumber)
    {
        if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
        {
            const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType());
            const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
            const glu::VarType intType(intBaseType, glu::PRECISION_HIGHP);

            src << "layout(location = " << locationNumber << ") flat in "
                << glu::declare(intType, inputPrefix + output->name) << ";\n";
        }
        else
            src << "layout(location = " << locationNumber << ") flat in "
                << glu::declare(output->varType, inputPrefix + output->name) << ";\n";
    }

    generateFragShaderOutputDecl(src, shaderSpec, useIntOutputs, outLocationMap, outputPrefix);

    src << "\nvoid main (void)\n{\n";

    generateFragShaderOutAssign(src, shaderSpec, useIntOutputs, inputPrefix, outputPrefix);

    src << "}\n";

    return src.str();
}

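// Geometry shader variant: one point in, one point out, with the operation executed between
// the input fetch and the output assignments. gl_PointSize is forwarded only when the
// shaderTessellationAndGeometryPointSize feature is supported.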
static std::string generateGeometryShader(const ShaderSpec &shaderSpec, const std::string &inputPrefix,
                                          const std::string &outputPrefix, const bool pointSizeSupported)
{
    DE_ASSERT(!inputPrefix.empty() && !outputPrefix.empty());

    std::ostringstream src;

    src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";

    if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
        src << "#extension GL_EXT_geometry_shader : require\n";

    if (!shaderSpec.globalDeclarations.empty())
        src << shaderSpec.globalDeclarations << "\n";

    src << "layout(points) in;\n"
        << "layout(points, max_vertices = 1) out;\n";

    int locationNumber = 0;
    for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end();
         ++input, ++locationNumber)
        src << "layout(location = " << locationNumber << ") flat in "
            << glu::declare(input->varType, inputPrefix + input->name) << "[];\n";

    locationNumber = 0;
    for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end();
         ++output, ++locationNumber)
    {
        DE_ASSERT(output->varType.isBasicType());

        if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
        {
            const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType());
            const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
            const glu::VarType intType(intBaseType, glu::PRECISION_HIGHP);

            src << "layout(location = " << locationNumber << ") flat out "
                << glu::declare(intType, outputPrefix + output->name) << ";\n";
        }
        else
            src << "layout(location = " << locationNumber << ") flat out "
                << glu::declare(output->varType, outputPrefix + output->name) << ";\n";
    }

    src << "\n"
        << "void main (void)\n"
        << "{\n"
        << " gl_Position = gl_in[0].gl_Position;\n"
        << (pointSizeSupported ? " gl_PointSize = gl_in[0].gl_PointSize;\n\n" : "");

    // Fetch input variables
    for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
        src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << "[0];\n";

    // Declare local output variables.
    for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end();
         ++output)
        src << "\t" << glu::declare(output->varType, output->name) << ";\n";

    src << "\n";

    // Operation - indented to correct level.
    {
        std::istringstream opSrc(shaderSpec.source);
        std::string line;

        while (std::getline(opSrc, line))
            src << "\t" << line << "\n";
    }

    // Assignments to outputs.
    for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end();
         ++output)
    {
        if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
        {
            const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType());
            const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;

            src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "("
                << output->name << ");\n";
        }
        else
            src << "\t" << outputPrefix << output->name << " = " << output->name << ";\n";
    }

    src << " EmitVertex();\n"
        << " EndPrimitive();\n"
        << "}\n";

    return src.str();
}

static std::string generateFragmentShader(const ShaderSpec &shaderSpec, bool useIntOutputs,
                                          const std::map<std::string, int> &outLocationMap,
                                          const std::string &inputPrefix, const std::string &outputPrefix)
{
    std::ostringstream src;
    src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
    if (!shaderSpec.globalDeclarations.empty())
        src << shaderSpec.globalDeclarations << "\n";

    int locationNumber = 0;
    for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end();
         ++input, ++locationNumber)
    {
        src << "layout(location = " << locationNumber << ") flat in "
            << glu::declare(input->varType, inputPrefix + input->name) << ";\n";
    }

    generateFragShaderOutputDecl(src, shaderSpec, useIntOutputs, outLocationMap, outputPrefix);

    src << "\nvoid main (void)\n{\n";

    // Declare & fetch local input variables
    for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
    {
        if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(input->varType.getBasicType()))
        {
            const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(input->varType.getBasicType()));
            src << "\t" << tname << " " << input->name << " = " << tname << "(" << inputPrefix << input->name << ");\n";
        }
        else
            src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << ";\n";
    }

    // Declare output variables
    for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end();
         ++output)
    {
        if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
        {
            const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(output->varType.getBasicType()));
            src << "\t" << tname << " " << output->name << ";\n";
            const char *tname2 = glu::getDataTypeName(output->varType.getBasicType());
            src << "\t" << tname2 << " "
                << "packed_" << output->name << ";\n";
        }
        else
            src << "\t" << glu::declare(output->varType, output->name) << ";\n";
    }

    // Operation - indented to correct level.
    {
        std::istringstream opSrc(shaderSpec.source);
        std::string line;

        while (std::getline(opSrc, line))
            src << "\t" << line << "\n";
    }

    generateFragShaderOutAssign(src, shaderSpec, useIntOutputs, "", outputPrefix, shaderSpec.packFloat16Bit);

    src << "}\n";

    return src.str();
}

// FragmentOutExecutor

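// Executes the operation in a graphics pipeline by rendering one 1x1 point per input value
// and reading the per-value results back from the color attachments.
//
// Illustrative usage sketch only (see ShaderSpec and the executor factory in
// vktShaderExecutor.hpp for the actual interface; the context, layout and I/O pointers below
// are assumed to be set up by the calling test):
//
//     ShaderSpec spec;
//     spec.glslVersion = glu::GLSL_VERSION_450;
//     spec.inputs.push_back(Symbol("in0", glu::VarType(glu::TYPE_FLOAT, glu::PRECISION_HIGHP)));
//     spec.outputs.push_back(Symbol("out0", glu::VarType(glu::TYPE_FLOAT, glu::PRECISION_HIGHP)));
//     spec.source = "out0 = in0 * 2.0;";
//
//     de::MovePtr<ShaderExecutor> executor(createExecutor(context, glu::SHADERTYPE_FRAGMENT, spec, extraLayout));
//     executor->execute(numValues, inputPtrs, outputPtrs, extraResources);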
class FragmentOutExecutor : public ShaderExecutor
{
public:
    FragmentOutExecutor(Context &context, glu::ShaderType shaderType, const ShaderSpec &shaderSpec,
                        VkDescriptorSetLayout extraResourcesLayout);
    virtual ~FragmentOutExecutor(void);

    virtual void execute(int numValues, const void *const *inputs, void *const *outputs,
                         VkDescriptorSet extraResources);

protected:
    const glu::ShaderType m_shaderType;
    const FragmentOutputLayout m_outputLayout;

private:
    void bindAttributes(int numValues, const void *const *inputs);

    void addAttribute(uint32_t bindingLocation, VkFormat format, uint32_t sizePerElement, uint32_t count,
                      const void *dataPtr);
    // reinit render data members
    virtual void clearRenderData(void);

    const VkDescriptorSetLayout m_extraResourcesLayout;

    std::vector<VkVertexInputBindingDescription> m_vertexBindingDescriptions;
    std::vector<VkVertexInputAttributeDescription> m_vertexAttributeDescriptions;
    std::vector<VkBufferSp> m_vertexBuffers;
    std::vector<AllocationSp> m_vertexBufferAllocs;
};

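// Assigns each output symbol its starting location and records one entry per consumed
// location. For example, outputs { vec4 a; mat3 b; } are expected to map to a -> location 0
// and b -> locations 1..3 (one location per matrix column, matching the per-column
// declarations emitted by generateFragShaderOutputDecl()).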
static FragmentOutputLayout computeFragmentOutputLayout(const std::vector<Symbol> &symbols)
{
    FragmentOutputLayout ret;
    int location = 0;

    for (std::vector<Symbol>::const_iterator it = symbols.begin(); it != symbols.end(); ++it)
    {
        const int numLocations = glu::getDataTypeNumLocations(it->varType.getBasicType());

        TCU_CHECK_INTERNAL(!de::contains(ret.locationMap, it->name));
        de::insert(ret.locationMap, it->name, location);
        location += numLocations;

        for (int ndx = 0; ndx < numLocations; ++ndx)
            ret.locationSymbols.push_back(&*it);
    }

    return ret;
}

FragmentOutExecutor::FragmentOutExecutor(Context &context, glu::ShaderType shaderType, const ShaderSpec &shaderSpec,
                                         VkDescriptorSetLayout extraResourcesLayout)
    : ShaderExecutor(context, shaderSpec)
    , m_shaderType(shaderType)
    , m_outputLayout(computeFragmentOutputLayout(m_shaderSpec.outputs))
    , m_extraResourcesLayout(extraResourcesLayout)
{
    const VkPhysicalDevice physicalDevice = m_context.getPhysicalDevice();
    const InstanceInterface &vki = m_context.getInstanceInterface();

    // Input attributes
    for (int inputNdx = 0; inputNdx < (int)m_shaderSpec.inputs.size(); inputNdx++)
    {
        const Symbol &symbol = m_shaderSpec.inputs[inputNdx];
        const glu::DataType basicType = symbol.varType.getBasicType();
        const VkFormat format = getAttributeFormat(basicType);
        const VkFormatProperties formatProperties = getPhysicalDeviceFormatProperties(vki, physicalDevice, format);
        if ((formatProperties.bufferFeatures & VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT) == 0)
            TCU_THROW(NotSupportedError, "format not supported by device as vertex buffer attribute format");
    }
}

FragmentOutExecutor::~FragmentOutExecutor(void)
{
}

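// Generates one point per input value, placed at successive pixel centers in row-major
// order, so that value N lands in pixel (N % width, N / width) of the render target and can
// be read back unambiguously.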
static std::vector<tcu::Vec2> computeVertexPositions(int numValues, const tcu::IVec2 &renderSize)
{
    std::vector<tcu::Vec2> positions(numValues);
    for (int valNdx = 0; valNdx < numValues; valNdx++)
    {
        const int ix = valNdx % renderSize.x();
        const int iy = valNdx / renderSize.x();
        const float fx = -1.0f + 2.0f * ((float(ix) + 0.5f) / float(renderSize.x()));
        const float fy = -1.0f + 2.0f * ((float(iy) + 0.5f) / float(renderSize.y()));

        positions[valNdx] = tcu::Vec2(fx, fy);
    }

    return positions;
}

static tcu::TextureFormat getRenderbufferFormatForOutput(const glu::VarType &outputType, bool useIntOutputs)
{
    const tcu::TextureFormat::ChannelOrder channelOrderMap[] = {tcu::TextureFormat::R, tcu::TextureFormat::RG,
                                                                tcu::TextureFormat::RGBA, // No RGB variants available.
                                                                tcu::TextureFormat::RGBA};

    const glu::DataType basicType = outputType.getBasicType();
    const int numComps = glu::getDataTypeNumComponents(basicType);
    tcu::TextureFormat::ChannelType channelType;

    switch (glu::getDataTypeScalarType(basicType))
    {
    case glu::TYPE_UINT:
        channelType = tcu::TextureFormat::UNSIGNED_INT32;
        break;
    case glu::TYPE_INT:
        channelType = tcu::TextureFormat::SIGNED_INT32;
        break;
    case glu::TYPE_BOOL:
        channelType = tcu::TextureFormat::SIGNED_INT32;
        break;
    case glu::TYPE_FLOAT:
        channelType = useIntOutputs ? tcu::TextureFormat::UNSIGNED_INT32 : tcu::TextureFormat::FLOAT;
        break;
    case glu::TYPE_FLOAT16:
        channelType = useIntOutputs ? tcu::TextureFormat::UNSIGNED_INT32 : tcu::TextureFormat::HALF_FLOAT;
        break;
    default:
        throw tcu::InternalError("Invalid output type");
    }

    DE_ASSERT(de::inRange<int>(numComps, 1, DE_LENGTH_OF_ARRAY(channelOrderMap)));

    return tcu::TextureFormat(channelOrderMap[numComps - 1], channelType);
}

static VkFormat getAttributeFormat(const glu::DataType dataType)
{
    switch (dataType)
    {
    case glu::TYPE_FLOAT16:
        return VK_FORMAT_R16_SFLOAT;
    case glu::TYPE_FLOAT16_VEC2:
        return VK_FORMAT_R16G16_SFLOAT;
    case glu::TYPE_FLOAT16_VEC3:
        return VK_FORMAT_R16G16B16_SFLOAT;
    case glu::TYPE_FLOAT16_VEC4:
        return VK_FORMAT_R16G16B16A16_SFLOAT;

    case glu::TYPE_FLOAT:
        return VK_FORMAT_R32_SFLOAT;
    case glu::TYPE_FLOAT_VEC2:
        return VK_FORMAT_R32G32_SFLOAT;
    case glu::TYPE_FLOAT_VEC3:
        return VK_FORMAT_R32G32B32_SFLOAT;
    case glu::TYPE_FLOAT_VEC4:
        return VK_FORMAT_R32G32B32A32_SFLOAT;

    case glu::TYPE_INT:
        return VK_FORMAT_R32_SINT;
    case glu::TYPE_INT_VEC2:
        return VK_FORMAT_R32G32_SINT;
    case glu::TYPE_INT_VEC3:
        return VK_FORMAT_R32G32B32_SINT;
    case glu::TYPE_INT_VEC4:
        return VK_FORMAT_R32G32B32A32_SINT;

    case glu::TYPE_UINT:
        return VK_FORMAT_R32_UINT;
    case glu::TYPE_UINT_VEC2:
        return VK_FORMAT_R32G32_UINT;
    case glu::TYPE_UINT_VEC3:
        return VK_FORMAT_R32G32B32_UINT;
    case glu::TYPE_UINT_VEC4:
        return VK_FORMAT_R32G32B32A32_UINT;

    case glu::TYPE_FLOAT_MAT2:
        return VK_FORMAT_R32G32_SFLOAT;
    case glu::TYPE_FLOAT_MAT2X3:
        return VK_FORMAT_R32G32B32_SFLOAT;
    case glu::TYPE_FLOAT_MAT2X4:
        return VK_FORMAT_R32G32B32A32_SFLOAT;
    case glu::TYPE_FLOAT_MAT3X2:
        return VK_FORMAT_R32G32_SFLOAT;
    case glu::TYPE_FLOAT_MAT3:
        return VK_FORMAT_R32G32B32_SFLOAT;
    case glu::TYPE_FLOAT_MAT3X4:
        return VK_FORMAT_R32G32B32A32_SFLOAT;
    case glu::TYPE_FLOAT_MAT4X2:
        return VK_FORMAT_R32G32_SFLOAT;
    case glu::TYPE_FLOAT_MAT4X3:
        return VK_FORMAT_R32G32B32_SFLOAT;
    case glu::TYPE_FLOAT_MAT4:
        return VK_FORMAT_R32G32B32A32_SFLOAT;
    default:
        DE_ASSERT(false);
        return VK_FORMAT_UNDEFINED;
    }
}

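// Each attribute gets its own binding and its own host-visible vertex buffer; the data is
// tightly packed, so the binding stride equals the per-element size.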
void FragmentOutExecutor::addAttribute(uint32_t bindingLocation, VkFormat format, uint32_t sizePerElement,
                                       uint32_t count, const void *dataPtr)
{
// Portability requires the stride to be a multiple of minVertexInputBindingStrideAlignment.
// This value is usually 4 and current tests meet this requirement, but if this changes in
// the future then this limit should be verified in checkSupport.
#ifndef CTS_USES_VULKANSC
    if (m_context.isDeviceFunctionalitySupported("VK_KHR_portability_subset") &&
        ((sizePerElement % m_context.getPortabilitySubsetProperties().minVertexInputBindingStrideAlignment) != 0))
    {
        DE_FATAL("stride is not a multiple of minVertexInputBindingStrideAlignment");
    }
#endif // CTS_USES_VULKANSC

    // Add binding specification
    const uint32_t binding = (uint32_t)m_vertexBindingDescriptions.size();
    const VkVertexInputBindingDescription bindingDescription = {binding, sizePerElement, VK_VERTEX_INPUT_RATE_VERTEX};

    m_vertexBindingDescriptions.push_back(bindingDescription);

    // Add location and format specification
    const VkVertexInputAttributeDescription attributeDescription = {
        bindingLocation, // uint32_t location;
        binding,         // uint32_t binding;
        format,          // VkFormat format;
        0u,              // uint32_t offset;
    };

    m_vertexAttributeDescriptions.push_back(attributeDescription);

    // Upload data to buffer
    const VkDevice vkDevice = m_context.getDevice();
    const DeviceInterface &vk = m_context.getDeviceInterface();
    const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();

    const VkDeviceSize inputSize = sizePerElement * count;
    const VkBufferCreateInfo vertexBufferParams = {
        VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
        DE_NULL,                              // const void* pNext;
        0u,                                   // VkBufferCreateFlags flags;
        inputSize,                            // VkDeviceSize size;
        VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,    // VkBufferUsageFlags usage;
        VK_SHARING_MODE_EXCLUSIVE,            // VkSharingMode sharingMode;
        1u,                                   // uint32_t queueFamilyIndexCount;
        &queueFamilyIndex                     // const uint32_t* pQueueFamilyIndices;
    };

    Move<VkBuffer> buffer = createBuffer(vk, vkDevice, &vertexBufferParams);
    de::MovePtr<Allocation> alloc = m_context.getDefaultAllocator().allocate(
        getBufferMemoryRequirements(vk, vkDevice, *buffer), MemoryRequirement::HostVisible);

    VK_CHECK(vk.bindBufferMemory(vkDevice, *buffer, alloc->getMemory(), alloc->getOffset()));

    deMemcpy(alloc->getHostPtr(), dataPtr, (size_t)inputSize);
    flushAlloc(vk, vkDevice, *alloc);

    m_vertexBuffers.push_back(de::SharedPtr<Unique<VkBuffer>>(new Unique<VkBuffer>(buffer)));
    m_vertexBufferAllocs.push_back(AllocationSp(alloc.release()));
}

void FragmentOutExecutor::bindAttributes(int numValues, const void *const *inputs)
{
    // Input attributes
    for (int inputNdx = 0; inputNdx < (int)m_shaderSpec.inputs.size(); inputNdx++)
    {
        const Symbol &symbol = m_shaderSpec.inputs[inputNdx];
        const void *ptr = inputs[inputNdx];
        const glu::DataType basicType = symbol.varType.getBasicType();
        const int vecSize = glu::getDataTypeScalarSize(basicType);
        const VkFormat format = getAttributeFormat(basicType);
        int elementSize = 0;
        int numAttrsToAdd = 1;

        if (glu::isDataTypeDoubleOrDVec(basicType))
            elementSize = sizeof(double);
        else if (glu::isDataTypeFloatOrVec(basicType))
            elementSize = sizeof(float);
        else if (glu::isDataTypeFloat16OrVec(basicType))
            elementSize = sizeof(uint16_t);
        else if (glu::isDataTypeIntOrIVec(basicType))
            elementSize = sizeof(int);
        else if (glu::isDataTypeUintOrUVec(basicType))
            elementSize = sizeof(uint32_t);
        else if (glu::isDataTypeMatrix(basicType))
        {
            int numRows = glu::getDataTypeMatrixNumRows(basicType);
            int numCols = glu::getDataTypeMatrixNumColumns(basicType);

            elementSize = numRows * numCols * (int)sizeof(float);
            numAttrsToAdd = numCols;
        }
        else
            DE_ASSERT(false);

        // Add attributes; in the case of a matrix, every column is bound as a separate attribute.
        for (int attrNdx = 0; attrNdx < numAttrsToAdd; attrNdx++)
        {
            addAttribute((uint32_t)m_vertexBindingDescriptions.size(), format, elementSize * vecSize, numValues, ptr);
        }
    }
}

void FragmentOutExecutor::clearRenderData(void)
{
    m_vertexBindingDescriptions.clear();
    m_vertexAttributeDescriptions.clear();
    m_vertexBuffers.clear();
    m_vertexBufferAllocs.clear();
}

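// The pipeline layout always reserves set 0 as an empty placeholder so that caller-provided
// extra resources can be bound as set 1; the helpers below create the empty layout, pool and
// set used for that placeholder.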
static Move<VkDescriptorSetLayout> createEmptyDescriptorSetLayout(const DeviceInterface &vkd, VkDevice device)
{
    const VkDescriptorSetLayoutCreateInfo createInfo = {
        VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, DE_NULL, (VkDescriptorSetLayoutCreateFlags)0, 0u, DE_NULL,
    };
    return createDescriptorSetLayout(vkd, device, &createInfo);
}

static Move<VkDescriptorPool> createEmptyDescriptorPool(const DeviceInterface &vkd, VkDevice device)
{
    const VkDescriptorPoolSize emptySize = {
        VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
        1u,
    };
    const VkDescriptorPoolCreateInfo createInfo = {
        VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
        DE_NULL,
        (VkDescriptorPoolCreateFlags)VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
        1u,
        1u,
        &emptySize};
    return createDescriptorPool(vkd, device, &createInfo);
}

static Move<VkDescriptorSet> allocateSingleDescriptorSet(const DeviceInterface &vkd, VkDevice device,
                                                         VkDescriptorPool pool, VkDescriptorSetLayout layout)
{
    const VkDescriptorSetAllocateInfo allocInfo = {
        VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, DE_NULL, pool, 1u, &layout,
    };
    return allocateDescriptorSet(vkd, device, &allocInfo);
}

void FragmentOutExecutor::execute(int numValues, const void *const *inputs, void *const *outputs,
                                  VkDescriptorSet extraResources)
{
    const VkDevice vkDevice = m_context.getDevice();
    const DeviceInterface &vk = m_context.getDeviceInterface();
    const VkQueue queue = m_context.getUniversalQueue();
    const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
    Allocator &memAlloc = m_context.getDefaultAllocator();

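    // Render target size: up to 128 values per row, and enough rows to cover numValues.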
    const uint32_t renderSizeX = de::min(static_cast<uint32_t>(128), (uint32_t)numValues);
    const uint32_t renderSizeY =
        ((uint32_t)numValues / renderSizeX) + (((uint32_t)numValues % renderSizeX != 0) ? 1u : 0u);
    const tcu::UVec2 renderSize(renderSizeX, renderSizeY);
    std::vector<tcu::Vec2> positions;

    const bool useGeometryShader = m_shaderType == glu::SHADERTYPE_GEOMETRY;

    std::vector<VkImageSp> colorImages;
    std::vector<VkImageMemoryBarrier> colorImagePreRenderBarriers;
    std::vector<VkImageMemoryBarrier> colorImagePostRenderBarriers;
    std::vector<AllocationSp> colorImageAllocs;
    std::vector<VkAttachmentDescription> attachments;
    std::vector<VkClearValue> attachmentClearValues;
    std::vector<VkImageViewSp> colorImageViews;

    std::vector<VkPipelineColorBlendAttachmentState> colorBlendAttachmentStates;
    std::vector<VkAttachmentReference> colorAttachmentReferences;

    Move<VkRenderPass> renderPass;
    Move<VkFramebuffer> framebuffer;
    Move<VkPipelineLayout> pipelineLayout;
    Move<VkPipeline> graphicsPipeline;

    Move<VkShaderModule> vertexShaderModule;
    Move<VkShaderModule> geometryShaderModule;
    Move<VkShaderModule> fragmentShaderModule;

    Move<VkCommandPool> cmdPool;
    Move<VkCommandBuffer> cmdBuffer;

    Unique<VkDescriptorSetLayout> emptyDescriptorSetLayout(createEmptyDescriptorSetLayout(vk, vkDevice));
    Unique<VkDescriptorPool> emptyDescriptorPool(createEmptyDescriptorPool(vk, vkDevice));
    Unique<VkDescriptorSet> emptyDescriptorSet(
        allocateSingleDescriptorSet(vk, vkDevice, *emptyDescriptorPool, *emptyDescriptorSetLayout));

    clearRenderData();

    // Compute positions - 1px points are used to drive fragment shading.
    positions = computeVertexPositions(numValues, renderSize.cast<int>());

    // Bind attributes
    addAttribute(0u, VK_FORMAT_R32G32_SFLOAT, sizeof(tcu::Vec2), (uint32_t)positions.size(), &positions[0]);
    bindAttributes(numValues, inputs);

    // Create color images
    {
        const VkPipelineColorBlendAttachmentState colorBlendAttachmentState = {
            VK_FALSE,             // VkBool32 blendEnable;
            VK_BLEND_FACTOR_ONE,  // VkBlendFactor srcColorBlendFactor;
            VK_BLEND_FACTOR_ZERO, // VkBlendFactor dstColorBlendFactor;
            VK_BLEND_OP_ADD,      // VkBlendOp colorBlendOp;
            VK_BLEND_FACTOR_ONE,  // VkBlendFactor srcAlphaBlendFactor;
            VK_BLEND_FACTOR_ZERO, // VkBlendFactor dstAlphaBlendFactor;
            VK_BLEND_OP_ADD,      // VkBlendOp alphaBlendOp;
            (VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT |
             VK_COLOR_COMPONENT_A_BIT) // VkColorComponentFlags colorWriteMask;
        };

        for (int outNdx = 0; outNdx < (int)m_outputLayout.locationSymbols.size(); ++outNdx)
        {
            const bool isDouble = glu::isDataTypeDoubleOrDVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
            const bool isFloat = isDataTypeFloatOrVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
            const bool isFloat16b = glu::isDataTypeFloat16OrVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
            const bool isSigned = isDataTypeIntOrIVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
            const bool isBool = isDataTypeBoolOrBVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
            const VkFormat colorFormat =
                (isDouble ? VK_FORMAT_R64G64B64A64_SFLOAT :
                 (isFloat16b ? VK_FORMAT_R16G16B16A16_SFLOAT :
                  (isFloat ? VK_FORMAT_R32G32B32A32_SFLOAT :
                   (isSigned || isBool ? VK_FORMAT_R32G32B32A32_SINT :
                    VK_FORMAT_R32G32B32A32_UINT))));

            {
                const VkFormatProperties formatProperties = getPhysicalDeviceFormatProperties(
                    m_context.getInstanceInterface(), m_context.getPhysicalDevice(), colorFormat);
                if ((formatProperties.optimalTilingFeatures & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT) == 0)
                    TCU_THROW(NotSupportedError, "Image format doesn't support COLOR_ATTACHMENT_BIT");
            }

            const VkImageCreateInfo colorImageParams = {
                VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,  // VkStructureType sType;
                DE_NULL,                              // const void* pNext;
                0u,                                   // VkImageCreateFlags flags;
                VK_IMAGE_TYPE_2D,                     // VkImageType imageType;
                colorFormat,                          // VkFormat format;
                {renderSize.x(), renderSize.y(), 1u}, // VkExtent3D extent;
                1u,                                   // uint32_t mipLevels;
                1u,                                   // uint32_t arrayLayers;
                VK_SAMPLE_COUNT_1_BIT,                // VkSampleCountFlagBits samples;
                VK_IMAGE_TILING_OPTIMAL,              // VkImageTiling tiling;
                VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT, // VkImageUsageFlags usage;
                VK_SHARING_MODE_EXCLUSIVE,            // VkSharingMode sharingMode;
                1u,                                   // uint32_t queueFamilyIndexCount;
                &queueFamilyIndex,                    // const uint32_t* pQueueFamilyIndices;
                VK_IMAGE_LAYOUT_UNDEFINED,            // VkImageLayout initialLayout;
            };

            const VkAttachmentDescription colorAttachmentDescription = {
                0u,                                       // VkAttachmentDescriptionFlags flags;
                colorFormat,                              // VkFormat format;
                VK_SAMPLE_COUNT_1_BIT,                    // VkSampleCountFlagBits samples;
                VK_ATTACHMENT_LOAD_OP_CLEAR,              // VkAttachmentLoadOp loadOp;
                VK_ATTACHMENT_STORE_OP_STORE,             // VkAttachmentStoreOp storeOp;
                VK_ATTACHMENT_LOAD_OP_DONT_CARE,          // VkAttachmentLoadOp stencilLoadOp;
                VK_ATTACHMENT_STORE_OP_DONT_CARE,         // VkAttachmentStoreOp stencilStoreOp;
                VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // VkImageLayout initialLayout;
                VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // VkImageLayout finalLayout;
            };

            Move<VkImage> colorImage = createImage(vk, vkDevice, &colorImageParams);
            colorImages.push_back(de::SharedPtr<Unique<VkImage>>(new Unique<VkImage>(colorImage)));
            attachmentClearValues.push_back(getDefaultClearColor());

            // Allocate and bind color image memory
            {
                de::MovePtr<Allocation> colorImageAlloc = memAlloc.allocate(
                    getImageMemoryRequirements(vk, vkDevice, *((const VkImage *)colorImages.back().get())),
                    MemoryRequirement::Any);
                VK_CHECK(vk.bindImageMemory(vkDevice, colorImages.back().get()->get(), colorImageAlloc->getMemory(),
                                            colorImageAlloc->getOffset()));
                colorImageAllocs.push_back(de::SharedPtr<Allocation>(colorImageAlloc.release()));

                attachments.push_back(colorAttachmentDescription);
                colorBlendAttachmentStates.push_back(colorBlendAttachmentState);

                const VkAttachmentReference colorAttachmentReference = {
                    (uint32_t)(colorImages.size() - 1),      // uint32_t attachment;
                    VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL // VkImageLayout layout;
                };

                colorAttachmentReferences.push_back(colorAttachmentReference);
            }

            // Create color attachment view
            {
                const VkImageViewCreateInfo colorImageViewParams = {
                    VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, // VkStructureType sType;
                    DE_NULL,                                  // const void* pNext;
                    0u,                                       // VkImageViewCreateFlags flags;
                    colorImages.back().get()->get(),          // VkImage image;
                    VK_IMAGE_VIEW_TYPE_2D,                    // VkImageViewType viewType;
                    colorFormat,                              // VkFormat format;
                    {
                        VK_COMPONENT_SWIZZLE_R, // VkComponentSwizzle r;
                        VK_COMPONENT_SWIZZLE_G, // VkComponentSwizzle g;
                        VK_COMPONENT_SWIZZLE_B, // VkComponentSwizzle b;
                        VK_COMPONENT_SWIZZLE_A  // VkComponentSwizzle a;
                    },                          // VkComponentMapping components;
                    {
                        VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags aspectMask;
                        0u,                        // uint32_t baseMipLevel;
                        1u,                        // uint32_t levelCount;
                        0u,                        // uint32_t baseArrayLayer;
                        1u                         // uint32_t layerCount;
                    }                              // VkImageSubresourceRange subresourceRange;
                };

                Move<VkImageView> colorImageView = createImageView(vk, vkDevice, &colorImageViewParams);
                colorImageViews.push_back(de::SharedPtr<Unique<VkImageView>>(new Unique<VkImageView>(colorImageView)));

                const VkImageMemoryBarrier colorImagePreRenderBarrier = {
                    VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // sType
                    DE_NULL,                                // pNext
                    0u,                                     // srcAccessMask
                    (VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT), // dstAccessMask
                    VK_IMAGE_LAYOUT_UNDEFINED,                // oldLayout
                    VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // newLayout
                    VK_QUEUE_FAMILY_IGNORED,                  // srcQueueFamilyIndex
                    VK_QUEUE_FAMILY_IGNORED,                  // dstQueueFamilyIndex
                    colorImages.back().get()->get(),          // image
                    {
                        VK_IMAGE_ASPECT_COLOR_BIT, // aspectMask
                        0u,                        // baseMipLevel
                        1u,                        // levelCount
                        0u,                        // baseArrayLayer
                        1u,                        // layerCount
                    }                              // subresourceRange
                };
                colorImagePreRenderBarriers.push_back(colorImagePreRenderBarrier);

                const VkImageMemoryBarrier colorImagePostRenderBarrier = {
                    VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // sType
                    DE_NULL,                                // pNext
                    (VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT), // srcAccessMask
                    VK_ACCESS_TRANSFER_READ_BIT,              // dstAccessMask
                    VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // oldLayout
                    VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,     // newLayout
                    VK_QUEUE_FAMILY_IGNORED,                  // srcQueueFamilyIndex
                    VK_QUEUE_FAMILY_IGNORED,                  // dstQueueFamilyIndex
                    colorImages.back().get()->get(),          // image
                    {
                        VK_IMAGE_ASPECT_COLOR_BIT, // aspectMask
                        0u,                        // baseMipLevel
                        1u,                        // levelCount
                        0u,                        // baseArrayLayer
                        1u,                        // layerCount
                    }                              // subresourceRange
                };
                colorImagePostRenderBarriers.push_back(colorImagePostRenderBarrier);
            }
        }
    }

    // Create render pass
    {
        const VkSubpassDescription subpassDescription = {
            0u,                              // VkSubpassDescriptionFlags flags;
            VK_PIPELINE_BIND_POINT_GRAPHICS, // VkPipelineBindPoint pipelineBindPoint;
            0u,                              // uint32_t inputAttachmentCount;
            DE_NULL,                         // const VkAttachmentReference* pInputAttachments;
            (uint32_t)colorImages.size(),    // uint32_t colorAttachmentCount;
            &colorAttachmentReferences[0],   // const VkAttachmentReference* pColorAttachments;
            DE_NULL,                         // const VkAttachmentReference* pResolveAttachments;
            DE_NULL,                         // const VkAttachmentReference* pDepthStencilAttachment;
            0u,                              // uint32_t preserveAttachmentCount;
            DE_NULL                          // const uint32_t* pPreserveAttachments;
        };

        const VkRenderPassCreateInfo renderPassParams = {
            VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // VkStructureType sType;
            DE_NULL,                                   // const void* pNext;
            (VkRenderPassCreateFlags)0,                // VkRenderPassCreateFlags flags;
            (uint32_t)attachments.size(),              // uint32_t attachmentCount;
            &attachments[0],                           // const VkAttachmentDescription* pAttachments;
            1u,                                        // uint32_t subpassCount;
            &subpassDescription,                       // const VkSubpassDescription* pSubpasses;
            0u,                                        // uint32_t dependencyCount;
            DE_NULL                                    // const VkSubpassDependency* pDependencies;
        };

        renderPass = createRenderPass(vk, vkDevice, &renderPassParams);
    }

    // Create framebuffer
    {
        std::vector<VkImageView> views(colorImageViews.size());
        for (size_t i = 0; i < colorImageViews.size(); i++)
        {
            views[i] = colorImageViews[i].get()->get();
        }

        const VkFramebufferCreateInfo framebufferParams = {
            VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, // VkStructureType sType;
            DE_NULL,                                   // const void* pNext;
            0u,                                        // VkFramebufferCreateFlags flags;
            *renderPass,                               // VkRenderPass renderPass;
            (uint32_t)views.size(),                    // uint32_t attachmentCount;
            &views[0],                                 // const VkImageView* pAttachments;
            (uint32_t)renderSize.x(),                  // uint32_t width;
            (uint32_t)renderSize.y(),                  // uint32_t height;
            1u                                         // uint32_t layers;
        };

        framebuffer = createFramebuffer(vk, vkDevice, &framebufferParams);
    }

    // Create pipeline layout
    {
        const VkDescriptorSetLayout setLayouts[] = {*emptyDescriptorSetLayout, m_extraResourcesLayout};
        const VkPipelineLayoutCreateInfo pipelineLayoutParams = {
            VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // VkStructureType sType;
            DE_NULL,                                       // const void* pNext;
            (VkPipelineLayoutCreateFlags)0,                // VkPipelineLayoutCreateFlags flags;
            (m_extraResourcesLayout != 0 ? 2u : 0u),       // uint32_t setLayoutCount;
            setLayouts,                                    // const VkDescriptorSetLayout* pSetLayouts;
            0u,                                            // uint32_t pushConstantRangeCount;
            DE_NULL                                        // const VkPushConstantRange* pPushConstantRanges;
        };

        pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
    }

    // Create shaders
    {
        vertexShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("vert"), 0);
        fragmentShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("frag"), 0);

        if (useGeometryShader)
        {
            if (m_context.getDeviceFeatures().shaderTessellationAndGeometryPointSize)
                geometryShaderModule =
                    createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("geom_point_size"), 0);
            else
                geometryShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("geom"), 0);
        }
    }

    // Create pipeline
    {
        const VkPipelineVertexInputStateCreateInfo vertexInputStateParams = {
            VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
            DE_NULL,                                                   // const void* pNext;
            (VkPipelineVertexInputStateCreateFlags)0,       // VkPipelineVertexInputStateCreateFlags flags;
            (uint32_t)m_vertexBindingDescriptions.size(),   // uint32_t vertexBindingDescriptionCount;
            &m_vertexBindingDescriptions[0],                // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
            (uint32_t)m_vertexAttributeDescriptions.size(), // uint32_t vertexAttributeDescriptionCount;
            &m_vertexAttributeDescriptions[0],              // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions;
        };

        const std::vector<VkViewport> viewports(1, makeViewport(renderSize));
        const std::vector<VkRect2D> scissors(1, makeRect2D(renderSize));

        const VkPipelineColorBlendStateCreateInfo colorBlendStateParams = {
            VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, // VkStructureType sType;
            DE_NULL,                                                  // const void* pNext;
            (VkPipelineColorBlendStateCreateFlags)0,     // VkPipelineColorBlendStateCreateFlags flags;
            VK_FALSE,                                    // VkBool32 logicOpEnable;
            VK_LOGIC_OP_COPY,                            // VkLogicOp logicOp;
            (uint32_t)colorBlendAttachmentStates.size(), // uint32_t attachmentCount;
            &colorBlendAttachmentStates[0],              // const VkPipelineColorBlendAttachmentState* pAttachments;
            {0.0f, 0.0f, 0.0f, 0.0f}                     // float blendConstants[4];
        };

        graphicsPipeline = makeGraphicsPipeline(
            vk,                      // const DeviceInterface& vk
            vkDevice,                // const VkDevice device
            *pipelineLayout,         // const VkPipelineLayout pipelineLayout
            *vertexShaderModule,     // const VkShaderModule vertexShaderModule
            DE_NULL,                 // const VkShaderModule tessellationControlShaderModule
            DE_NULL,                 // const VkShaderModule tessellationEvalShaderModule
            useGeometryShader ? *geometryShaderModule :
                                DE_NULL, // const VkShaderModule geometryShaderModule
            *fragmentShaderModule,   // const VkShaderModule fragmentShaderModule
            *renderPass,             // const VkRenderPass renderPass
            viewports,               // const std::vector<VkViewport>& viewports
            scissors,                // const std::vector<VkRect2D>& scissors
            VK_PRIMITIVE_TOPOLOGY_POINT_LIST, // const VkPrimitiveTopology topology
            0u,                      // const uint32_t subpass
            0u,                      // const uint32_t patchControlPoints
            &vertexInputStateParams, // const VkPipelineVertexInputStateCreateInfo* vertexInputStateCreateInfo
            DE_NULL,                 // const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo
            DE_NULL,                 // const VkPipelineMultisampleStateCreateInfo* multisampleStateCreateInfo
            DE_NULL,                 // const VkPipelineDepthStencilStateCreateInfo* depthStencilStateCreateInfo
            &colorBlendStateParams); // const VkPipelineColorBlendStateCreateInfo* colorBlendStateCreateInfo
    }

1304 // Create command pool
1305 cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
1306
1307 // Create command buffer
1308 {
1309 cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1310
1311 beginCommandBuffer(vk, *cmdBuffer);
1312
1313 vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
1314 vk::VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0, 0,
1315 (const VkMemoryBarrier *)DE_NULL, 0, (const VkBufferMemoryBarrier *)DE_NULL,
1316 (uint32_t)colorImagePreRenderBarriers.size(),
1317 colorImagePreRenderBarriers.empty() ? DE_NULL : &colorImagePreRenderBarriers[0]);
1318 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, renderSize.x(), renderSize.y()),
1319 (uint32_t)attachmentClearValues.size(), &attachmentClearValues[0]);
1320
1321 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *graphicsPipeline);
1322
1323 if (m_extraResourcesLayout != 0)
1324 {
1325 DE_ASSERT(extraResources != 0);
1326 const VkDescriptorSet descriptorSets[] = {*emptyDescriptorSet, extraResources};
1327 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u,
1328 DE_LENGTH_OF_ARRAY(descriptorSets), descriptorSets, 0u, DE_NULL);
1329 }
1330 else
1331 DE_ASSERT(extraResources == 0);
1332
1333 const uint32_t numberOfVertexAttributes = (uint32_t)m_vertexBuffers.size();
1334
1335 std::vector<VkDeviceSize> offsets(numberOfVertexAttributes, 0);
1336
1337 std::vector<VkBuffer> buffers(numberOfVertexAttributes);
1338 for (size_t i = 0; i < numberOfVertexAttributes; i++)
1339 {
1340 buffers[i] = m_vertexBuffers[i].get()->get();
1341 }
1342
1343 vk.cmdBindVertexBuffers(*cmdBuffer, 0, numberOfVertexAttributes, &buffers[0], &offsets[0]);
1344 vk.cmdDraw(*cmdBuffer, (uint32_t)positions.size(), 1u, 0u, 0u);
1345
1346 endRenderPass(vk, *cmdBuffer);
1347 vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
1348 vk::VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0, 0,
1349 (const VkMemoryBarrier *)DE_NULL, 0, (const VkBufferMemoryBarrier *)DE_NULL,
1350 (uint32_t)colorImagePostRenderBarriers.size(),
1351 colorImagePostRenderBarriers.empty() ? DE_NULL : &colorImagePostRenderBarriers[0]);
1352
1353 endCommandBuffer(vk, *cmdBuffer);
1354 }
1355
1356 // Execute Draw
1357 submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());
1358
1359 // Read back result and output
1360 {
1361 const VkDeviceSize imageSizeBytes = (VkDeviceSize)(4 * sizeof(uint32_t) * renderSize.x() * renderSize.y());
1362 const VkBufferCreateInfo readImageBufferParams = {
1363 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
1364 DE_NULL, // const void* pNext;
1365 0u, // VkBufferCreateFlags flags;
1366 imageSizeBytes, // VkDeviceSize size;
1367 VK_BUFFER_USAGE_TRANSFER_DST_BIT, // VkBufferUsageFlags usage;
1368 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
1369 1u, // uint32_t queueFamilyCount;
1370 &queueFamilyIndex, // const uint32_t* pQueueFamilyIndices;
1371 };
1372
1373 // constants for image copy
1374 Move<VkCommandPool> copyCmdPool =
1375 createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
1376
1377 const VkBufferImageCopy copyParams = {
1378 0u, // VkDeviceSize bufferOffset;
1379 (uint32_t)renderSize.x(), // uint32_t bufferRowLength;
1380 (uint32_t)renderSize.y(), // uint32_t bufferImageHeight;
1381 {
1382 VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspect aspect;
1383 0u, // uint32_t mipLevel;
1384 0u, // uint32_t arraySlice;
1385 1u, // uint32_t arraySize;
1386 }, // VkImageSubresource imageSubresource;
1387 {0u, 0u, 0u}, // VkOffset3D imageOffset;
1388 {renderSize.x(), renderSize.y(), 1u} // VkExtent3D imageExtent;
1389 };
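// Since bufferRowLength and bufferImageHeight match the render size, the
// destination buffer is tightly packed with no padding between rows.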
1390
1391 // Read back pixels.
1392 for (int outNdx = 0; outNdx < (int)m_shaderSpec.outputs.size(); ++outNdx)
1393 {
1394 const Symbol &output = m_shaderSpec.outputs[outNdx];
1395 const int outSize = output.varType.getScalarSize();
1396 const int outVecSize = glu::getDataTypeNumComponents(output.varType.getBasicType());
1397 const int outNumLocs = glu::getDataTypeNumLocations(output.varType.getBasicType());
1398 const int outLocation = de::lookup(m_outputLayout.locationMap, output.name);
1399
1400 for (int locNdx = 0; locNdx < outNumLocs; ++locNdx)
1401 {
1402 tcu::TextureLevel tmpBuf;
1403 const tcu::TextureFormat format = getRenderbufferFormatForOutput(output.varType, false);
1404 const tcu::TextureFormat readFormat(tcu::TextureFormat::RGBA, format.type);
1405 const Unique<VkBuffer> readImageBuffer(createBuffer(vk, vkDevice, &readImageBufferParams));
1406 const de::UniquePtr<Allocation> readImageBufferMemory(memAlloc.allocate(
1407 getBufferMemoryRequirements(vk, vkDevice, *readImageBuffer), MemoryRequirement::HostVisible));
1408
1409 VK_CHECK(vk.bindBufferMemory(vkDevice, *readImageBuffer, readImageBufferMemory->getMemory(),
1410 readImageBufferMemory->getOffset()));
1411
1412 // Copy image to buffer
1413 {
1414
1415 Move<VkCommandBuffer> copyCmdBuffer =
1416 allocateCommandBuffer(vk, vkDevice, *copyCmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1417
1418 beginCommandBuffer(vk, *copyCmdBuffer);
1419 vk.cmdCopyImageToBuffer(*copyCmdBuffer, colorImages[outLocation + locNdx].get()->get(),
1420 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *readImageBuffer, 1u, &copyParams);
1421
1422 // Insert a barrier so data written by the transfer is available to the host
1423 {
1424 const VkBufferMemoryBarrier barrier = {
1425 VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // VkStructureType sType;
1426 DE_NULL, // const void* pNext;
1427 VK_ACCESS_TRANSFER_WRITE_BIT, // VkAccessFlags srcAccessMask;
1428 VK_ACCESS_HOST_READ_BIT, // VkAccessFlags dstAccessMask;
1429 VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex;
1430 VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex;
1431 *readImageBuffer, // VkBuffer buffer;
1432 0, // VkDeviceSize offset;
1433 VK_WHOLE_SIZE, // VkDeviceSize size;
1434 };
1435
1436 vk.cmdPipelineBarrier(*copyCmdBuffer, vk::VK_PIPELINE_STAGE_TRANSFER_BIT,
1437 vk::VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0,
1438 (const VkMemoryBarrier *)DE_NULL, 1, &barrier, 0,
1439 (const VkImageMemoryBarrier *)DE_NULL);
1440 }
1441
1442 endCommandBuffer(vk, *copyCmdBuffer);
1443
1444 submitCommandsAndWait(vk, vkDevice, queue, copyCmdBuffer.get());
1445 }
1446
1447 invalidateAlloc(vk, vkDevice, *readImageBufferMemory);
1448
1449 tmpBuf.setStorage(readFormat, renderSize.x(), renderSize.y());
1450
1451 const tcu::TextureFormat resultFormat(tcu::TextureFormat::RGBA, format.type);
1452 const tcu::ConstPixelBufferAccess resultAccess(resultFormat, renderSize.x(), renderSize.y(), 1,
1453 readImageBufferMemory->getHostPtr());
1454
1455 tcu::copy(tmpBuf.getAccess(), resultAccess);
1456
1457 if (isOutput16Bit(static_cast<size_t>(outNdx)))
1458 {
1459 uint16_t *dstPtrBase = static_cast<uint16_t *>(outputs[outNdx]);
1460 if (outSize == 4 && outNumLocs == 1)
1461 deMemcpy(dstPtrBase, tmpBuf.getAccess().getDataPtr(),
1462 numValues * outVecSize * sizeof(uint16_t));
1463 else
1464 {
1465 for (int valNdx = 0; valNdx < numValues; valNdx++)
1466 {
1467 const uint16_t *srcPtr = (const uint16_t *)tmpBuf.getAccess().getDataPtr() + valNdx * 4;
1468 uint16_t *dstPtr = &dstPtrBase[outSize * valNdx + outVecSize * locNdx];
1469 deMemcpy(dstPtr, srcPtr, outVecSize * sizeof(uint16_t));
1470 }
1471 }
1472 }
1473 else
1474 {
1475 uint32_t *dstPtrBase = static_cast<uint32_t *>(outputs[outNdx]);
1476 if (outSize == 4 && outNumLocs == 1)
1477 deMemcpy(dstPtrBase, tmpBuf.getAccess().getDataPtr(),
1478 numValues * outVecSize * sizeof(uint32_t));
1479 else
1480 {
1481 for (int valNdx = 0; valNdx < numValues; valNdx++)
1482 {
1483 const uint32_t *srcPtr = (const uint32_t *)tmpBuf.getAccess().getDataPtr() + valNdx * 4;
1484 uint32_t *dstPtr = &dstPtrBase[outSize * valNdx + outVecSize * locNdx];
1485 deMemcpy(dstPtr, srcPtr, outVecSize * sizeof(uint32_t));
1486 }
1487 }
1488 }
1489 }
1490 }
1491 }
1492 }
1493
1494 // VertexShaderExecutor
1495
1496 class VertexShaderExecutor : public FragmentOutExecutor
1497 {
1498 public:
1499 VertexShaderExecutor(Context &context, const ShaderSpec &shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
1500 virtual ~VertexShaderExecutor(void);
1501
1502 static void generateSources(const ShaderSpec &shaderSpec, SourceCollections &dst);
1503 };
1504
1505 VertexShaderExecutor::VertexShaderExecutor(Context &context, const ShaderSpec &shaderSpec,
1506 VkDescriptorSetLayout extraResourcesLayout)
1507 : FragmentOutExecutor(context, glu::SHADERTYPE_VERTEX, shaderSpec, extraResourcesLayout)
1508 {
1509 }
1510
1511 VertexShaderExecutor::~VertexShaderExecutor(void)
1512 {
1513 }
1514
1515 void VertexShaderExecutor::generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection)
1516 {
1517 const FragmentOutputLayout outputLayout(computeFragmentOutputLayout(shaderSpec.outputs));
1518
1519 programCollection.glslSources.add("vert")
1520 << glu::VertexSource(generateVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;
1521 /* \todo [2015-09-11 hegedusd] set useIntOutputs parameter if needed. */
1522 programCollection.glslSources.add("frag") << glu::FragmentSource(generatePassthroughFragmentShader(
1523 shaderSpec, false, outputLayout.locationMap, "vtx_out_", "o_"))
1524 << shaderSpec.buildOptions;
1525 }
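// In other words, the vertex stage runs the user-provided snippet and hands
// its results to a passthrough fragment shader via "vtx_out_"-prefixed
// varyings, which then land in the "o_"-prefixed color outputs read back by
// FragmentOutExecutor.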
1526
1527 // GeometryShaderExecutor
1528
1529 class GeometryShaderExecutor : public FragmentOutExecutor
1530 {
1531 public:
1532 GeometryShaderExecutor(Context &context, const ShaderSpec &shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
1533 virtual ~GeometryShaderExecutor(void);
1534
1535 static void generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection);
1536 };
1537
1538 GeometryShaderExecutor::GeometryShaderExecutor(Context &context, const ShaderSpec &shaderSpec,
1539 VkDescriptorSetLayout extraResourcesLayout)
1540 : FragmentOutExecutor(context, glu::SHADERTYPE_GEOMETRY, shaderSpec, extraResourcesLayout)
1541 {
1542 const VkPhysicalDeviceFeatures &features = context.getDeviceFeatures();
1543
1544 if (!features.geometryShader)
1545 TCU_THROW(NotSupportedError, "Geometry shader type not supported by device");
1546 }
1547
1548 GeometryShaderExecutor::~GeometryShaderExecutor(void)
1549 {
1550 }
1551
1552 void GeometryShaderExecutor::generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection)
1553 {
1554 const FragmentOutputLayout outputLayout(computeFragmentOutputLayout(shaderSpec.outputs));
1555
1556 programCollection.glslSources.add("vert")
1557 << glu::VertexSource(generatePassthroughVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;
1558
1559 programCollection.glslSources.add("geom")
1560 << glu::GeometrySource(generateGeometryShader(shaderSpec, "vtx_out_", "geom_out_", false))
1561 << shaderSpec.buildOptions;
1562 programCollection.glslSources.add("geom_point_size")
1563 << glu::GeometrySource(generateGeometryShader(shaderSpec, "vtx_out_", "geom_out_", true))
1564 << shaderSpec.buildOptions;
1565
1566 /* \todo [2015-09-18 rsipka] set useIntOutputs parameter if needed. */
1567 programCollection.glslSources.add("frag") << glu::FragmentSource(generatePassthroughFragmentShader(
1568 shaderSpec, false, outputLayout.locationMap, "geom_out_", "o_"))
1569 << shaderSpec.buildOptions;
1570 }
1571
1572 // FragmentShaderExecutor
1573
1574 class FragmentShaderExecutor : public FragmentOutExecutor
1575 {
1576 public:
1577 FragmentShaderExecutor(Context &context, const ShaderSpec &shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
1578 virtual ~FragmentShaderExecutor(void);
1579
1580 static void generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection);
1581 };
1582
1583 FragmentShaderExecutor::FragmentShaderExecutor(Context &context, const ShaderSpec &shaderSpec,
1584 VkDescriptorSetLayout extraResourcesLayout)
1585 : FragmentOutExecutor(context, glu::SHADERTYPE_FRAGMENT, shaderSpec, extraResourcesLayout)
1586 {
1587 }
1588
1589 FragmentShaderExecutor::~FragmentShaderExecutor(void)
1590 {
1591 }
1592
1593 void FragmentShaderExecutor::generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection)
1594 {
1595 const FragmentOutputLayout outputLayout(computeFragmentOutputLayout(shaderSpec.outputs));
1596
1597 programCollection.glslSources.add("vert")
1598 << glu::VertexSource(generatePassthroughVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;
1599 /* \todo [2015-09-11 hegedusd] set useIntOutputs parameter if needed. */
1600 programCollection.glslSources.add("frag")
1601 << glu::FragmentSource(generateFragmentShader(shaderSpec, false, outputLayout.locationMap, "vtx_out_", "o_"))
1602 << shaderSpec.buildOptions;
1603 }
1604
1605 // Shared utilities for compute and tess executors
1606
1607 static uint32_t getVecStd430ByteAlignment(glu::DataType type)
1608 {
1609 uint32_t baseSize;
1610
1611 switch (glu::getDataTypeScalarType(type))
1612 {
1613 case glu::TYPE_FLOAT16:
1614 baseSize = 2u;
1615 break;
1616 case glu::TYPE_DOUBLE:
1617 baseSize = 8u;
1618 break;
1619 default:
1620 baseSize = 4u;
1621 break;
1622 }
1623
1624 switch (glu::getDataTypeScalarSize(type))
1625 {
1626 case 1:
1627 return baseSize;
1628 case 2:
1629 return baseSize * 2u;
1630 case 3: // fallthrough.
1631 case 4:
1632 return baseSize * 4u;
1633 default:
1634 DE_ASSERT(false);
1635 return 0u;
1636 }
1637 }
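// Illustrative values returned above (std430 base alignments):
//   float -> 4, vec2 -> 8, vec3/vec4 -> 16 (vec3 aligns like vec4)
//   f16vec2 -> 4, f16vec3/f16vec4 -> 8
//   double -> 8, dvec2 -> 16, dvec3/dvec4 -> 32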
1638
1639 class BufferIoExecutor : public ShaderExecutor
1640 {
1641 public:
1642 BufferIoExecutor(Context &context, const ShaderSpec &shaderSpec);
1643 virtual ~BufferIoExecutor(void);
1644
1645 protected:
1646 enum
1647 {
1648 INPUT_BUFFER_BINDING = 0,
1649 OUTPUT_BUFFER_BINDING = 1,
1650 };
1651
1652 void initBuffers(int numValues);
1653 VkBuffer getInputBuffer(void) const
1654 {
1655 return *m_inputBuffer;
1656 }
1657 VkBuffer getOutputBuffer(void) const
1658 {
1659 return *m_outputBuffer;
1660 }
1661 uint32_t getInputStride(void) const
1662 {
1663 return getLayoutStride(m_inputLayout);
1664 }
1665 uint32_t getOutputStride(void) const
1666 {
1667 return getLayoutStride(m_outputLayout);
1668 }
1669
1670 void uploadInputBuffer(const void *const *inputPtrs, int numValues, bool packFloat16Bit);
1671 void readOutputBuffer(void *const *outputPtrs, int numValues);
1672
1673 static void declareBufferBlocks(std::ostream &src, const ShaderSpec &spec);
1674 static void generateExecBufferIo(std::ostream &src, const ShaderSpec &spec, const char *invocationNdxName);
1675
1676 protected:
1677 Move<VkBuffer> m_inputBuffer;
1678 Move<VkBuffer> m_outputBuffer;
1679
1680 private:
1681 struct VarLayout
1682 {
1683 uint32_t offset;
1684 uint32_t stride;
1685 uint32_t matrixStride;
1686
1687 VarLayout(void) : offset(0), stride(0), matrixStride(0)
1688 {
1689 }
1690 };
1691
1692 static void computeVarLayout(const std::vector<Symbol> &symbols, std::vector<VarLayout> *layout);
1693 static uint32_t getLayoutStride(const vector<VarLayout> &layout);
1694
1695 static void copyToBuffer(const glu::VarType &varType, const VarLayout &layout, int numValues,
1696 const void *srcBasePtr, void *dstBasePtr, bool packFloat16Bit);
1697 static void copyFromBuffer(const glu::VarType &varType, const VarLayout &layout, int numValues,
1698 const void *srcBasePtr, void *dstBasePtr);
1699
1700 de::MovePtr<Allocation> m_inputAlloc;
1701 de::MovePtr<Allocation> m_outputAlloc;
1702
1703 vector<VarLayout> m_inputLayout;
1704 vector<VarLayout> m_outputLayout;
1705 };
1706
1707 BufferIoExecutor::BufferIoExecutor(Context &context, const ShaderSpec &shaderSpec) : ShaderExecutor(context, shaderSpec)
1708 {
1709 computeVarLayout(m_shaderSpec.inputs, &m_inputLayout);
1710 computeVarLayout(m_shaderSpec.outputs, &m_outputLayout);
1711 }
1712
1713 BufferIoExecutor::~BufferIoExecutor(void)
1714 {
1715 }
1716
1717 inline uint32_t BufferIoExecutor::getLayoutStride(const vector<VarLayout> &layout)
1718 {
1719 return layout.empty() ? 0 : layout[0].stride;
1720 }
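// All entries share one stride (computeVarLayout() sets every entry to the
// aligned size of the whole element), so reading layout[0] is sufficient.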
1721
1722 void BufferIoExecutor::computeVarLayout(const std::vector<Symbol> &symbols, std::vector<VarLayout> *layout)
1723 {
1724 uint32_t maxAlignment = 0;
1725 uint32_t curOffset = 0;
1726
1727 DE_ASSERT(layout != DE_NULL);
1728 DE_ASSERT(layout->empty());
1729 layout->resize(symbols.size());
1730
1731 for (size_t varNdx = 0; varNdx < symbols.size(); varNdx++)
1732 {
1733 const Symbol &symbol = symbols[varNdx];
1734 const glu::DataType basicType = symbol.varType.getBasicType();
1735 VarLayout &layoutEntry = (*layout)[varNdx];
1736
1737 if (glu::isDataTypeScalarOrVector(basicType))
1738 {
1739 const uint32_t alignment = getVecStd430ByteAlignment(basicType);
1740 const uint32_t size =
1741 (uint32_t)glu::getDataTypeScalarSize(basicType) *
1742 (isDataTypeDoubleType(basicType) ?
1743 (int)(sizeof(uint64_t)) :
1744 (isDataTypeFloat16OrVec(basicType) ? (int)sizeof(uint16_t) : (int)sizeof(uint32_t)));
1745
1746 curOffset = (uint32_t)deAlign32((int)curOffset, (int)alignment);
1747 maxAlignment = de::max(maxAlignment, alignment);
1748
1749 layoutEntry.offset = curOffset;
1750 layoutEntry.matrixStride = 0;
1751
1752 curOffset += size;
1753 }
1754 else if (glu::isDataTypeMatrix(basicType))
1755 {
1756 const int numVecs = glu::getDataTypeMatrixNumColumns(basicType);
1757 const glu::DataType vecType =
1758 glu::getDataTypeVector(glu::getDataTypeScalarType(basicType), glu::getDataTypeMatrixNumRows(basicType));
1759 const uint32_t vecAlignment = getVecStd430ByteAlignment(vecType);
1760
1761 curOffset = (uint32_t)deAlign32((int)curOffset, (int)vecAlignment);
1762 maxAlignment = de::max(maxAlignment, vecAlignment);
1763
1764 layoutEntry.offset = curOffset;
1765 layoutEntry.matrixStride = vecAlignment;
1766
1767 curOffset += vecAlignment * numVecs;
1768 }
1769 else
1770 DE_ASSERT(false);
1771 }
1772
1773 {
1774 const uint32_t totalSize = (uint32_t)deAlign32(curOffset, maxAlignment);
1775
1776 for (vector<VarLayout>::iterator varIter = layout->begin(); varIter != layout->end(); ++varIter)
1777 varIter->stride = totalSize;
1778 }
1779 }
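// Worked example: for symbols { vec3 a; float b; }, 'a' gets offset 0 with
// size 12 and alignment 16, 'b' packs into the trailing padding at offset 12,
// and the final aligned size, 16, becomes the stride of every entry.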
1780
1781 void BufferIoExecutor::declareBufferBlocks(std::ostream &src, const ShaderSpec &spec)
1782 {
1783 // Input struct
1784 if (!spec.inputs.empty())
1785 {
1786 glu::StructType inputStruct("Inputs");
1787 for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
1788 inputStruct.addMember(symIter->name.c_str(), symIter->varType);
1789 src << glu::declare(&inputStruct) << ";\n";
1790 }
1791
1792 // Output struct
1793 {
1794 glu::StructType outputStruct("Outputs");
1795 for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1796 outputStruct.addMember(symIter->name.c_str(), symIter->varType);
1797 src << glu::declare(&outputStruct) << ";\n";
1798 }
1799
1800 src << "\n";
1801
1802 if (!spec.inputs.empty())
1803 {
1804 src << "layout(set = 0, binding = " << int(INPUT_BUFFER_BINDING) << ", std430) buffer InBuffer\n"
1805 << "{\n"
1806 << " Inputs inputs[];\n"
1807 << "};\n";
1808 }
1809
1810 src << "layout(set = 0, binding = " << int(OUTPUT_BUFFER_BINDING) << ", std430) buffer OutBuffer\n"
1811 << "{\n"
1812 << " Outputs outputs[];\n"
1813 << "};\n"
1814 << "\n";
1815 }
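// For a spec with a single float input and output, the emitted declarations
// look roughly like this (illustrative):
//   struct Inputs { float in0; };
//   struct Outputs { float out0; };
//   layout(set = 0, binding = 0, std430) buffer InBuffer { Inputs inputs[]; };
//   layout(set = 0, binding = 1, std430) buffer OutBuffer { Outputs outputs[]; };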
1816
1817 void BufferIoExecutor::generateExecBufferIo(std::ostream &src, const ShaderSpec &spec, const char *invocationNdxName)
1818 {
1819 std::string tname;
1820 for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
1821 {
1822 const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
1823 if (f16BitTest)
1824 {
1825 tname = glu::getDataTypeName(getDataTypeFloat16Scalars(symIter->varType.getBasicType()));
1826 }
1827 else
1828 {
1829 tname = glu::getDataTypeName(symIter->varType.getBasicType());
1830 }
1831 src << "\t" << tname << " " << symIter->name << " = " << tname << "(inputs[" << invocationNdxName << "]."
1832 << symIter->name << ");\n";
1833 }
1834
1835 for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1836 {
1837 const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
1838 if (f16BitTest)
1839 {
1840 tname = glu::getDataTypeName(getDataTypeFloat16Scalars(symIter->varType.getBasicType()));
1841 }
1842 else
1843 {
1844 tname = glu::getDataTypeName(symIter->varType.getBasicType());
1845 }
1846 src << "\t" << tname << " " << symIter->name << ";\n";
1847 if (f16BitTest)
1848 {
1849 const char *ttname = glu::getDataTypeName(symIter->varType.getBasicType());
1850 src << "\t" << ttname << " "
1851 << "packed_" << symIter->name << ";\n";
1852 }
1853 }
1854
1855 src << "\n";
1856
1857 {
1858 std::istringstream opSrc(spec.source);
1859 std::string line;
1860
1861 while (std::getline(opSrc, line))
1862 src << "\t" << line << "\n";
1863 }
1864
1865 if (spec.packFloat16Bit)
1866 packFloat16Bit(src, spec.outputs);
1867
1868 src << "\n";
1869 for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1870 {
1871 const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
1872 if (f16BitTest)
1873 src << "\toutputs[" << invocationNdxName << "]." << symIter->name << " = packed_" << symIter->name << ";\n";
1874 else
1875 src << "\toutputs[" << invocationNdxName << "]." << symIter->name << " = " << symIter->name << ";\n";
1876 }
1877 }
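// For the same single-float spec without 16-bit packing, the generated body
// is roughly (illustrative):
//   float in0 = float(inputs[invocationNdx].in0);
//   float out0;
//   ... spec.source lines ...
//   outputs[invocationNdx].out0 = out0;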
1878
1879 void BufferIoExecutor::copyToBuffer(const glu::VarType &varType, const VarLayout &layout, int numValues,
1880 const void *srcBasePtr, void *dstBasePtr, bool packFloat16Bit)
1881 {
1882 if (varType.isBasicType())
1883 {
1884 const glu::DataType basicType = varType.getBasicType();
1885 const bool isMatrix = glu::isDataTypeMatrix(basicType);
1886 const int scalarSize = glu::getDataTypeScalarSize(basicType);
1887 const int numVecs = isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1;
1888 const int numComps = scalarSize / numVecs;
1889 const int size = (glu::isDataTypeDoubleType(basicType) ?
1890 (int)sizeof(uint64_t) :
1891 (glu::isDataTypeFloat16OrVec(basicType) ? (int)sizeof(uint16_t) : (int)sizeof(uint32_t)));
1892
1893 for (int elemNdx = 0; elemNdx < numValues; elemNdx++)
1894 {
1895 for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
1896 {
1897 const int srcOffset = size * (elemNdx * scalarSize + vecNdx * numComps);
1898 const int dstOffset =
1899 layout.offset + layout.stride * elemNdx + (isMatrix ? layout.matrixStride * vecNdx : 0);
1900 const uint8_t *srcPtr = (const uint8_t *)srcBasePtr + srcOffset;
1901 uint8_t *dstPtr = (uint8_t *)dstBasePtr + dstOffset;
1902
1903 if (packFloat16Bit)
1904 {
1905 // Convert the float values to 16 bit and store in the lower 16 bits of 32 bit ints.
1906 for (int cmpNdx = 0; cmpNdx < numComps; ++cmpNdx)
1907 {
1908 deFloat16 f16vals[2] = {};
1909 f16vals[0] = deFloat32To16Round(((float *)srcPtr)[cmpNdx], DE_ROUNDINGMODE_TO_ZERO);
1910 deMemcpy(dstPtr + cmpNdx * size, &f16vals[0], size);
1911 }
1912 }
1913 else
1914 {
1915 deMemcpy(dstPtr, srcPtr, size * numComps);
1916 }
1917 }
1918 }
1919 }
1920 else
1921 throw tcu::InternalError("Unsupported type");
1922 }
1923
1924 void BufferIoExecutor::copyFromBuffer(const glu::VarType &varType, const VarLayout &layout, int numValues,
1925 const void *srcBasePtr, void *dstBasePtr)
1926 {
1927 if (varType.isBasicType())
1928 {
1929 const glu::DataType basicType = varType.getBasicType();
1930 const bool isMatrix = glu::isDataTypeMatrix(basicType);
1931 const int scalarSize = glu::getDataTypeScalarSize(basicType);
1932 const int numVecs = isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1;
1933 const int numComps = scalarSize / numVecs;
1934
1935 for (int elemNdx = 0; elemNdx < numValues; elemNdx++)
1936 {
1937 for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
1938 {
1939 const int size =
1940 (glu::isDataTypeDoubleType(basicType) ?
1941 (int)sizeof(uint64_t) :
1942 (glu::isDataTypeFloat16OrVec(basicType) ? (int)sizeof(uint16_t) : (int)sizeof(uint32_t)));
1943 const int srcOffset =
1944 layout.offset + layout.stride * elemNdx + (isMatrix ? layout.matrixStride * vecNdx : 0);
1945 const int dstOffset = size * (elemNdx * scalarSize + vecNdx * numComps);
1946 const uint8_t *srcPtr = (const uint8_t *)srcBasePtr + srcOffset;
1947 uint8_t *dstPtr = (uint8_t *)dstBasePtr + dstOffset;
1948
1949 deMemcpy(dstPtr, srcPtr, size * numComps);
1950 }
1951 }
1952 }
1953 else
1954 throw tcu::InternalError("Unsupported type");
1955 }
1956
1957 void BufferIoExecutor::uploadInputBuffer(const void *const *inputPtrs, int numValues, bool packFloat16Bit)
1958 {
1959 const VkDevice vkDevice = m_context.getDevice();
1960 const DeviceInterface &vk = m_context.getDeviceInterface();
1961
1962 const uint32_t inputStride = getLayoutStride(m_inputLayout);
1963 const int inputBufferSize = inputStride * numValues;
1964
1965 if (inputBufferSize == 0)
1966 return; // No inputs
1967
1968 DE_ASSERT(m_shaderSpec.inputs.size() == m_inputLayout.size());
1969 for (size_t inputNdx = 0; inputNdx < m_shaderSpec.inputs.size(); ++inputNdx)
1970 {
1971 const glu::VarType &varType = m_shaderSpec.inputs[inputNdx].varType;
1972 const VarLayout &layout = m_inputLayout[inputNdx];
1973
1974 copyToBuffer(varType, layout, numValues, inputPtrs[inputNdx], m_inputAlloc->getHostPtr(), packFloat16Bit);
1975 }
1976
1977 flushAlloc(vk, vkDevice, *m_inputAlloc);
1978 }
1979
1980 void BufferIoExecutor::readOutputBuffer(void *const *outputPtrs, int numValues)
1981 {
1982 const VkDevice vkDevice = m_context.getDevice();
1983 const DeviceInterface &vk = m_context.getDeviceInterface();
1984
1985 DE_ASSERT(numValues > 0); // At least some outputs are required.
1986
1987 invalidateAlloc(vk, vkDevice, *m_outputAlloc);
1988
1989 DE_ASSERT(m_shaderSpec.outputs.size() == m_outputLayout.size());
1990 for (size_t outputNdx = 0; outputNdx < m_shaderSpec.outputs.size(); ++outputNdx)
1991 {
1992 const glu::VarType &varType = m_shaderSpec.outputs[outputNdx].varType;
1993 const VarLayout &layout = m_outputLayout[outputNdx];
1994
1995 copyFromBuffer(varType, layout, numValues, m_outputAlloc->getHostPtr(), outputPtrs[outputNdx]);
1996 }
1997 }
1998
1999 void BufferIoExecutor::initBuffers(int numValues)
2000 {
2001 const uint32_t inputStride = getLayoutStride(m_inputLayout);
2002 const uint32_t outputStride = getLayoutStride(m_outputLayout);
2003 // Avoid creating zero-sized buffer/memory
2004 const size_t inputBufferSize = de::max(numValues * inputStride, 1u);
2005 const size_t outputBufferSize = numValues * outputStride;
2006
2007 // Upload data to buffer
2008 const VkDevice vkDevice = m_context.getDevice();
2009 const DeviceInterface &vk = m_context.getDeviceInterface();
2010 const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
2011 Allocator &memAlloc = m_context.getDefaultAllocator();
2012
2013 const VkBufferCreateInfo inputBufferParams = {
2014 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
2015 DE_NULL, // const void* pNext;
2016 0u, // VkBufferCreateFlags flags;
2017 inputBufferSize, // VkDeviceSize size;
2018 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, // VkBufferUsageFlags usage;
2019 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
2020 1u, // uint32_t queueFamilyIndexCount;
2021 &queueFamilyIndex // const uint32_t* pQueueFamilyIndices;
2022 };
2023
2024 m_inputBuffer = createBuffer(vk, vkDevice, &inputBufferParams);
2025 m_inputAlloc =
2026 memAlloc.allocate(getBufferMemoryRequirements(vk, vkDevice, *m_inputBuffer), MemoryRequirement::HostVisible);
2027
2028 VK_CHECK(vk.bindBufferMemory(vkDevice, *m_inputBuffer, m_inputAlloc->getMemory(), m_inputAlloc->getOffset()));
2029
2030 const VkBufferCreateInfo outputBufferParams = {
2031 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
2032 DE_NULL, // const void* pNext;
2033 0u, // VkBufferCreateFlags flags;
2034 outputBufferSize, // VkDeviceSize size;
2035 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, // VkBufferUsageFlags usage;
2036 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
2037 1u, // uint32_t queueFamilyIndexCount;
2038 &queueFamilyIndex // const uint32_t* pQueueFamilyIndices;
2039 };
2040
2041 m_outputBuffer = createBuffer(vk, vkDevice, &outputBufferParams);
2042 m_outputAlloc =
2043 memAlloc.allocate(getBufferMemoryRequirements(vk, vkDevice, *m_outputBuffer), MemoryRequirement::HostVisible);
2044
2045 VK_CHECK(vk.bindBufferMemory(vkDevice, *m_outputBuffer, m_outputAlloc->getMemory(), m_outputAlloc->getOffset()));
2046 }
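// Only the input size needs the max(..., 1u) clamp: a spec may have no
// inputs, while outputs are always declared, so outputBufferSize is non-zero
// whenever numValues is.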
2047
2048 // ComputeShaderExecutor
2049
2050 class ComputeShaderExecutor : public BufferIoExecutor
2051 {
2052 public:
2053 ComputeShaderExecutor(Context &context, const ShaderSpec &shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
2054 virtual ~ComputeShaderExecutor(void);
2055
2056 static void generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection);
2057
2058 virtual void execute(int numValues, const void *const *inputs, void *const *outputs,
2059 VkDescriptorSet extraResources);
2060
2061 protected:
2062 static std::string generateComputeShader(const ShaderSpec &spec);
2063
2064 private:
2065 const VkDescriptorSetLayout m_extraResourcesLayout;
2066 };
2067
2068 ComputeShaderExecutor::ComputeShaderExecutor(Context &context, const ShaderSpec &shaderSpec,
2069 VkDescriptorSetLayout extraResourcesLayout)
2070 : BufferIoExecutor(context, shaderSpec)
2071 , m_extraResourcesLayout(extraResourcesLayout)
2072 {
2073 }
2074
2075 ComputeShaderExecutor::~ComputeShaderExecutor(void)
2076 {
2077 }
2078
2079 std::string getTypeSpirv(const glu::DataType type, const bool packFloat16Bit = false)
2080 {
2081 switch (type)
2082 {
2083 case glu::TYPE_FLOAT16:
2084 return "%f16";
2085 case glu::TYPE_FLOAT16_VEC2:
2086 return "%v2f16";
2087 case glu::TYPE_FLOAT16_VEC3:
2088 return "%v3f16";
2089 case glu::TYPE_FLOAT16_VEC4:
2090 return "%v4f16";
2091 case glu::TYPE_FLOAT:
2092 return packFloat16Bit ? "%u32" : "%f32"; // f16 values will be bitcast from ui32.
2093 case glu::TYPE_FLOAT_VEC2:
2094 return packFloat16Bit ? "%v2u32" : "%v2f32"; // f16 values will be bitcast from ui32.
2095 case glu::TYPE_FLOAT_VEC3:
2096 return packFloat16Bit ? "%v3u32" : "%v3f32"; // f16 values will be bitcast from ui32.
2097 case glu::TYPE_FLOAT_VEC4:
2098 return packFloat16Bit ? "%v4u32" : "%v4f32"; // f16 values will be bitcast from ui32.
2099 case glu::TYPE_INT:
2100 return "%i32";
2101 case glu::TYPE_INT_VEC2:
2102 return "%v2i32";
2103 case glu::TYPE_INT_VEC3:
2104 return "%v3i32";
2105 case glu::TYPE_INT_VEC4:
2106 return "%v4i32";
2107 case glu::TYPE_DOUBLE:
2108 return "%f64";
2109 case glu::TYPE_DOUBLE_VEC2:
2110 return "%v2f64";
2111 case glu::TYPE_DOUBLE_VEC3:
2112 return "%v3f64";
2113 case glu::TYPE_DOUBLE_VEC4:
2114 return "%v4f64";
2115 default:
2116 DE_ASSERT(0);
2117 return "";
2118 }
2119 }
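// Usage example: getTypeSpirv(glu::TYPE_FLOAT_VEC2, true) returns "%v2u32",
// because in packed mode each 16-bit float is carried in the low half of a
// u32 and bitcast back to %f16 inside the generated shader.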
2120
2121 std::string moveBitOperation(std::string variableName, const int operationNdx)
2122 {
2123 std::ostringstream src;
2124 src << "\n"
2125 << "%operation_move_" << operationNdx << " = OpLoad %i32 " << variableName << "\n"
2126 << "%move1_" << operationNdx << " = OpShiftLeftLogical %i32 %operation_move_" << operationNdx << " %c_i32_1\n"
2127 << "OpStore " << variableName << " %move1_" << operationNdx << "\n";
2128 return src.str();
2129 }
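// For instance, moveBitOperation("%operation", 0) emits:
//   %operation_move_0 = OpLoad %i32 %operation
//   %move1_0 = OpShiftLeftLogical %i32 %operation_move_0 %c_i32_1
//   OpStore %operation %move1_0
// shifting the per-comparison result bit left once between operations.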
2130
2131 std::string scalarComparison(const std::string operation, const int operationNdx, const glu::DataType type,
2132 const std::string &outputType, const int scalarSize)
2133 {
2134 std::ostringstream src;
2135 std::string boolType;
2136
2137 switch (type)
2138 {
2139 case glu::TYPE_FLOAT16:
2140 case glu::TYPE_FLOAT:
2141 case glu::TYPE_DOUBLE:
2142 src << "\n"
2143 << "%operation_result_" << operationNdx << " = " << operation << " %bool %in0_val %in1_val\n"
2144 << "OpSelectionMerge %IF_" << operationNdx << " None\n"
2145 << "OpBranchConditional %operation_result_" << operationNdx << " %label_IF_" << operationNdx << " %IF_"
2146 << operationNdx << "\n"
2147 << "%label_IF_" << operationNdx << " = OpLabel\n"
2148 << "%operation_val_" << operationNdx << " = OpLoad %i32 %operation\n"
2149 << "%out_val_" << operationNdx << " = OpLoad %i32 %out0\n"
2150 << "%add_if_" << operationNdx << " = OpIAdd %i32 %out_val_" << operationNdx << " %operation_val_"
2151 << operationNdx << "\n"
2152 << "OpStore %out0 %add_if_" << operationNdx << "\n"
2153 << "OpBranch %IF_" << operationNdx << "\n"
2154 << "%IF_" << operationNdx << " = OpLabel\n";
2155 return src.str();
2156 case glu::TYPE_FLOAT16_VEC2:
2157 case glu::TYPE_FLOAT_VEC2:
2158 case glu::TYPE_DOUBLE_VEC2:
2159 boolType = "%v2bool";
2160 break;
2161 case glu::TYPE_FLOAT16_VEC3:
2162 case glu::TYPE_FLOAT_VEC3:
2163 case glu::TYPE_DOUBLE_VEC3:
2164 boolType = "%v3bool";
2165 break;
2166 case glu::TYPE_FLOAT16_VEC4:
2167 case glu::TYPE_FLOAT_VEC4:
2168 case glu::TYPE_DOUBLE_VEC4:
2169 boolType = "%v4bool";
2170 break;
2171 default:
2172 DE_ASSERT(0);
2173 return "";
2174 }
2175
2176 src << "\n"
2177 << "%operation_result_" << operationNdx << " = " << operation << " " << boolType << " %in0_val %in1_val\n"
2178 << "%ivec_result_" << operationNdx << " = OpSelect " << outputType << " %operation_result_" << operationNdx
2179 << " %c_" << &outputType[1] << "_1 %c_" << &outputType[1] << "_0\n"
2180 << "%operation_val_" << operationNdx << " = OpLoad %i32 %operation\n";
2181
2182 src << "%operation_vec_" << operationNdx << " = OpCompositeConstruct " << outputType;
2183 for (int ndx = 0; ndx < scalarSize; ++ndx)
2184 src << " %operation_val_" << operationNdx;
2185 src << "\n";
2186
2187 src << "%toAdd" << operationNdx << " = OpIMul " << outputType << " %ivec_result_" << operationNdx
2188 << " %operation_vec_" << operationNdx << "\n"
2189 << "%out_val_" << operationNdx << " = OpLoad " << outputType << " %out0\n"
2190
2191 << "%add_if_" << operationNdx << " = OpIAdd " << outputType << " %out_val_" << operationNdx << " %toAdd"
2192 << operationNdx << "\n"
2193 << "OpStore %out0 %add_if_" << operationNdx << "\n";
2194
2195 return src.str();
2196 }
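// In the vector path above, OpSelect maps the boolean comparison result to a
// 0/1 integer vector, which is multiplied by the current operation bit and
// accumulated into %out0, so each comparison contributes its own bit per
// component.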
2197
2198 std::string generateSpirv(const ShaderSpec &spec, const bool are16Bit, const bool are64Bit, const bool isMediump)
2199 {
2200 static const std::string COMPARE_OPERATIONS[] = {"OpFOrdEqual",
2201 "OpFOrdGreaterThan",
2202 "OpFOrdLessThan",
2203 "OpFOrdGreaterThanEqual",
2204 "OpFOrdLessThanEqual",
2205 "OpFUnordEqual",
2206 "OpFUnordGreaterThan",
2207 "OpFUnordLessThan",
2208 "OpFUnordGreaterThanEqual",
2209 "OpFUnordLessThanEqual"};
2210
2211 int moveBitNdx = 0;
2212 vector<std::string> inputTypes;
2213 vector<std::string> outputTypes;
2214 const std::string packType =
2215 spec.packFloat16Bit ? getTypeSpirv(getDataTypeFloat16Scalars(spec.inputs[0].varType.getBasicType())) : "";
2216
2217 vector<bool> floatResult;
2218 for (const auto &symbol : spec.outputs)
2219 floatResult.push_back(glu::isDataTypeFloatType(symbol.varType.getBasicType()));
2220
2221 const bool anyFloatResult = std::any_of(begin(floatResult), end(floatResult), [](bool b) { return b; });
2222
2223 vector<bool> packFloatRes;
2224 for (const auto &floatRes : floatResult)
2225 packFloatRes.push_back(floatRes && spec.packFloat16Bit);
2226
2227 const bool useF32Types = (!are16Bit && !are64Bit);
2228 const bool useF64Types = are64Bit;
2229 const bool useF16Types = (spec.packFloat16Bit || are16Bit);
2230
2231 for (const auto &symbol : spec.inputs)
2232 inputTypes.push_back(getTypeSpirv(symbol.varType.getBasicType(), spec.packFloat16Bit));
2233
2234 for (const auto &symbol : spec.outputs)
2235 outputTypes.push_back(getTypeSpirv(symbol.varType.getBasicType(), spec.packFloat16Bit));
2236
2237 DE_ASSERT(!inputTypes.empty());
2238 DE_ASSERT(!outputTypes.empty());
2239
2240 // Assert input and output types match the expected operations.
2241 switch (spec.spirvCase)
2242 {
2243 case SPIRV_CASETYPE_COMPARE:
2244 case SPIRV_CASETYPE_FREM:
2245 DE_ASSERT(inputTypes.size() == 2);
2246 DE_ASSERT(outputTypes.size() == 1);
2247 break;
2248 case SPIRV_CASETYPE_MODFSTRUCT:
2249 case SPIRV_CASETYPE_FREXPSTRUCT:
2250 DE_ASSERT(inputTypes.size() == 1);
2251 DE_ASSERT(outputTypes.size() == 2);
2252 break;
2253 default:
2254 DE_ASSERT(false);
2255 break;
2256 }
2257
2258 std::ostringstream src;
2259 src << "; SPIR-V\n"
2260 "; Version: 1.0\n"
2261 "; Generator: Khronos Glslang Reference Front End; 4\n"
2262 "; Bound: 114\n"
2263 "; Schema: 0\n"
2264 "OpCapability Shader\n";
2265
2266 if (useF16Types)
2267 src << "OpCapability Float16\n";
2268
2269 if (are16Bit)
2270 src << "OpCapability StorageBuffer16BitAccess\n"
2271 "OpCapability UniformAndStorageBuffer16BitAccess\n";
2272
2273 if (useF64Types)
2274 src << "OpCapability Float64\n";
2275
2276 if (are16Bit)
2277 src << "OpExtension \"SPV_KHR_16bit_storage\"\n";
2278
2279 src << "%glslstd450 = OpExtInstImport \"GLSL.std.450\"\n"
2280 "OpMemoryModel Logical GLSL450\n"
2281 "OpEntryPoint GLCompute %BP_main \"main\" %BP_id3uNum %BP_id3uID\n"
2282 "OpExecutionMode %BP_main LocalSize 1 1 1\n"
2283 "OpDecorate %BP_id3uNum BuiltIn NumWorkgroups\n"
2284 "OpDecorate %BP_id3uID BuiltIn WorkgroupId\n";
2285
2286 // Input offsets and stride.
2287 {
2288 int offset = 0;
2289 int ndx = 0;
2290 int largest = 0;
2291 for (const auto &symbol : spec.inputs)
2292 {
2293 const int scalarSize = symbol.varType.getScalarSize();
2294 const int memberSize =
2295 (scalarSize + ((scalarSize == 3) ? 1 : 0)) *
2296 (isDataTypeDoubleType(symbol.varType.getBasicType()) ?
2297 (int)sizeof(uint64_t) :
2298 (isDataTypeFloat16OrVec(symbol.varType.getBasicType()) ? (int)sizeof(uint16_t) :
2299 (int)sizeof(uint32_t)));
2300 const int extraMemberBytes = (offset % memberSize);
2301
2302 offset += ((extraMemberBytes == 0) ? 0 : (memberSize - extraMemberBytes));
2303 src << "OpMemberDecorate %SSB0_IN " << ndx << " Offset " << offset << "\n";
2304 ++ndx;
2305
2306 if (memberSize > largest)
2307 largest = memberSize;
2308
2309 offset += memberSize;
2310 }
2311 DE_ASSERT(largest > 0);
2312 const int extraBytes = (offset % largest);
2313 const int stride = offset + (extraBytes == 0 ? 0 : (largest - extraBytes));
2314 src << "OpDecorate %up_SSB0_IN ArrayStride " << stride << "\n";
2315 }
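// Illustrative stride computation: two vec3 inputs of 32-bit floats each get
// memberSize (3 + 1) * 4 = 16, yielding offsets 0 and 16 and ArrayStride 32.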
2316
2317 src << "OpMemberDecorate %ssboIN 0 Offset 0\n"
2318 "OpDecorate %ssboIN BufferBlock\n"
2319 "OpDecorate %ssbo_src DescriptorSet 0\n"
2320 "OpDecorate %ssbo_src Binding 0\n"
2321 "\n";
2322
2323 if (isMediump)
2324 {
2325 for (size_t i = 0; i < inputTypes.size(); ++i)
2326 {
2327 src << "OpMemberDecorate %SSB0_IN " << i
2328 << " RelaxedPrecision\n"
2329 "OpDecorate %in"
2330 << i
2331 << " RelaxedPrecision\n"
2332 "OpDecorate %src_val_0_"
2333 << i
2334 << " RelaxedPrecision\n"
2335 "OpDecorate %in"
2336 << i << "_val RelaxedPrecision\n";
2337 }
2338
2339 if (anyFloatResult)
2340 {
2341 switch (spec.spirvCase)
2342 {
2343 case SPIRV_CASETYPE_FREM:
2344 src << "OpDecorate %frem_result RelaxedPrecision\n";
2345 break;
2346 case SPIRV_CASETYPE_MODFSTRUCT:
2347 src << "OpDecorate %modfstruct_result RelaxedPrecision\n";
2348 break;
2349 case SPIRV_CASETYPE_FREXPSTRUCT:
2350 src << "OpDecorate %frexpstruct_result RelaxedPrecision\n";
2351 break;
2352 default:
2353 DE_ASSERT(false);
2354 break;
2355 }
2356
2357 for (size_t i = 0; i < outputTypes.size(); ++i)
2358 {
2359 src << "OpMemberDecorate %SSB0_OUT " << i << " RelaxedPrecision\n";
2360 src << "OpDecorate %out_val_final_" << i << " RelaxedPrecision\n";
2361 src << "OpDecorate %out" << i << " RelaxedPrecision\n";
2362 }
2363 }
2364 }
2365
2366 // Output offsets and stride.
2367 {
2368 int offset = 0;
2369 int ndx = 0;
2370 int largest = 0;
2371 for (const auto &symbol : spec.outputs)
2372 {
2373 const int scalarSize = symbol.varType.getScalarSize();
2374 const int memberSize =
2375 (scalarSize + ((scalarSize == 3) ? 1 : 0)) *
2376 (isDataTypeDoubleType(symbol.varType.getBasicType()) ?
2377 (int)sizeof(uint64_t) :
2378 (isDataTypeFloat16OrVec(symbol.varType.getBasicType()) ? (int)sizeof(uint16_t) :
2379 (int)sizeof(uint32_t)));
2380 const int extraMemberBytes = (offset % memberSize);
2381
2382 offset += ((extraMemberBytes == 0) ? 0 : (memberSize - extraMemberBytes));
2383 src << "OpMemberDecorate %SSB0_OUT " << ndx << " Offset " << offset << "\n";
2384 ++ndx;
2385
2386 if (memberSize > largest)
2387 largest = memberSize;
2388
2389 offset += memberSize;
2390 }
2391 DE_ASSERT(largest > 0);
2392 const int extraBytes = (offset % largest);
2393 const int stride = offset + ((extraBytes == 0) ? 0 : (largest - extraBytes));
2394 src << "OpDecorate %up_SSB0_OUT ArrayStride " << stride << "\n";
2395 }
2396
2397 src << "OpMemberDecorate %ssboOUT 0 Offset 0\n"
2398 "OpDecorate %ssboOUT BufferBlock\n"
2399 "OpDecorate %ssbo_dst DescriptorSet 0\n"
2400 "OpDecorate %ssbo_dst Binding 1\n"
2401 "\n"
2402 "%void = OpTypeVoid\n"
2403 "%bool = OpTypeBool\n"
2404 "%v2bool = OpTypeVector %bool 2\n"
2405 "%v3bool = OpTypeVector %bool 3\n"
2406 "%v4bool = OpTypeVector %bool 4\n"
2407 "%u32 = OpTypeInt 32 0\n";
2408
2409 if (useF32Types)
2410 src << "%f32 = OpTypeFloat 32\n"
2411 "%v2f32 = OpTypeVector %f32 2\n"
2412 "%v3f32 = OpTypeVector %f32 3\n"
2413 "%v4f32 = OpTypeVector %f32 4\n";
2414
2415 if (useF64Types)
2416 src << "%f64 = OpTypeFloat 64\n"
2417 "%v2f64 = OpTypeVector %f64 2\n"
2418 "%v3f64 = OpTypeVector %f64 3\n"
2419 "%v4f64 = OpTypeVector %f64 4\n";
2420
2421 if (useF16Types)
2422 src << "%f16 = OpTypeFloat 16\n"
2423 "%v2f16 = OpTypeVector %f16 2\n"
2424 "%v3f16 = OpTypeVector %f16 3\n"
2425 "%v4f16 = OpTypeVector %f16 4\n";
2426
2427 src << "%i32 = OpTypeInt 32 1\n"
2428 "%v2i32 = OpTypeVector %i32 2\n"
2429 "%v3i32 = OpTypeVector %i32 3\n"
2430 "%v4i32 = OpTypeVector %i32 4\n"
2431 "%v2u32 = OpTypeVector %u32 2\n"
2432 "%v3u32 = OpTypeVector %u32 3\n"
2433 "%v4u32 = OpTypeVector %u32 4\n"
2434 "\n"
2435 "%ip_u32 = OpTypePointer Input %u32\n"
2436 "%ip_v3u32 = OpTypePointer Input %v3u32\n"
2437 "%up_float = OpTypePointer Uniform "
2438 << inputTypes[0]
2439 << "\n"
2440 "\n"
2441 "%fp_operation = OpTypePointer Function %i32\n"
2442 "%voidf = OpTypeFunction %void\n"
2443 "%fp_u32 = OpTypePointer Function %u32\n"
2444 "%fp_it1 = OpTypePointer Function "
2445 << inputTypes[0] << "\n";
2446
2447 for (size_t i = 0; i < outputTypes.size(); ++i)
2448 {
2449 src << "%fp_out_" << i << " = OpTypePointer Function " << outputTypes[i] << "\n"
2450 << "%up_out_" << i << " = OpTypePointer Uniform " << outputTypes[i] << "\n";
2451 }
2452
2453 if (spec.packFloat16Bit)
2454 src << "%fp_f16 = OpTypePointer Function " << packType << "\n";
2455
2456 src << "%BP_id3uID = OpVariable %ip_v3u32 Input\n"
2457 "%BP_id3uNum = OpVariable %ip_v3u32 Input\n"
2458 "\n"
2459 "%c_u32_0 = OpConstant %u32 0\n"
2460 "%c_u32_1 = OpConstant %u32 1\n"
2461 "%c_u32_2 = OpConstant %u32 2\n"
2462 "%c_i32_0 = OpConstant %i32 0\n"
2463 "%c_i32_1 = OpConstant %i32 1\n"
2464 "\n";
2465
2466 if (useF32Types)
2467 src << "%c_f32_0 = OpConstant %f32 0\n"
2468 "%c_f32_1 = OpConstant %f32 1\n";
2469
2470 if (useF16Types)
2471 src << "%c_f16_0 = OpConstant %f16 0\n"
2472 "%c_f16_1 = OpConstant %f16 1\n"
2473 "%c_f16_minus1 = OpConstant %f16 -0x1p+0";
2474
2475 if (useF64Types)
2476 src << "%c_f64_0 = OpConstant %f64 0\n"
2477 "%c_f64_1 = OpConstant %f64 1\n";
2478
2479 src << "\n"
2480 "%c_v2i32_0 = OpConstantComposite %v2i32 %c_i32_0 %c_i32_0\n"
2481 "%c_v2i32_1 = OpConstantComposite %v2i32 %c_i32_1 %c_i32_1\n"
2482 "%c_v3i32_0 = OpConstantComposite %v3i32 %c_i32_0 %c_i32_0 %c_i32_0\n"
2483 "%c_v3i32_1 = OpConstantComposite %v3i32 %c_i32_1 %c_i32_1 %c_i32_1\n"
2484 "%c_v4i32_0 = OpConstantComposite %v4i32 %c_i32_0 %c_i32_0 %c_i32_0 %c_i32_0\n"
2485 "%c_v4i32_1 = OpConstantComposite %v4i32 %c_i32_1 %c_i32_1 %c_i32_1 %c_i32_1\n"
2486 "\n";
2487
2488 if (useF32Types)
2489 src << "%c_v2f32_0 = OpConstantComposite %v2f32 %c_f32_0 %c_f32_0\n"
2490 "%c_v2f32_1 = OpConstantComposite %v2f32 %c_f32_1 %c_f32_1\n"
2491 "%c_v3f32_0 = OpConstantComposite %v3f32 %c_f32_0 %c_f32_0 %c_f32_0\n"
2492 "%c_v3f32_1 = OpConstantComposite %v3f32 %c_f32_1 %c_f32_1 %c_f32_1\n"
2493 "%c_v4f32_0 = OpConstantComposite %v4f32 %c_f32_0 %c_f32_0 %c_f32_0 %c_f32_0\n"
2494 "%c_v4f32_1 = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n";
2495
2496 if (useF16Types)
2497 src << "%c_v2f16_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
2498 "%c_v2f16_1 = OpConstantComposite %v2f16 %c_f16_1 %c_f16_1\n"
2499 "%c_v3f16_0 = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0 %c_f16_0\n"
2500 "%c_v3f16_1 = OpConstantComposite %v3f16 %c_f16_1 %c_f16_1 %c_f16_1\n"
2501 "%c_v4f16_0 = OpConstantComposite %v4f16 %c_f16_0 %c_f16_0 %c_f16_0 %c_f16_0\n"
2502 "%c_v4f16_1 = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_1\n";
2503
2504 if (useF64Types)
2505 src << "%c_v2f64_0 = OpConstantComposite %v2f64 %c_f64_0 %c_f64_0\n"
2506 "%c_v2f64_1 = OpConstantComposite %v2f64 %c_f64_1 %c_f64_1\n"
2507 "%c_v3f64_0 = OpConstantComposite %v3f64 %c_f64_0 %c_f64_0 %c_f64_0\n"
2508 "%c_v3f64_1 = OpConstantComposite %v3f64 %c_f64_1 %c_f64_1 %c_f64_1\n"
2509 "%c_v4f64_0 = OpConstantComposite %v4f64 %c_f64_0 %c_f64_0 %c_f64_0 %c_f64_0\n"
2510 "%c_v4f64_1 = OpConstantComposite %v4f64 %c_f64_1 %c_f64_1 %c_f64_1 %c_f64_1\n"
2511 "\n";
2512
2513 // Input struct.
2514 {
2515 src << "%SSB0_IN = OpTypeStruct";
2516 for (const auto &t : inputTypes)
2517 src << " " << t;
2518 src << "\n";
2519 }
2520
2521 src << "%up_SSB0_IN = OpTypeRuntimeArray %SSB0_IN\n"
2522 "%ssboIN = OpTypeStruct %up_SSB0_IN\n"
2523 "%up_ssboIN = OpTypePointer Uniform %ssboIN\n"
2524 "%ssbo_src = OpVariable %up_ssboIN Uniform\n"
2525 "\n";
2526
2527 // Output struct.
2528 {
2529 src << "%SSB0_OUT = OpTypeStruct";
2530 for (const auto &t : outputTypes)
2531 src << " " << t;
2532 src << "\n";
2533 }
2534
2535 std::string modfStructMemberType;
2536 std::string frexpStructFirstMemberType;
2537 if (spec.spirvCase == SPIRV_CASETYPE_MODFSTRUCT)
2538 {
2539 modfStructMemberType = (packFloatRes[0] ? packType : outputTypes[0]);
2540 src << "%modfstruct_ret_t = OpTypeStruct " << modfStructMemberType << " " << modfStructMemberType << "\n";
2541 }
2542 else if (spec.spirvCase == SPIRV_CASETYPE_FREXPSTRUCT)
2543 {
2544 frexpStructFirstMemberType = (packFloatRes[0] ? packType : outputTypes[0]);
2545 src << "%frexpstruct_ret_t = OpTypeStruct " << frexpStructFirstMemberType << " " << outputTypes[1] << "\n";
2546 }
2547
2548 src << "%up_SSB0_OUT = OpTypeRuntimeArray %SSB0_OUT\n"
2549 "%ssboOUT = OpTypeStruct %up_SSB0_OUT\n"
2550 "%up_ssboOUT = OpTypePointer Uniform %ssboOUT\n"
2551 "%ssbo_dst = OpVariable %up_ssboOUT Uniform\n"
2552 "\n"
2553 "%BP_main = OpFunction %void None %voidf\n"
2554 "%BP_label = OpLabel\n"
2555 "%invocationNdx = OpVariable %fp_u32 Function\n";
2556
2557 // Note: here we assume all inputs have the same type.
2558 for (size_t i = 0; i < inputTypes.size(); ++i)
2559 src << "%in" << i << " = OpVariable " << (spec.packFloat16Bit ? "%fp_f16" : "%fp_it1") << " Function\n";
2560
2561 for (size_t i = 0; i < outputTypes.size(); ++i)
2562 src << "%out" << i << " = OpVariable "
2563 << (packFloatRes[i] ? std::string("%fp_f16") : std::string("%fp_out_") + de::toString(i)) << " Function\n";
2564
2565 src << "%operation = OpVariable %fp_operation Function\n"
2566 "%BP_id_0_ptr = OpAccessChain %ip_u32 %BP_id3uID %c_u32_0\n"
2567 "%BP_id_1_ptr = OpAccessChain %ip_u32 %BP_id3uID %c_u32_1\n"
2568 "%BP_id_2_ptr = OpAccessChain %ip_u32 %BP_id3uID %c_u32_2\n"
2569 "%BP_num_0_ptr = OpAccessChain %ip_u32 %BP_id3uNum %c_u32_0\n"
2570 "%BP_num_1_ptr = OpAccessChain %ip_u32 %BP_id3uNum %c_u32_1\n"
2571 "%BP_id_0_val = OpLoad %u32 %BP_id_0_ptr\n"
2572 "%BP_id_1_val = OpLoad %u32 %BP_id_1_ptr\n"
2573 "%BP_id_2_val = OpLoad %u32 %BP_id_2_ptr\n"
2574 "%BP_num_0_val = OpLoad %u32 %BP_num_0_ptr\n"
2575 "%BP_num_1_val = OpLoad %u32 %BP_num_1_ptr\n"
2576 "\n"
2577 "%mul_1 = OpIMul %u32 %BP_num_0_val %BP_num_1_val\n"
2578 "%mul_2 = OpIMul %u32 %mul_1 %BP_id_2_val\n"
2579 "%mul_3 = OpIMul %u32 %BP_num_0_val %BP_id_1_val\n"
2580 "%add_1 = OpIAdd %u32 %mul_2 %mul_3\n"
2581 "%add_2 = OpIAdd %u32 %add_1 %BP_id_0_val\n"
2582 "OpStore %invocationNdx %add_2\n"
2583 "%invocationNdx_val = OpLoad %u32 %invocationNdx\n";
2584
2585 // Load input values.
2586 for (size_t inputNdx = 0; inputNdx < inputTypes.size(); ++inputNdx)
2587 {
2588 src << "\n"
2589 << "%src_ptr_0_" << inputNdx << " = OpAccessChain %up_float %ssbo_src %c_i32_0 %invocationNdx_val %c_i32_"
2590 << inputNdx << "\n"
2591 << "%src_val_0_" << inputNdx << " = OpLoad " << inputTypes[inputNdx] << " %src_ptr_0_" << inputNdx << "\n";
2592
2593 if (spec.packFloat16Bit)
2594 {
2595 if (spec.inputs[inputNdx].varType.getScalarSize() > 1)
2596 {
2597 // Extract the val<inputNdx> u32 input channels into individual f16 values.
2598 for (int i = 0; i < spec.inputs[inputNdx].varType.getScalarSize(); ++i)
2599 {
2600 src << "%src_val_0_" << inputNdx << "_" << i << " = OpCompositeExtract %u32 %src_val_0_" << inputNdx
2601 << " " << i
2602 << "\n"
2603 "%val_v2f16_0_"
2604 << inputNdx << "_" << i << " = OpBitcast %v2f16 %src_val_0_" << inputNdx << "_" << i
2605 << "\n"
2606 "%val_f16_0_"
2607 << inputNdx << "_" << i << " = OpCompositeExtract %f16 %val_v2f16_0_" << inputNdx << "_" << i
2608 << " 0\n";
2609 }
2610
2611 // Construct the input vector.
2612 src << "%val_f16_0_" << inputNdx << " = OpCompositeConstruct " << packType;
2613 for (int i = 0; i < spec.inputs[inputNdx].varType.getScalarSize(); ++i)
2614 {
2615 src << " %val_f16_0_" << inputNdx << "_" << i;
2616 }
2617
2618 src << "\n";
2619 src << "OpStore %in" << inputNdx << " %val_f16_0_" << inputNdx << "\n";
2620 }
2621 else
2622 {
2623 src << "%val_v2f16_0_" << inputNdx << " = OpBitcast %v2f16 %src_val_0_" << inputNdx
2624 << "\n"
2625 "%val_f16_0_"
2626 << inputNdx << " = OpCompositeExtract %f16 %val_v2f16_0_" << inputNdx << " 0\n";
2627
2628 src << "OpStore %in" << inputNdx << " %val_f16_0_" << inputNdx << "\n";
2629 }
2630 }
2631 else
2632 src << "OpStore %in" << inputNdx << " %src_val_0_" << inputNdx << "\n";
2633
2634 src << "%in" << inputNdx << "_val = OpLoad " << (spec.packFloat16Bit ? packType : inputTypes[inputNdx])
2635 << " %in" << inputNdx << "\n";
2636 }
2637
2638 src << "\n"
2639 "OpStore %operation %c_i32_1\n";
2640
2641 // Fill output values with dummy data.
2642 for (size_t i = 0; i < outputTypes.size(); ++i)
2643 src << "OpStore %out" << i << " %c_" << (packFloatRes[i] ? &packType[1] : &outputTypes[i][1]) << "_0\n";
2644
2645 src << "\n";
2646
2647 // Run operation.
2648 switch (spec.spirvCase)
2649 {
2650 case SPIRV_CASETYPE_COMPARE:
2651 for (int operationNdx = 0; operationNdx < DE_LENGTH_OF_ARRAY(COMPARE_OPERATIONS); ++operationNdx)
2652 {
2653 src << scalarComparison(COMPARE_OPERATIONS[operationNdx], operationNdx,
2654 spec.inputs[0].varType.getBasicType(), outputTypes[0],
2655 spec.outputs[0].varType.getScalarSize());
2656 src << moveBitOperation("%operation", moveBitNdx);
2657 ++moveBitNdx;
2658 }
2659 break;
2660 case SPIRV_CASETYPE_FREM:
2661 src << "%frem_result = OpFRem " << (packFloatRes[0] ? packType : outputTypes[0]) << " %in0_val %in1_val\n"
2662 << "OpStore %out0 %frem_result\n";
2663 break;
2664 case SPIRV_CASETYPE_MODFSTRUCT:
2665 src << "%modfstruct_result = OpExtInst %modfstruct_ret_t %glslstd450 ModfStruct %in0_val\n"
2666 << "%modfstruct_result_0 = OpCompositeExtract " << modfStructMemberType << " %modfstruct_result 0\n"
2667 << "%modfstruct_result_1 = OpCompositeExtract " << modfStructMemberType << " %modfstruct_result 1\n"
2668 << "OpStore %out0 %modfstruct_result_0\n"
2669 << "OpStore %out1 %modfstruct_result_1\n";
2670 break;
2671 case SPIRV_CASETYPE_FREXPSTRUCT:
2672 src << "%frexpstruct_result = OpExtInst %frexpstruct_ret_t %glslstd450 FrexpStruct %in0_val\n"
2673 << "%frexpstruct_result_0 = OpCompositeExtract " << frexpStructFirstMemberType << " %frexpstruct_result 0\n"
2674 << "%frexpstruct_result_1 = OpCompositeExtract " << outputTypes[1] << " %frexpstruct_result 1\n"
2675 << "OpStore %out0 %frexpstruct_result_0\n"
2676 << "OpStore %out1 %frexpstruct_result_1\n";
2677 break;
2678 default:
2679 DE_ASSERT(false);
2680 break;
2681 }
2682
2683 for (size_t outputNdx = 0; outputNdx < outputTypes.size(); ++outputNdx)
2684 {
2685 src << "\n"
2686 "%out_val_final_"
2687 << outputNdx << " = OpLoad " << (packFloatRes[outputNdx] ? packType : outputTypes[outputNdx]) << " %out"
2688 << outputNdx
2689 << "\n"
2690 "%ssbo_dst_ptr_"
2691 << outputNdx << " = OpAccessChain %up_out_" << outputNdx << " %ssbo_dst %c_i32_0 %invocationNdx_val %c_i32_"
2692 << outputNdx << "\n";
2693
2694 if (packFloatRes[outputNdx])
2695 {
2696 if (spec.outputs[outputNdx].varType.getScalarSize() > 1)
2697 {
2698 for (int i = 0; i < spec.outputs[outputNdx].varType.getScalarSize(); ++i)
2699 {
2700 src << "%out_val_final_" << outputNdx << "_" << i << " = OpCompositeExtract %f16 %out_val_final_"
2701 << outputNdx << " " << i << "\n";
2702 src << "%out_composite_" << outputNdx << "_" << i
2703 << " = OpCompositeConstruct %v2f16 %out_val_final_" << outputNdx << "_" << i
2704 << " %c_f16_minus1\n";
2705 src << "%u32_val_" << outputNdx << "_" << i << " = OpBitcast %u32 %out_composite_" << outputNdx
2706 << "_" << i << "\n";
2707 }
2708
2709 src << "%u32_final_val_" << outputNdx << " = OpCompositeConstruct " << outputTypes[outputNdx];
2710 for (int i = 0; i < spec.outputs[outputNdx].varType.getScalarSize(); ++i)
2711 src << " %u32_val_" << outputNdx << "_" << i;
2712 src << "\n";
2713 src << "OpStore %ssbo_dst_ptr_" << outputNdx << " %u32_final_val_" << outputNdx << "\n";
2714 }
2715 else
2716 {
2717 src << "%out_composite_" << outputNdx << " = OpCompositeConstruct %v2f16 %out_val_final_" << outputNdx
2718 << " %c_f16_minus1\n"
2719 "%out_result_"
2720 << outputNdx << " = OpBitcast " << outputTypes[outputNdx] << " %out_composite_" << outputNdx
2721 << "\n"
2722 "OpStore %ssbo_dst_ptr_"
2723 << outputNdx << " %out_result_" << outputNdx << "\n";
2724 }
2725 }
2726 else
2727 {
2728 src << "OpStore %ssbo_dst_ptr_" << outputNdx << " %out_val_final_" << outputNdx << "\n";
2729 }
2730 }
2731
2732 src << "\n"
2733 "OpReturn\n"
2734 "OpFunctionEnd\n";
2735
2736 return src.str();
2737 }
2738
2739 std::string ComputeShaderExecutor::generateComputeShader(const ShaderSpec &spec)
2740 {
2741 if (spec.spirvCase != SPIRV_CASETYPE_NONE)
2742 {
2743 bool are16Bit = false;
2744 bool are64Bit = false;
2745 bool isMediump = false;
2746 for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
2747 {
2748 if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
2749 are16Bit = true;
2750
2751 if (glu::isDataTypeDoubleType(symIter->varType.getBasicType()))
2752 are64Bit = true;
2753
2754 if (symIter->varType.getPrecision() == glu::PRECISION_MEDIUMP)
2755 isMediump = true;
2756
2757 if (isMediump && are16Bit)
2758 break;
2759 }
2760
2761 return generateSpirv(spec, are16Bit, are64Bit, isMediump);
2762 }
2763 else
2764 {
2765 std::ostringstream src;
2766 src << glu::getGLSLVersionDeclaration(spec.glslVersion) << "\n";
2767
2768 if (!spec.globalDeclarations.empty())
2769 src << spec.globalDeclarations << "\n";
2770
2771 src << "layout(local_size_x = " << spec.localSizeX << ") in;\n"
2772 << "\n";
2773
2774 declareBufferBlocks(src, spec);
2775
2776 src << "void main (void)\n"
2777 << "{\n"
2778 << " uint invocationNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z\n"
2779 << " + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n";
2780
2781 generateExecBufferIo(src, spec, "invocationNdx");
2782
2783 src << "}\n";
2784
2785 return src.str();
2786 }
2787 }
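// Without a SPIR-V case the generated GLSL is roughly (illustrative):
//   <version declaration>
//   layout(local_size_x = N) in;
//   <buffer block declarations, see declareBufferBlocks>
//   void main (void)
//   {
//       uint invocationNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z
//                          + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;
//       <per-invocation buffer I/O, see generateExecBufferIo>
//   }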
2788
2789 void ComputeShaderExecutor::generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection)
2790 {
2791 if (shaderSpec.spirvCase != SPIRV_CASETYPE_NONE)
2792 programCollection.spirvAsmSources.add("compute")
2793 << SpirVAsmBuildOptions(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3)
2794 << generateComputeShader(shaderSpec);
2795 else
2796 programCollection.glslSources.add("compute")
2797 << glu::ComputeSource(generateComputeShader(shaderSpec)) << shaderSpec.buildOptions;
2798 }
2799
2800 void ComputeShaderExecutor::execute(int numValues, const void *const *inputs, void *const *outputs,
2801 VkDescriptorSet extraResources)
2802 {
2803 const VkDevice vkDevice = m_context.getDevice();
2804 const DeviceInterface &vk = m_context.getDeviceInterface();
2805 const VkQueue queue = m_context.getUniversalQueue();
2806 const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
2807
2808 DescriptorPoolBuilder descriptorPoolBuilder;
2809 DescriptorSetLayoutBuilder descriptorSetLayoutBuilder;
2810
2811 Move<VkShaderModule> computeShaderModule;
2812 Move<VkPipeline> computePipeline;
2813 Move<VkPipelineLayout> pipelineLayout;
2814 Move<VkCommandPool> cmdPool;
2815 Move<VkDescriptorPool> descriptorPool;
2816 Move<VkDescriptorSetLayout> descriptorSetLayout;
2817 Move<VkDescriptorSet> descriptorSet;
2818 const uint32_t numDescriptorSets = (m_extraResourcesLayout != 0) ? 2u : 1u;
2819
2820 DE_ASSERT((m_extraResourcesLayout != 0) == (extraResources != 0));
2821
2822 initBuffers(numValues);
2823
2824 // Setup input buffer & copy data
2825 // For SPIR-V shaders using packed 16 bit float values as input, the floats are converted to 16 bit before
2826 // being stored in the lower 16 bits of 32 bit integers in the storage buffer, and are bitcast back to
2827 // 16 bit floats in the shader.
2828 uploadInputBuffer(inputs, numValues,
2829 m_shaderSpec.packFloat16Bit && (m_shaderSpec.spirvCase != SPIRV_CASETYPE_NONE));
2830
2831 // Create command pool
2832 cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
2833
2834 // Set up descriptor set layout and pool
2835
2836 descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
2837 descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
2838 descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
2839 descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
2840
2841 descriptorSetLayout = descriptorSetLayoutBuilder.build(vk, vkDevice);
2842 descriptorPool = descriptorPoolBuilder.build(vk, vkDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2843
2844 const VkDescriptorSetAllocateInfo allocInfo = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, DE_NULL,
2845 *descriptorPool, 1u, &*descriptorSetLayout};
2846
2847 descriptorSet = allocateDescriptorSet(vk, vkDevice, &allocInfo);
2848
2849 // Create pipeline layout
2850 {
2851 const VkDescriptorSetLayout descriptorSetLayouts[] = {*descriptorSetLayout, m_extraResourcesLayout};
2852 const VkPipelineLayoutCreateInfo pipelineLayoutParams = {
2853 VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // VkStructureType sType;
2854 DE_NULL, // const void* pNext;
2855 (VkPipelineLayoutCreateFlags)0, // VkPipelineLayoutCreateFlags flags;
2856 numDescriptorSets, // uint32_t descriptorSetCount;
2857 descriptorSetLayouts, // const VkDescriptorSetLayout* pSetLayouts;
2858 0u, // uint32_t pushConstantRangeCount;
2859 DE_NULL // const VkPushConstantRange* pPushConstantRanges;
2860 };
2861
2862 pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
2863 }
2864
2865 // Create shaders
2866 {
2867 computeShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("compute"), 0);
2868 }
2869
2870 // Create pipeline
2871 {
2872 const VkPipelineShaderStageCreateInfo shaderStageParams[1] = {{
2873 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
2874 DE_NULL, // const void* pNext;
2875 (VkPipelineShaderStageCreateFlags)0u, // VkPipelineShaderStageCreateFlags flags;
2876 VK_SHADER_STAGE_COMPUTE_BIT, // VkShaderStageFlagBits stage;
2877 *computeShaderModule, // VkShaderModule module;
2878 "main", // const char* pName;
2879 DE_NULL // const VkSpecializationInfo* pSpecializationInfo;
2880 }};
2881
2882 const VkComputePipelineCreateInfo computePipelineParams = {
2883 VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // VkStructureType sType;
2884 DE_NULL, // const void* pNext;
2885 (VkPipelineCreateFlags)0, // VkPipelineCreateFlags flags;
2886 *shaderStageParams, // VkPipelineShaderStageCreateInfo stage;
2887 *pipelineLayout, // VkPipelineLayout layout;
2888 DE_NULL, // VkPipeline basePipelineHandle;
2889 0, // int32_t basePipelineIndex;
2890 };
2891
2892 computePipeline = createComputePipeline(vk, vkDevice, DE_NULL, &computePipelineParams);
2893 }
2894
2895 const int maxValuesPerInvocation = m_context.getDeviceProperties().limits.maxComputeWorkGroupSize[0];
2896 int curOffset = 0;
2897 const uint32_t inputStride = getInputStride();
2898 const uint32_t outputStride = getOutputStride();
2899
2900 while (curOffset < numValues)
2901 {
2902 Move<VkCommandBuffer> cmdBuffer;
2903 const int numToExec = de::min(maxValuesPerInvocation, numValues - curOffset);
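// Example of the batching arithmetic: for numValues == 300 and maxValuesPerInvocation == 128,
// the loop runs batches of 128, 128 and 44 values, each recorded and submitted separately.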
2904
2905 // Update descriptors
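// Re-using one descriptor set across batches is valid because submitCommandsAndWait()
// below blocks until the previous batch has finished before the set is updated again.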
2906 {
2907 DescriptorSetUpdateBuilder descriptorSetUpdateBuilder;
2908
2909 const VkDescriptorBufferInfo outputDescriptorBufferInfo = {
2910 *m_outputBuffer, // VkBuffer buffer;
2911 curOffset * outputStride, // VkDeviceSize offset;
2912 numToExec * outputStride // VkDeviceSize range;
2913 };
2914
2915 descriptorSetUpdateBuilder.writeSingle(
2916 *descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((uint32_t)OUTPUT_BUFFER_BINDING),
2917 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputDescriptorBufferInfo);
2918
2919 if (inputStride)
2920 {
2921 const VkDescriptorBufferInfo inputDescriptorBufferInfo = {
2922 *m_inputBuffer, // VkBuffer buffer;
2923 curOffset * inputStride, // VkDeviceSize offset;
2924 numToExec * inputStride // VkDeviceSize range;
2925 };
2926
2927 descriptorSetUpdateBuilder.writeSingle(
2928 *descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((uint32_t)INPUT_BUFFER_BINDING),
2929 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputDescriptorBufferInfo);
2930 }
2931
2932 descriptorSetUpdateBuilder.update(vk, vkDevice);
2933 }
2934
2935 cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2936 beginCommandBuffer(vk, *cmdBuffer);
2937 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
2938
2939 {
2940 const VkDescriptorSet descriptorSets[] = {*descriptorSet, extraResources};
2941 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, numDescriptorSets,
2942 descriptorSets, 0u, DE_NULL);
2943 }
2944
2945 vk.cmdDispatch(*cmdBuffer, numToExec, 1, 1);
2946
2947 // Insert a barrier so data written by the shader is available to the host
2948 {
2949 const VkBufferMemoryBarrier bufferBarrier = {
2950 VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // VkStructureType sType;
2951 DE_NULL, // const void* pNext;
2952 VK_ACCESS_SHADER_WRITE_BIT, // VkAccessFlags srcAccessMask;
2953 VK_ACCESS_HOST_READ_BIT, // VkAccessFlags dstAccessMask;
2954 VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex;
2955 VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex;
2956 *m_outputBuffer, // VkBuffer buffer;
2957 0, // VkDeviceSize offset;
2958 VK_WHOLE_SIZE, // VkDeviceSize size;
2959 };
2960
2961 vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT,
2962 (VkDependencyFlags)0, 0, (const VkMemoryBarrier *)DE_NULL, 1, &bufferBarrier, 0,
2963 (const VkImageMemoryBarrier *)DE_NULL);
2964 }
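// Note: the HOST_READ barrier makes the shader writes available to the host domain; for
// non-coherent memory the mapped allocation is additionally assumed to be invalidated in
// readOutputBuffer() before the results are read.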
2965
2966 endCommandBuffer(vk, *cmdBuffer);
2967
2968 curOffset += numToExec;
2969
2970 // Execute
2971 submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());
2972 }
2973
2974 // Read back data
2975 readOutputBuffer(outputs, numValues);
2976 }
2977
2978 #ifndef CTS_USES_VULKANSC
2979 // MeshTaskShaderExecutor
2980
2981 class MeshTaskShaderExecutor : public BufferIoExecutor
2982 {
2983 public:
2984 MeshTaskShaderExecutor(Context &context, const ShaderSpec &shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
2985 virtual ~MeshTaskShaderExecutor(void);
2986
2987 static void generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection, bool useTask);
2988
2989 virtual void execute(int numValues, const void *const *inputs, void *const *outputs,
2990 VkDescriptorSet extraResources);
2991
2992 protected:
2993 static std::string generateMeshShader(const ShaderSpec &spec, bool useTask);
2994 static std::string generateTaskShader(const ShaderSpec &spec);
2995
2996 private:
2997 const VkDescriptorSetLayout m_extraResourcesLayout;
2998 };
2999
3000 MeshTaskShaderExecutor::MeshTaskShaderExecutor(Context &context, const ShaderSpec &shaderSpec,
3001 VkDescriptorSetLayout extraResourcesLayout)
3002 : BufferIoExecutor(context, shaderSpec)
3003 , m_extraResourcesLayout(extraResourcesLayout)
3004 {
3005 }
3006
3007 MeshTaskShaderExecutor::~MeshTaskShaderExecutor(void)
3008 {
3009 }
3010
3011 std::string MeshTaskShaderExecutor::generateMeshShader(const ShaderSpec &spec, bool useTask)
3012 {
3013 DE_ASSERT(spec.spirvCase == SPIRV_CASETYPE_NONE);
3014
3015 std::ostringstream src;
3016
3017 if (useTask)
3018 {
3019 src << glu::getGLSLVersionDeclaration(spec.glslVersion) << "\n"
3020 << "#extension GL_EXT_mesh_shader : enable\n"
3021 << "layout(local_size_x=1, local_size_y=1, local_size_z=1) in;\n"
3022 << "layout(points) out;\n"
3023 << "layout(max_vertices=1, max_primitives=1) out;\n"
3024 << "\n"
3025 << "void main (void)\n"
3026 << "{\n"
3027 << " SetMeshOutputsEXT(0u, 0u);\n"
3028 << "}\n";
3029 }
3030 else
3031 {
3032 src << glu::getGLSLVersionDeclaration(spec.glslVersion) << "\n"
3033 << "#extension GL_EXT_mesh_shader : enable\n";
3034
3035 if (!spec.globalDeclarations.empty())
3036 src << spec.globalDeclarations << "\n";
3037
3038 src << "layout(local_size_x = " << spec.localSizeX << ") in;\n"
3039 << "layout(points) out;\n"
3040 << "layout(max_vertices=1, max_primitives=1) out;\n"
3041 << "\n";
3042
3043 declareBufferBlocks(src, spec);
3044
3045 src << "void main (void)\n"
3046 << "{\n"
3047 << " uint invocationNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z\n"
3048 << " + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n";
3049
3050 generateExecBufferIo(src, spec, "invocationNdx");
3051
3052 src << " SetMeshOutputsEXT(0u, 0u);\n"
3053 << "}\n";
3054 }
3055
3056 return src.str();
3057 }
3058
3059 std::string MeshTaskShaderExecutor::generateTaskShader(const ShaderSpec &spec)
3060 {
3061 std::ostringstream src;
3062
3063 src << glu::getGLSLVersionDeclaration(spec.glslVersion) << "\n"
3064 << "#extension GL_EXT_mesh_shader : enable\n";
3065
3066 if (!spec.globalDeclarations.empty())
3067 src << spec.globalDeclarations << "\n";
3068
3069 src << "layout(local_size_x = " << spec.localSizeX << ") in;\n"
3070 << "\n";
3071
3072 declareBufferBlocks(src, spec);
3073
3074 src << "void main (void)\n"
3075 << "{\n"
3076 << " uint invocationNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z\n"
3077 << " + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n";
3078
3079 generateExecBufferIo(src, spec, "invocationNdx");
3080
3081 src << " EmitMeshTasksEXT(0u, 0u, 0u);\n"
3082 << "}\n";
3083
3084 return src.str();
3085 }
3086
3087 void MeshTaskShaderExecutor::generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection,
3088 bool useTask)
3089 {
3090 DE_ASSERT(shaderSpec.spirvCase == SPIRV_CASETYPE_NONE);
3091 programCollection.glslSources.add("mesh")
3092 << glu::MeshSource(generateMeshShader(shaderSpec, useTask)) << shaderSpec.buildOptions;
3093 if (useTask)
3094 programCollection.glslSources.add("task")
3095 << glu::TaskSource(generateTaskShader(shaderSpec)) << shaderSpec.buildOptions;
3096 }
3097
3098 void MeshTaskShaderExecutor::execute(int numValues, const void *const *inputs, void *const *outputs,
3099 VkDescriptorSet extraResources)
3100 {
3101 const auto vkDevice = m_context.getDevice();
3102 const auto &vk = m_context.getDeviceInterface();
3103 const auto queue = m_context.getUniversalQueue();
3104 const auto queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
3105 const auto bindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
3106 const auto &binaries = m_context.getBinaryCollection();
3107 const bool useTask = binaries.contains("task");
3108 const auto shaderStage = (useTask ? VK_SHADER_STAGE_TASK_BIT_EXT : VK_SHADER_STAGE_MESH_BIT_EXT);
3109 const auto pipelineStage =
3110 (useTask ? VK_PIPELINE_STAGE_TASK_SHADER_BIT_EXT : VK_PIPELINE_STAGE_MESH_SHADER_BIT_EXT);
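// When a task shader is present it performs the buffer IO (the mesh shader is then trivial),
// so the write-to-host barrier recorded below must wait on the task stage; otherwise the
// mesh stage is the producer.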
3111
3112 DE_ASSERT((m_extraResourcesLayout != DE_NULL) == (extraResources != DE_NULL));
3113
3114 // Create input and output buffers.
3115 initBuffers(numValues);
3116
3117 // Setup input buffer & copy data
3118 // For SPIR-V shaders that use packed 16-bit float values as input, each float is converted to 16 bits
3119 // and stored in the lower 16 bits of a 32-bit integer in the input buffer, then bitcast back to a
3120 // 16-bit float in the shader.
3121 uploadInputBuffer(inputs, numValues,
3122 m_shaderSpec.packFloat16Bit && (m_shaderSpec.spirvCase != SPIRV_CASETYPE_NONE));
3123
3124 // Create command pool
3125 const auto cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
3126
3127 // Descriptor pool, set layout and set.
3128 DescriptorPoolBuilder descriptorPoolBuilder;
3129 DescriptorSetLayoutBuilder descriptorSetLayoutBuilder;
3130
3131 descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, shaderStage);
3132 descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
3133 descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, shaderStage);
3134 descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
3135
3136 const auto descriptorSetLayout = descriptorSetLayoutBuilder.build(vk, vkDevice);
3137 const auto descriptorPool =
3138 descriptorPoolBuilder.build(vk, vkDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3139 const auto descriptorSet = makeDescriptorSet(vk, vkDevice, descriptorPool.get(), descriptorSetLayout.get());
3140
3141 // Create pipeline layout
3142 std::vector<VkDescriptorSetLayout> setLayouts;
3143 setLayouts.push_back(descriptorSetLayout.get());
3144 if (m_extraResourcesLayout != DE_NULL)
3145 setLayouts.push_back(m_extraResourcesLayout);
3146
3147 const auto pipelineLayout =
3148 makePipelineLayout(vk, vkDevice, static_cast<uint32_t>(setLayouts.size()), de::dataOrNull(setLayouts));
3149
3150 // Create shaders
3151 const auto meshShaderModule = createShaderModule(vk, vkDevice, binaries.get("mesh"));
3152 const auto taskShaderModule =
3153 (useTask ? createShaderModule(vk, vkDevice, binaries.get("task")) : Move<VkShaderModule>());
3154
3155 // Render pass and framebuffer.
3156 const auto fbExtent = makeExtent2D(1u, 1u);
3157 const auto renderPass = makeRenderPass(vk, vkDevice);
3158 const auto framebuffer =
3159 makeFramebuffer(vk, vkDevice, renderPass.get(), 0u, nullptr, fbExtent.width, fbExtent.height);
3160
3161 const std::vector<VkViewport> viewports(1u, makeViewport(fbExtent));
3162 const std::vector<VkRect2D> scissors(1u, makeRect2D(fbExtent));
3163
3164 // Create pipeline.
3165 const auto meshPipeline =
3166 makeGraphicsPipeline(vk, vkDevice, pipelineLayout.get(), taskShaderModule.get(), meshShaderModule.get(),
3167 DE_NULL, renderPass.get(), viewports, scissors);
3168
3169 const int maxValuesPerInvocation = m_context.getMeshShaderPropertiesEXT().maxMeshWorkGroupSize[0];
3170 const uint32_t inputStride = getInputStride();
3171 const uint32_t outputStride = getOutputStride();
3172 const auto outputBufferBinding =
3173 DescriptorSetUpdateBuilder::Location::binding(static_cast<uint32_t>(OUTPUT_BUFFER_BINDING));
3174 const auto inputBufferBinding =
3175 DescriptorSetUpdateBuilder::Location::binding(static_cast<uint32_t>(INPUT_BUFFER_BINDING));
3176 int curOffset = 0;
3177
3178 while (curOffset < numValues)
3179 {
3180 const auto remaining = numValues - curOffset;
3181 const auto numToExec = de::min(maxValuesPerInvocation, remaining);
3182
3183 // Update descriptors
3184 {
3185 DescriptorSetUpdateBuilder descriptorSetUpdateBuilder;
3186
3187 const auto outputDescriptorBufferInfo =
3188 makeDescriptorBufferInfo(m_outputBuffer.get(), curOffset * outputStride, numToExec * outputStride);
3189 descriptorSetUpdateBuilder.writeSingle(descriptorSet.get(), outputBufferBinding,
3190 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputDescriptorBufferInfo);
3191
3192 if (inputStride)
3193 {
3194 const auto inputDescriptorBufferInfo =
3195 makeDescriptorBufferInfo(m_inputBuffer.get(), curOffset * inputStride, numToExec * inputStride);
3196 descriptorSetUpdateBuilder.writeSingle(descriptorSet.get(), inputBufferBinding,
3197 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputDescriptorBufferInfo);
3198 }
3199
3200 descriptorSetUpdateBuilder.update(vk, vkDevice);
3201 }
3202
3203 std::vector<VkDescriptorSet> descriptorSets;
3204 descriptorSets.push_back(descriptorSet.get());
3205 if (extraResources != DE_NULL)
3206 descriptorSets.push_back(extraResources);
3207
3208 const auto bufferBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT,
3209 m_outputBuffer.get(), 0ull, VK_WHOLE_SIZE);
3210 const auto cmdBufferPtr = allocateCommandBuffer(vk, vkDevice, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
3211 const auto cmdBuffer = cmdBufferPtr.get();
3212
3213 // Record command buffer, including a pipeline barrier that makes the output buffer visible to the host.
3214 beginCommandBuffer(vk, cmdBuffer);
3215 beginRenderPass(vk, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u));
3216 vk.cmdBindPipeline(cmdBuffer, bindPoint, meshPipeline.get());
3217 vk.cmdBindDescriptorSets(cmdBuffer, bindPoint, pipelineLayout.get(), 0u,
3218 static_cast<uint32_t>(descriptorSets.size()), de::dataOrNull(descriptorSets), 0u,
3219 DE_NULL);
3220 vk.cmdDrawMeshTasksEXT(cmdBuffer, numToExec, 1u, 1u);
3221 endRenderPass(vk, cmdBuffer);
3222 cmdPipelineBufferMemoryBarrier(vk, cmdBuffer, pipelineStage, VK_PIPELINE_STAGE_HOST_BIT, &bufferBarrier);
3223 endCommandBuffer(vk, cmdBuffer);
3224
3225 // Execute
3226 submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer);
3227
3228 curOffset += numToExec;
3229 }
3230
3231 // Read back data
3232 readOutputBuffer(outputs, numValues);
3233 }
3234 #endif // CTS_USES_VULKANSC
3235
3236 // Tessellation utils
3237
3238 static std::string generateVertexShaderForTess(void)
3239 {
3240 std::ostringstream src;
3241 src << "#version 450\n"
3242 << "void main (void)\n{\n"
3243 << " gl_Position = vec4(gl_VertexIndex/2, gl_VertexIndex%2, 0.0, 1.0);\n"
3244 << "}\n";
3245
3246 return src.str();
3247 }
3248
3249 class TessellationExecutor : public BufferIoExecutor
3250 {
3251 public:
3252 TessellationExecutor(Context &context, const ShaderSpec &shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
3253 virtual ~TessellationExecutor(void);
3254
3255 void renderTess(uint32_t numValues, uint32_t vertexCount, uint32_t patchControlPoints,
3256 VkDescriptorSet extraResources);
3257
3258 private:
3259 const VkDescriptorSetLayout m_extraResourcesLayout;
3260 };
3261
3262 TessellationExecutor::TessellationExecutor(Context &context, const ShaderSpec &shaderSpec,
3263 VkDescriptorSetLayout extraResourcesLayout)
3264 : BufferIoExecutor(context, shaderSpec)
3265 , m_extraResourcesLayout(extraResourcesLayout)
3266 {
3267 const VkPhysicalDeviceFeatures &features = context.getDeviceFeatures();
3268
3269 if (!features.tessellationShader)
3270 TCU_THROW(NotSupportedError, "Tessellation shader is not supported by device");
3271 }
3272
3273 TessellationExecutor::~TessellationExecutor(void)
3274 {
3275 }
3276
3277 void TessellationExecutor::renderTess(uint32_t numValues, uint32_t vertexCount, uint32_t patchControlPoints,
3278 VkDescriptorSet extraResources)
3279 {
3280 const size_t inputBufferSize = numValues * getInputStride();
3281 const VkDevice vkDevice = m_context.getDevice();
3282 const DeviceInterface &vk = m_context.getDeviceInterface();
3283 const VkQueue queue = m_context.getUniversalQueue();
3284 const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
3285 Allocator &memAlloc = m_context.getDefaultAllocator();
3286
3287 const tcu::UVec2 renderSize(DEFAULT_RENDER_WIDTH, DEFAULT_RENDER_HEIGHT);
3288
3289 Move<VkImage> colorImage;
3290 de::MovePtr<Allocation> colorImageAlloc;
3291 VkFormat colorFormat = VK_FORMAT_R8G8B8A8_UNORM;
3292 Move<VkImageView> colorImageView;
3293
3294 Move<VkRenderPass> renderPass;
3295 Move<VkFramebuffer> framebuffer;
3296 Move<VkPipelineLayout> pipelineLayout;
3297 Move<VkPipeline> graphicsPipeline;
3298
3299 Move<VkShaderModule> vertexShaderModule;
3300 Move<VkShaderModule> tessControlShaderModule;
3301 Move<VkShaderModule> tessEvalShaderModule;
3302 Move<VkShaderModule> fragmentShaderModule;
3303
3304 Move<VkCommandPool> cmdPool;
3305 Move<VkCommandBuffer> cmdBuffer;
3306
3307 Move<VkDescriptorPool> descriptorPool;
3308 Move<VkDescriptorSetLayout> descriptorSetLayout;
3309 Move<VkDescriptorSet> descriptorSet;
3310 const uint32_t numDescriptorSets = (m_extraResourcesLayout != 0) ? 2u : 1u;
3311
3312 DE_ASSERT((m_extraResourcesLayout != 0) == (extraResources != 0));
3313
3314 // Create color image
3315 {
3316 const VkImageCreateInfo colorImageParams = {
3317 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
3318 DE_NULL, // const void* pNext;
3319 0u, // VkImageCreateFlags flags;
3320 VK_IMAGE_TYPE_2D, // VkImageType imageType;
3321 colorFormat, // VkFormat format;
3322 {renderSize.x(), renderSize.y(), 1u}, // VkExtent3D extent;
3323 1u, // uint32_t mipLevels;
3324 1u, // uint32_t arrayLayers;
3325 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
3326 VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
3327 VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT, // VkImageUsageFlags usage;
3328 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
3329 1u, // uint32_t queueFamilyIndexCount;
3330 &queueFamilyIndex, // const uint32_t* pQueueFamilyIndices;
3331 VK_IMAGE_LAYOUT_UNDEFINED // VkImageLayout initialLayout;
3332 };
3333
3334 colorImage = createImage(vk, vkDevice, &colorImageParams);
3335
3336 // Allocate and bind color image memory
3337 colorImageAlloc =
3338 memAlloc.allocate(getImageMemoryRequirements(vk, vkDevice, *colorImage), MemoryRequirement::Any);
3339 VK_CHECK(vk.bindImageMemory(vkDevice, *colorImage, colorImageAlloc->getMemory(), colorImageAlloc->getOffset()));
3340 }
3341
3342 // Create color attachment view
3343 {
3344 const VkImageViewCreateInfo colorImageViewParams = {
3345 VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, // VkStructureType sType;
3346 DE_NULL, // const void* pNext;
3347 0u, // VkImageViewCreateFlags flags;
3348 *colorImage, // VkImage image;
3349 VK_IMAGE_VIEW_TYPE_2D, // VkImageViewType viewType;
3350 colorFormat, // VkFormat format;
3351 {
3352 VK_COMPONENT_SWIZZLE_R, // VkComponentSwizzle r;
3353 VK_COMPONENT_SWIZZLE_G, // VkComponentSwizzle g;
3354 VK_COMPONENT_SWIZZLE_B, // VkComponentSwizzle b;
3355 VK_COMPONENT_SWIZZLE_A // VkComponentSwizzle a;
3356 }, // VkComponentMapping components;
3357 {
3358 VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags aspectMask;
3359 0u, // uint32_t baseMipLevel;
3360 1u, // uint32_t levelCount;
3361 0u, // uint32_t baseArrayLayer;
3362 1u // uint32_t layerCount;
3363 } // VkImageSubresourceRange subresourceRange;
3364 };
3365
3366 colorImageView = createImageView(vk, vkDevice, &colorImageViewParams);
3367 }
3368
3369 // Create render pass
3370 {
3371 const VkAttachmentDescription colorAttachmentDescription = {
3372 0u, // VkAttachmentDescriptionFlags flags;
3373 colorFormat, // VkFormat format;
3374 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
3375 VK_ATTACHMENT_LOAD_OP_CLEAR, // VkAttachmentLoadOp loadOp;
3376 VK_ATTACHMENT_STORE_OP_STORE, // VkAttachmentStoreOp storeOp;
3377 VK_ATTACHMENT_LOAD_OP_DONT_CARE, // VkAttachmentLoadOp stencilLoadOp;
3378 VK_ATTACHMENT_STORE_OP_DONT_CARE, // VkAttachmentStoreOp stencilStoreOp;
3379 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
3380 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL // VkImageLayout finalLayout
3381 };
3382
3383 const VkAttachmentDescription attachments[1] = {colorAttachmentDescription};
3384
3385 const VkAttachmentReference colorAttachmentReference = {
3386 0u, // uint32_t attachment;
3387 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL // VkImageLayout layout;
3388 };
3389
3390 const VkSubpassDescription subpassDescription = {
3391 0u, // VkSubpassDescriptionFlags flags;
3392 VK_PIPELINE_BIND_POINT_GRAPHICS, // VkPipelineBindPoint pipelineBindPoint;
3393 0u, // uint32_t inputAttachmentCount;
3394 DE_NULL, // const VkAttachmentReference* pInputAttachments;
3395 1u, // uint32_t colorAttachmentCount;
3396 &colorAttachmentReference, // const VkAttachmentReference* pColorAttachments;
3397 DE_NULL, // const VkAttachmentReference* pResolveAttachments;
3398 DE_NULL, // const VkAttachmentReference* pDepthStencilAttachment;
3399 0u, // uint32_t preserveAttachmentCount;
3400 DE_NULL // const uint32_t* pPreserveAttachments;
3401 };
3402
3403 const VkRenderPassCreateInfo renderPassParams = {
3404 VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // VkStructureType sType;
3405 DE_NULL, // const void* pNext;
3406 0u, // VkRenderPassCreateFlags flags;
3407 1u, // uint32_t attachmentCount;
3408 attachments, // const VkAttachmentDescription* pAttachments;
3409 1u, // uint32_t subpassCount;
3410 &subpassDescription, // const VkSubpassDescription* pSubpasses;
3411 0u, // uint32_t dependencyCount;
3412 DE_NULL // const VkSubpassDependency* pDependencies;
3413 };
3414
3415 renderPass = createRenderPass(vk, vkDevice, &renderPassParams);
3416 }
3417
3418 // Create framebuffer
3419 {
3420 const VkFramebufferCreateInfo framebufferParams = {
3421 VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, // VkStructureType sType;
3422 DE_NULL, // const void* pNext;
3423 0u, // VkFramebufferCreateFlags flags;
3424 *renderPass, // VkRenderPass renderPass;
3425 1u, // uint32_t attachmentCount;
3426 &*colorImageView, // const VkImageView* pAttachments;
3427 (uint32_t)renderSize.x(), // uint32_t width;
3428 (uint32_t)renderSize.y(), // uint32_t height;
3429 1u // uint32_t layers;
3430 };
3431
3432 framebuffer = createFramebuffer(vk, vkDevice, &framebufferParams);
3433 }
3434
3435 // Create descriptors
3436 {
3437 DescriptorPoolBuilder descriptorPoolBuilder;
3438 DescriptorSetLayoutBuilder descriptorSetLayoutBuilder;
3439
3440 descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_ALL);
3441 descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
3442 descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_ALL);
3443 descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
3444
3445 descriptorSetLayout = descriptorSetLayoutBuilder.build(vk, vkDevice);
3446 descriptorPool =
3447 descriptorPoolBuilder.build(vk, vkDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3448
3449 const VkDescriptorSetAllocateInfo allocInfo = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, DE_NULL,
3450 *descriptorPool, 1u, &*descriptorSetLayout};
3451
3452 descriptorSet = allocateDescriptorSet(vk, vkDevice, &allocInfo);
3453 // Update descriptors
3454 {
3455 DescriptorSetUpdateBuilder descriptorSetUpdateBuilder;
3456 const VkDescriptorBufferInfo outputDescriptorBufferInfo = {
3457 *m_outputBuffer, // VkBuffer buffer;
3458 0u, // VkDeviceSize offset;
3459 VK_WHOLE_SIZE // VkDeviceSize range;
3460 };
3461
3462 descriptorSetUpdateBuilder.writeSingle(
3463 *descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((uint32_t)OUTPUT_BUFFER_BINDING),
3464 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputDescriptorBufferInfo);
3465
3466 VkDescriptorBufferInfo inputDescriptorBufferInfo = {
3467 0, // VkBuffer buffer;
3468 0u, // VkDeviceSize offset;
3469 VK_WHOLE_SIZE // VkDeviceSize range;
3470 };
3471
3472 if (inputBufferSize > 0)
3473 {
3474 inputDescriptorBufferInfo.buffer = *m_inputBuffer;
3475
3476 descriptorSetUpdateBuilder.writeSingle(
3477 *descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((uint32_t)INPUT_BUFFER_BINDING),
3478 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputDescriptorBufferInfo);
3479 }
3480
3481 descriptorSetUpdateBuilder.update(vk, vkDevice);
3482 }
3483 }
3484
3485 // Create pipeline layout
3486 {
3487 const VkDescriptorSetLayout descriptorSetLayouts[] = {*descriptorSetLayout, m_extraResourcesLayout};
3488 const VkPipelineLayoutCreateInfo pipelineLayoutParams = {
3489 VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // VkStructureType sType;
3490 DE_NULL, // const void* pNext;
3491 (VkPipelineLayoutCreateFlags)0, // VkPipelineLayoutCreateFlags flags;
3492 numDescriptorSets, // uint32_t descriptorSetCount;
3493 descriptorSetLayouts, // const VkDescriptorSetLayout* pSetLayouts;
3494 0u, // uint32_t pushConstantRangeCount;
3495 DE_NULL // const VkPushConstantRange* pPushConstantRanges;
3496 };
3497
3498 pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
3499 }
3500
3501 // Create shader modules
3502 {
3503 vertexShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("vert"), 0);
3504 tessControlShaderModule =
3505 createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("tess_control"), 0);
3506 tessEvalShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("tess_eval"), 0);
3507 fragmentShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("frag"), 0);
3508 }
3509
3510 // Create pipeline
3511 {
3512 const VkPipelineVertexInputStateCreateInfo vertexInputStateParams = {
3513 VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
3514 DE_NULL, // const void* pNext;
3515 (VkPipelineVertexInputStateCreateFlags)0, // VkPipelineVertexInputStateCreateFlags flags;
3516 0u, // uint32_t vertexBindingDescriptionCount;
3517 DE_NULL, // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
3518 0u, // uint32_t vertexAttributeDescriptionCount;
3519 DE_NULL, // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions;
3520 };
3521
3522 const std::vector<VkViewport> viewports(1, makeViewport(renderSize));
3523 const std::vector<VkRect2D> scissors(1, makeRect2D(renderSize));
3524
3525 graphicsPipeline = makeGraphicsPipeline(
3526 vk, // const DeviceInterface& vk
3527 vkDevice, // const VkDevice device
3528 *pipelineLayout, // const VkPipelineLayout pipelineLayout
3529 *vertexShaderModule, // const VkShaderModule vertexShaderModule
3530 *tessControlShaderModule, // const VkShaderModule tessellationControlShaderModule
3531 *tessEvalShaderModule, // const VkShaderModule tessellationEvalShaderModule
3532 DE_NULL, // const VkShaderModule geometryShaderModule
3533 *fragmentShaderModule, // const VkShaderModule fragmentShaderModule
3534 *renderPass, // const VkRenderPass renderPass
3535 viewports, // const std::vector<VkViewport>& viewports
3536 scissors, // const std::vector<VkRect2D>& scissors
3537 VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, // const VkPrimitiveTopology topology
3538 0u, // const uint32_t subpass
3539 patchControlPoints, // const uint32_t patchControlPoints
3540 &vertexInputStateParams); // const VkPipelineVertexInputStateCreateInfo* vertexInputStateCreateInfo
3541 }
3542
3543 // Create command pool
3544 cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
3545
3546 // Create command buffer
3547 {
3548 const VkClearValue clearValue = getDefaultClearColor();
3549
3550 cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
3551
3552 beginCommandBuffer(vk, *cmdBuffer);
3553
3554 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, renderSize.x(), renderSize.y()),
3555 clearValue);
3556
3557 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *graphicsPipeline);
3558
3559 {
3560 const VkDescriptorSet descriptorSets[] = {*descriptorSet, extraResources};
3561 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u,
3562 numDescriptorSets, descriptorSets, 0u, DE_NULL);
3563 }
3564
3565 vk.cmdDraw(*cmdBuffer, vertexCount, 1, 0, 0);
3566
3567 endRenderPass(vk, *cmdBuffer);
3568
3569 // Insert a barrier so data written by the shader is available to the host
3570 {
3571 const VkBufferMemoryBarrier bufferBarrier = {
3572 VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // VkStructureType sType;
3573 DE_NULL, // const void* pNext;
3574 VK_ACCESS_SHADER_WRITE_BIT, // VkAccessFlags srcAccessMask;
3575 VK_ACCESS_HOST_READ_BIT, // VkAccessFlags dstAccessMask;
3576 VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex;
3577 VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex;
3578 *m_outputBuffer, // VkBuffer buffer;
3579 0, // VkDeviceSize offset;
3580 VK_WHOLE_SIZE, // VkDeviceSize size;
3581 };
3582
3583 vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT,
3584 vk::VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0,
3585 (const VkMemoryBarrier *)DE_NULL, 1, &bufferBarrier, 0,
3586 (const VkImageMemoryBarrier *)DE_NULL);
3587 }
3588
3589 endCommandBuffer(vk, *cmdBuffer);
3590 }
3591
3592 // Execute Draw
3593 submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());
3594 }
3595
3596 // TessControlExecutor
3597
3598 class TessControlExecutor : public TessellationExecutor
3599 {
3600 public:
3601 TessControlExecutor(Context &context, const ShaderSpec &shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
3602 virtual ~TessControlExecutor(void);
3603
3604 static void generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection);
3605
3606 virtual void execute(int numValues, const void *const *inputs, void *const *outputs,
3607 VkDescriptorSet extraResources);
3608
3609 protected:
3610 static std::string generateTessControlShader(const ShaderSpec &shaderSpec);
3611 };
3612
3613 TessControlExecutor::TessControlExecutor(Context &context, const ShaderSpec &shaderSpec,
3614 VkDescriptorSetLayout extraResourcesLayout)
3615 : TessellationExecutor(context, shaderSpec, extraResourcesLayout)
3616 {
3617 }
3618
3619 TessControlExecutor::~TessControlExecutor(void)
3620 {
3621 }
3622
3623 std::string TessControlExecutor::generateTessControlShader(const ShaderSpec &shaderSpec)
3624 {
3625 std::ostringstream src;
3626 src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
3627
3628 if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
3629 src << "#extension GL_EXT_tessellation_shader : require\n\n";
3630
3631 if (!shaderSpec.globalDeclarations.empty())
3632 src << shaderSpec.globalDeclarations << "\n";
3633
3634 src << "\nlayout(vertices = 1) out;\n\n";
3635
3636 declareBufferBlocks(src, shaderSpec);
3637
3638 src << "void main (void)\n{\n";
3639
3640 for (int ndx = 0; ndx < 2; ndx++)
3641 src << "\tgl_TessLevelInner[" << ndx << "] = 1.0;\n";
3642
3643 for (int ndx = 0; ndx < 4; ndx++)
3644 src << "\tgl_TessLevelOuter[" << ndx << "] = 1.0;\n";
3645
3646 src << "\n"
3647 << "\thighp uint invocationId = uint(gl_PrimitiveID);\n";
3648
3649 generateExecBufferIo(src, shaderSpec, "invocationId");
3650
3651 src << "}\n";
3652
3653 return src.str();
3654 }
3655
3656 static std::string generateEmptyTessEvalShader()
3657 {
3658 std::ostringstream src;
3659
3660 src << "#version 450\n"
3661 "#extension GL_EXT_tessellation_shader : require\n\n";
3662
3663 src << "layout(triangles, ccw) in;\n";
3664
3665 src << "\nvoid main (void)\n{\n"
3666 << "\tgl_Position = vec4(gl_TessCoord.xy, 0.0, 1.0);\n"
3667 << "}\n";
3668
3669 return src.str();
3670 }
3671
3672 void TessControlExecutor::generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection)
3673 {
3674 programCollection.glslSources.add("vert")
3675 << glu::VertexSource(generateVertexShaderForTess()) << shaderSpec.buildOptions;
3676 programCollection.glslSources.add("tess_control")
3677 << glu::TessellationControlSource(generateTessControlShader(shaderSpec)) << shaderSpec.buildOptions;
3678 programCollection.glslSources.add("tess_eval")
3679 << glu::TessellationEvaluationSource(generateEmptyTessEvalShader()) << shaderSpec.buildOptions;
3680 programCollection.glslSources.add("frag")
3681 << glu::FragmentSource(generateEmptyFragmentSource()) << shaderSpec.buildOptions;
3682 }
3683
3684 void TessControlExecutor::execute(int numValues, const void *const *inputs, void *const *outputs,
3685 VkDescriptorSet extraResources)
3686 {
3687 const uint32_t patchSize = 3;
3688
3689 initBuffers(numValues);
3690
3691 // Setup input buffer & copy data
3692 uploadInputBuffer(inputs, numValues, false);
3693
3694 renderTess(numValues, patchSize * numValues, patchSize, extraResources);
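// Worked example: for numValues == 100 this draws 300 vertices as 100 patches of 3 control
// points, yielding exactly one tessellation control invocation (one output value) per patch.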
3695
3696 // Read back data
3697 readOutputBuffer(outputs, numValues);
3698 }
3699
3700 // TessEvaluationExecutor
3701
3702 class TessEvaluationExecutor : public TessellationExecutor
3703 {
3704 public:
3705 TessEvaluationExecutor(Context &context, const ShaderSpec &shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
3706 virtual ~TessEvaluationExecutor(void);
3707
3708 static void generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection);
3709
3710 virtual void execute(int numValues, const void *const *inputs, void *const *outputs,
3711 VkDescriptorSet extraResources);
3712
3713 protected:
3714 static std::string generateTessEvalShader(const ShaderSpec &shaderSpec);
3715 };
3716
3717 TessEvaluationExecutor::TessEvaluationExecutor(Context &context, const ShaderSpec &shaderSpec,
3718 VkDescriptorSetLayout extraResourcesLayout)
3719 : TessellationExecutor(context, shaderSpec, extraResourcesLayout)
3720 {
3721 }
3722
3723 TessEvaluationExecutor::~TessEvaluationExecutor(void)
3724 {
3725 }
3726
3727 static std::string generatePassthroughTessControlShader(void)
3728 {
3729 std::ostringstream src;
3730
3731 src << "#version 450\n"
3732 "#extension GL_EXT_tessellation_shader : require\n\n";
3733
3734 src << "layout(vertices = 1) out;\n\n";
3735
3736 src << "void main (void)\n{\n";
3737
3738 for (int ndx = 0; ndx < 2; ndx++)
3739 src << "\tgl_TessLevelInner[" << ndx << "] = 1.0;\n";
3740
3741 for (int ndx = 0; ndx < 4; ndx++)
3742 src << "\tgl_TessLevelOuter[" << ndx << "] = 1.0;\n";
3743
3744 src << "}\n";
3745
3746 return src.str();
3747 }
3748
3749 std::string TessEvaluationExecutor::generateTessEvalShader(const ShaderSpec &shaderSpec)
3750 {
3751 std::ostringstream src;
3752
3753 src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
3754
3755 if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
3756 src << "#extension GL_EXT_tessellation_shader : require\n\n";
3757
3758 if (!shaderSpec.globalDeclarations.empty())
3759 src << shaderSpec.globalDeclarations << "\n";
3760
3761 src << "\n";
3762
3763 src << "layout(isolines, equal_spacing) in;\n\n";
3764
3765 declareBufferBlocks(src, shaderSpec);
3766
3767 src << "void main (void)\n{\n"
3768 << "\tgl_Position = vec4(gl_TessCoord.x, 0.0, 0.0, 1.0);\n"
3769 << "\thighp uint invocationId = uint(gl_PrimitiveID)*2u + (gl_TessCoord.x > 0.5 ? 1u : 0u);\n";
3770
3771 generateExecBufferIo(src, shaderSpec, "invocationId");
3772
3773 src << "}\n";
3774
3775 return src.str();
3776 }
3777
3778 void TessEvaluationExecutor::generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection)
3779 {
3780 programCollection.glslSources.add("vert")
3781 << glu::VertexSource(generateVertexShaderForTess()) << shaderSpec.buildOptions;
3782 programCollection.glslSources.add("tess_control")
3783 << glu::TessellationControlSource(generatePassthroughTessControlShader()) << shaderSpec.buildOptions;
3784 programCollection.glslSources.add("tess_eval")
3785 << glu::TessellationEvaluationSource(generateTessEvalShader(shaderSpec)) << shaderSpec.buildOptions;
3786 programCollection.glslSources.add("frag")
3787 << glu::FragmentSource(generateEmptyFragmentSource()) << shaderSpec.buildOptions;
3788 }
3789
3790 void TessEvaluationExecutor::execute(int numValues, const void *const *inputs, void *const *outputs,
3791 VkDescriptorSet extraResources)
3792 {
3793 const int patchSize = 2;
3794 const int alignedValues = deAlign32(numValues, patchSize);
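// Example: deAlign32(101, 2) == 102, so for an odd numValues one padding slot is allocated
// and written by the shader but ignored on readback.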
3795
3796 // Initialize buffers with aligned value count to make room for padding
3797 initBuffers(alignedValues);
3798
3799 // Setup input buffer & copy data
3800 uploadInputBuffer(inputs, numValues, false);
3801
3802 renderTess((uint32_t)alignedValues, (uint32_t)alignedValues, (uint32_t)patchSize, extraResources);
3803
3804 // Read back data
3805 readOutputBuffer(outputs, numValues);
3806 }
3807
3808 } // namespace
3809
3810 // ShaderExecutor
3811
3812 ShaderExecutor::~ShaderExecutor(void)
3813 {
3814 }
3815
3816 bool ShaderExecutor::areInputs16Bit(void) const
3817 {
3818 for (vector<Symbol>::const_iterator symIter = m_shaderSpec.inputs.begin(); symIter != m_shaderSpec.inputs.end();
3819 ++symIter)
3820 {
3821 if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
3822 return true;
3823 }
3824 return false;
3825 }
3826
3827 bool ShaderExecutor::areOutputs16Bit(void) const
3828 {
3829 for (vector<Symbol>::const_iterator symIter = m_shaderSpec.outputs.begin(); symIter != m_shaderSpec.outputs.end();
3830 ++symIter)
3831 {
3832 if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
3833 return true;
3834 }
3835 return false;
3836 }
3837
3838 bool ShaderExecutor::isOutput16Bit(const size_t ndx) const
3839 {
3840 if (glu::isDataTypeFloat16OrVec(m_shaderSpec.outputs[ndx].varType.getBasicType()))
3841 return true;
3842 return false;
3843 }
3844
3845 bool ShaderExecutor::areInputs64Bit(void) const
3846 {
3847 for (vector<Symbol>::const_iterator symIter = m_shaderSpec.inputs.begin(); symIter != m_shaderSpec.inputs.end();
3848 ++symIter)
3849 {
3850 if (glu::isDataTypeDoubleType(symIter->varType.getBasicType()))
3851 return true;
3852 }
3853 return false;
3854 }
3855
3856 bool ShaderExecutor::areOutputs64Bit(void) const
3857 {
3858 for (vector<Symbol>::const_iterator symIter = m_shaderSpec.outputs.begin(); symIter != m_shaderSpec.outputs.end();
3859 ++symIter)
3860 {
3861 if (glu::isDataTypeDoubleType(symIter->varType.getBasicType()))
3862 return true;
3863 }
3864 return false;
3865 }
3866
3867 bool ShaderExecutor::isOutput64Bit(const size_t ndx) const
3868 {
3869 if (glu::isDataTypeDoubleType(m_shaderSpec.outputs[ndx].varType.getBasicType()))
3870 return true;
3871 return false;
3872 }
3873
3874 // Utilities
3875
3876 void generateSources(glu::ShaderType shaderType, const ShaderSpec &shaderSpec, vk::SourceCollections &dst)
3877 {
3878 switch (shaderType)
3879 {
3880 case glu::SHADERTYPE_VERTEX:
3881 VertexShaderExecutor::generateSources(shaderSpec, dst);
3882 break;
3883 case glu::SHADERTYPE_TESSELLATION_CONTROL:
3884 TessControlExecutor::generateSources(shaderSpec, dst);
3885 break;
3886 case glu::SHADERTYPE_TESSELLATION_EVALUATION:
3887 TessEvaluationExecutor::generateSources(shaderSpec, dst);
3888 break;
3889 case glu::SHADERTYPE_GEOMETRY:
3890 GeometryShaderExecutor::generateSources(shaderSpec, dst);
3891 break;
3892 case glu::SHADERTYPE_FRAGMENT:
3893 FragmentShaderExecutor::generateSources(shaderSpec, dst);
3894 break;
3895 case glu::SHADERTYPE_COMPUTE:
3896 ComputeShaderExecutor::generateSources(shaderSpec, dst);
3897 break;
3898 #ifndef CTS_USES_VULKANSC
3899 case glu::SHADERTYPE_MESH:
3900 MeshTaskShaderExecutor::generateSources(shaderSpec, dst, false /*useTask*/);
3901 break;
3902 case glu::SHADERTYPE_TASK:
3903 MeshTaskShaderExecutor::generateSources(shaderSpec, dst, true /*useTask*/);
3904 break;
3905 #endif // CTS_USES_VULKANSC
3906 default:
3907 TCU_THROW(InternalError, "Unsupported shader type");
3908 }
3909 }
3910
3911 ShaderExecutor *createExecutor(Context &context, glu::ShaderType shaderType, const ShaderSpec &shaderSpec,
3912 VkDescriptorSetLayout extraResourcesLayout)
3913 {
3914 switch (shaderType)
3915 {
3916 case glu::SHADERTYPE_VERTEX:
3917 return new VertexShaderExecutor(context, shaderSpec, extraResourcesLayout);
3918 case glu::SHADERTYPE_TESSELLATION_CONTROL:
3919 return new TessControlExecutor(context, shaderSpec, extraResourcesLayout);
3920 case glu::SHADERTYPE_TESSELLATION_EVALUATION:
3921 return new TessEvaluationExecutor(context, shaderSpec, extraResourcesLayout);
3922 case glu::SHADERTYPE_GEOMETRY:
3923 return new GeometryShaderExecutor(context, shaderSpec, extraResourcesLayout);
3924 case glu::SHADERTYPE_FRAGMENT:
3925 return new FragmentShaderExecutor(context, shaderSpec, extraResourcesLayout);
3926 case glu::SHADERTYPE_COMPUTE:
3927 return new ComputeShaderExecutor(context, shaderSpec, extraResourcesLayout);
3928 #ifndef CTS_USES_VULKANSC
3929 case glu::SHADERTYPE_MESH:
3930 case glu::SHADERTYPE_TASK:
3931 return new MeshTaskShaderExecutor(context, shaderSpec, extraResourcesLayout);
3933 #endif // CTS_USES_VULKANSC
3934 default:
3935 TCU_THROW(InternalError, "Unsupported shader type");
3936 }
3937 }
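// Typical usage from a test case, as a rough sketch (the surrounding plumbing - shader spec
// contents, program collection and value arrays - is assumed to exist and is illustrative):
//
//     ShaderSpec spec; // inputs, outputs, glslVersion and shader body filled in by the test
//     // At program-build time:
//     generateSources(shaderType, spec, programCollection);
//     // At run time, with no extra resources (hence DE_NULL for layout and set):
//     de::MovePtr<ShaderExecutor> executor(createExecutor(context, shaderType, spec, DE_NULL));
//     executor->execute(numValues, inputPtrs, outputPtrs, DE_NULL);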
3938
3939 bool executorSupported(glu::ShaderType shaderType)
3940 {
3941 switch (shaderType)
3942 {
3943 case glu::SHADERTYPE_VERTEX:
3944 case glu::SHADERTYPE_TESSELLATION_CONTROL:
3945 case glu::SHADERTYPE_TESSELLATION_EVALUATION:
3946 case glu::SHADERTYPE_GEOMETRY:
3947 case glu::SHADERTYPE_FRAGMENT:
3948 case glu::SHADERTYPE_COMPUTE:
3949 case glu::SHADERTYPE_MESH:
3950 case glu::SHADERTYPE_TASK:
3951 return true;
3952 default:
3953 return false;
3954 }
3955 }
3956
3957 void checkSupportShader(Context &context, const glu::ShaderType shaderType)
3958 {
3959 // Stage support.
3960 switch (shaderType)
3961 {
3962 case glu::SHADERTYPE_TESSELLATION_CONTROL:
3963 case glu::SHADERTYPE_TESSELLATION_EVALUATION:
3964 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_TESSELLATION_SHADER);
3965 break;
3966
3967 case glu::SHADERTYPE_GEOMETRY:
3968 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_GEOMETRY_SHADER);
3969 break;
3970
3971 case glu::SHADERTYPE_TASK:
3972 case glu::SHADERTYPE_MESH:
3973 {
3974 context.requireDeviceFunctionality("VK_EXT_mesh_shader");
3975
3976 if (shaderType == glu::SHADERTYPE_TASK)
3977 {
3978 #ifndef CTS_USES_VULKANSC
3979 const auto &features = context.getMeshShaderFeaturesEXT();
3980 if (!features.taskShader)
3981 TCU_THROW(NotSupportedError, "taskShader not supported");
3982 #else // CTS_USES_VULKANSC
3983 TCU_THROW(NotSupportedError, "taskShader not supported");
3984 #endif // CTS_USES_VULKANSC
3985 }
3986 }
3987 break;
3988
3989 default:
3990 break;
3991 }
3992
3993 // Stores and atomic operation support.
3994 switch (shaderType)
3995 {
3996 case glu::SHADERTYPE_VERTEX:
3997 case glu::SHADERTYPE_TESSELLATION_CONTROL:
3998 case glu::SHADERTYPE_TESSELLATION_EVALUATION:
3999 case glu::SHADERTYPE_GEOMETRY:
4000 case glu::SHADERTYPE_TASK:
4001 case glu::SHADERTYPE_MESH:
4002 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_VERTEX_PIPELINE_STORES_AND_ATOMICS);
4003 break;
4004 case glu::SHADERTYPE_FRAGMENT:
4005 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_FRAGMENT_STORES_AND_ATOMICS);
4006 break;
4007 case glu::SHADERTYPE_COMPUTE:
4008 break;
4009 default:
4010 DE_FATAL("Unsupported shader type");
4011 break;
4012 }
4013
4014 #ifndef CTS_USES_VULKANSC
4015 if (shaderType == glu::SHADERTYPE_TESSELLATION_EVALUATION &&
4016 context.isDeviceFunctionalitySupported("VK_KHR_portability_subset") &&
4017 !context.getPortabilitySubsetFeatures().tessellationIsolines)
4018 {
4019 TCU_THROW(NotSupportedError,
4020 "VK_KHR_portability_subset: Tessellation iso lines are not supported by this implementation");
4021 }
4022 #endif // CTS_USES_VULKANSC
4023 }
4024
4025 } // namespace shaderexecutor
4026 } // namespace vkt
4027