1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2015 The Khronos Group Inc.
6  * Copyright (c) 2015 Samsung Electronics Co., Ltd.
7  * Copyright (c) 2016 The Android Open Source Project
8  *
9  * Licensed under the Apache License, Version 2.0 (the "License");
10  * you may not use this file except in compliance with the License.
11  * You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  *//*!
22  * \file
23  * \brief Vulkan ShaderExecutor
24  *//*--------------------------------------------------------------------*/
25 
26 #include "vktShaderExecutor.hpp"
27 
28 #include "vkMemUtil.hpp"
29 #include "vkRef.hpp"
30 #include "vkPrograms.hpp"
31 #include "vkRefUtil.hpp"
32 #include "vkTypeUtil.hpp"
33 #include "vkQueryUtil.hpp"
34 #include "vkBuilderUtil.hpp"
35 #include "vkCmdUtil.hpp"
36 #include "vkObjUtil.hpp"
37 #include "vkBarrierUtil.hpp"
38 
39 #include "gluShaderUtil.hpp"
40 
41 #include "tcuVector.hpp"
42 #include "tcuTestLog.hpp"
43 #include "tcuTextureUtil.hpp"
44 
45 #include "deUniquePtr.hpp"
46 #include "deStringUtil.hpp"
47 #include "deSharedPtr.hpp"
48 #include "deFloat16.h"
49 
50 #include <map>
51 #include <sstream>
52 #include <iostream>
53 
54 using std::vector;
55 using namespace vk;
56 
57 namespace vkt
58 {
59 namespace shaderexecutor
60 {
61 namespace
62 {
63 
64 enum
65 {
66     DEFAULT_RENDER_WIDTH  = 100,
67     DEFAULT_RENDER_HEIGHT = 100,
68 };
69 
70 // Common typedefs
71 
72 typedef de::SharedPtr<Unique<VkImage>> VkImageSp;
73 typedef de::SharedPtr<Unique<VkImageView>> VkImageViewSp;
74 typedef de::SharedPtr<Unique<VkBuffer>> VkBufferSp;
75 typedef de::SharedPtr<Allocation> AllocationSp;
76 
77 static VkFormat getAttributeFormat(const glu::DataType dataType);
78 
79 // Shader utilities
80 
81 static VkClearValue getDefaultClearColor(void)
82 {
83     return makeClearValueColorF32(0.125f, 0.25f, 0.5f, 1.0f);
84 }
85 
86 static std::string generateEmptyFragmentSource(void)
87 {
88     std::ostringstream src;
89 
90     src << "#version 450\n"
91            "layout(location=0) out highp vec4 o_color;\n";
92 
93     src << "void main (void)\n{\n";
94     src << "    o_color = vec4(0.0);\n";
95     src << "}\n";
96 
97     return src.str();
98 }
99 
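// Emits GLSL that copies each float-typed output into its packed_<name> counterpart: every component is
// converted to 16 bits and stored in the low half of a 32-bit word via packFloat2x16 (the high half holds -1.0),
// then reinterpreted as float with uintBitsToFloat. Vectors and matrices are handled component by component.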
100 void packFloat16Bit(std::ostream &src, const std::vector<Symbol> &outputs)
101 {
102     for (vector<Symbol>::const_iterator symIter = outputs.begin(); symIter != outputs.end(); ++symIter)
103     {
104         if (glu::isDataTypeFloatType(symIter->varType.getBasicType()))
105         {
106             if (glu::isDataTypeVector(symIter->varType.getBasicType()))
107             {
108                 for (int i = 0; i < glu::getDataTypeScalarSize(symIter->varType.getBasicType()); i++)
109                 {
110                     src << "\tpacked_" << symIter->name << "[" << i << "] = uintBitsToFloat(packFloat2x16(f16vec2("
111                         << symIter->name << "[" << i << "], -1.0)));\n";
112                 }
113             }
114             else if (glu::isDataTypeMatrix(symIter->varType.getBasicType()))
115             {
116                 int maxRow = 0;
117                 int maxCol = 0;
118                 switch (symIter->varType.getBasicType())
119                 {
120                 case glu::TYPE_FLOAT_MAT2:
121                     maxRow = maxCol = 2;
122                     break;
123                 case glu::TYPE_FLOAT_MAT2X3:
124                     maxRow = 2;
125                     maxCol = 3;
126                     break;
127                 case glu::TYPE_FLOAT_MAT2X4:
128                     maxRow = 2;
129                     maxCol = 4;
130                     break;
131                 case glu::TYPE_FLOAT_MAT3X2:
132                     maxRow = 3;
133                     maxCol = 2;
134                     break;
135                 case glu::TYPE_FLOAT_MAT3:
136                     maxRow = maxCol = 3;
137                     break;
138                 case glu::TYPE_FLOAT_MAT3X4:
139                     maxRow = 3;
140                     maxCol = 4;
141                     break;
142                 case glu::TYPE_FLOAT_MAT4X2:
143                     maxRow = 4;
144                     maxCol = 2;
145                     break;
146                 case glu::TYPE_FLOAT_MAT4X3:
147                     maxRow = 4;
148                     maxCol = 3;
149                     break;
150                 case glu::TYPE_FLOAT_MAT4:
151                     maxRow = maxCol = 4;
152                     break;
153                 default:
154                     DE_ASSERT(false);
155                     break;
156                 }
157 
158                 for (int i = 0; i < maxRow; i++)
159                     for (int j = 0; j < maxCol; j++)
160                     {
161                         src << "\tpacked_" << symIter->name << "[" << i << "][" << j
162                             << "] = uintBitsToFloat(packFloat2x16(f16vec2(" << symIter->name << "[" << i << "][" << j
163                             << "], -1.0)));\n";
164                     }
165             }
166             else
167             {
168                 src << "\tpacked_" << symIter->name << " = uintBitsToFloat(packFloat2x16(f16vec2(" << symIter->name
169                     << ", -1.0)));\n";
170             }
171         }
172     }
173 }
174 
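// Vertex shader that sets gl_Position/gl_PointSize and forwards each test input unchanged to the next stage as a flat output.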
175 static std::string generatePassthroughVertexShader(const ShaderSpec &shaderSpec, const char *inputPrefix,
176                                                    const char *outputPrefix)
177 {
178     std::ostringstream src;
179     int location = 0;
180 
181     src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
182 
183     if (!shaderSpec.globalDeclarations.empty())
184         src << shaderSpec.globalDeclarations << "\n";
185 
186     src << "layout(location = " << location << ") in highp vec4 a_position;\n";
187 
188     for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
189     {
190         location++;
191         src << "layout(location = " << location << ") in " << glu::declare(input->varType, inputPrefix + input->name)
192             << ";\n"
193             << "layout(location = " << location - 1 << ") flat out "
194             << glu::declare(input->varType, outputPrefix + input->name) << ";\n";
195     }
196 
197     src << "\nvoid main (void)\n{\n"
198         << "    gl_Position = a_position;\n"
199         << "    gl_PointSize = 1.0;\n";
200 
201     for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
202         src << "\t" << outputPrefix << input->name << " = " << inputPrefix << input->name << ";\n";
203 
204     src << "}\n";
205 
206     return src.str();
207 }
208 
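// Vertex shader that runs shaderSpec.source itself: inputs are fetched into local variables (optionally as float16
// types when packFloat16Bit is set), the operation is executed, and the results are written to flat outputs with
// bool values converted to int.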
209 static std::string generateVertexShader(const ShaderSpec &shaderSpec, const std::string &inputPrefix,
210                                         const std::string &outputPrefix)
211 {
212     DE_ASSERT(!inputPrefix.empty() && !outputPrefix.empty());
213 
214     std::ostringstream src;
215 
216     src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
217 
218     if (!shaderSpec.globalDeclarations.empty())
219         src << shaderSpec.globalDeclarations << "\n";
220 
221     src << "layout(location = 0) in highp vec4 a_position;\n";
222 
223     int locationNumber = 1;
224     for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end();
225          ++input, ++locationNumber)
226     {
227         src << "layout(location = " << locationNumber << ") in "
228             << glu::declare(input->varType, inputPrefix + input->name) << ";\n";
229     }
230 
231     locationNumber = 0;
232     for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end();
233          ++output, ++locationNumber)
234     {
235         DE_ASSERT(output->varType.isBasicType());
236 
237         if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
238         {
239             const int vecSize               = glu::getDataTypeScalarSize(output->varType.getBasicType());
240             const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
241             const glu::VarType intType(intBaseType, glu::PRECISION_HIGHP);
242 
243             src << "layout(location = " << locationNumber << ") flat out "
244                 << glu::declare(intType, outputPrefix + output->name) << ";\n";
245         }
246         else
247             src << "layout(location = " << locationNumber << ") flat out "
248                 << glu::declare(output->varType, outputPrefix + output->name) << ";\n";
249     }
250 
251     src << "\n"
252         << "void main (void)\n"
253         << "{\n"
254         << "    gl_Position = a_position;\n"
255         << "    gl_PointSize = 1.0;\n";
256 
257     // Declare & fetch local input variables
258     for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
259     {
260         if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(input->varType.getBasicType()))
261         {
262             const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(input->varType.getBasicType()));
263             src << "\t" << tname << " " << input->name << " = " << tname << "(" << inputPrefix << input->name << ");\n";
264         }
265         else
266             src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << ";\n";
267     }
268 
269     // Declare local output variables
270     for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end();
271          ++output)
272     {
273         if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
274         {
275             const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(output->varType.getBasicType()));
276             src << "\t" << tname << " " << output->name << ";\n";
277             const char *tname2 = glu::getDataTypeName(output->varType.getBasicType());
278             src << "\t" << tname2 << " "
279                 << "packed_" << output->name << ";\n";
280         }
281         else
282             src << "\t" << glu::declare(output->varType, output->name) << ";\n";
283     }
284 
285     // Operation - indented to correct level.
286     {
287         std::istringstream opSrc(shaderSpec.source);
288         std::string line;
289 
290         while (std::getline(opSrc, line))
291             src << "\t" << line << "\n";
292     }
293 
294     if (shaderSpec.packFloat16Bit)
295         packFloat16Bit(src, shaderSpec.outputs);
296 
297     // Assignments to outputs.
298     for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end();
299          ++output)
300     {
301         if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
302         {
303             src << "\t" << outputPrefix << output->name << " = packed_" << output->name << ";\n";
304         }
305         else
306         {
307             if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
308             {
309                 const int vecSize               = glu::getDataTypeScalarSize(output->varType.getBasicType());
310                 const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
311 
312                 src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "("
313                     << output->name << ");\n";
314             }
315             else
316                 src << "\t" << outputPrefix << output->name << " = " << output->name << ";\n";
317         }
318     }
319 
320     src << "}\n";
321 
322     return src.str();
323 }
324 
325 struct FragmentOutputLayout
326 {
327     std::vector<const Symbol *> locationSymbols; //! Symbols by location
328     std::map<std::string, int> locationMap;      //! Map from symbol name to start location
329 };
330 
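// Declares the fragment shader color outputs for the test outputs: bool outputs become int, float outputs become
// uint when useIntOutputs is set, and matrix outputs are split into one vector output per column at consecutive
// locations.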
331 static void generateFragShaderOutputDecl(std::ostream &src, const ShaderSpec &shaderSpec, bool useIntOutputs,
332                                          const std::map<std::string, int> &outLocationMap,
333                                          const std::string &outputPrefix)
334 {
335     for (int outNdx = 0; outNdx < (int)shaderSpec.outputs.size(); ++outNdx)
336     {
337         const Symbol &output         = shaderSpec.outputs[outNdx];
338         const int location           = de::lookup(outLocationMap, output.name);
339         const std::string outVarName = outputPrefix + output.name;
340         glu::VariableDeclaration decl(output.varType, outVarName, glu::STORAGE_OUT, glu::INTERPOLATION_LAST,
341                                       glu::Layout(location));
342 
343         TCU_CHECK_INTERNAL(output.varType.isBasicType());
344 
345         if (useIntOutputs && glu::isDataTypeFloatOrVec(output.varType.getBasicType()))
346         {
347             const int vecSize                 = glu::getDataTypeScalarSize(output.varType.getBasicType());
348             const glu::DataType uintBasicType = vecSize > 1 ? glu::getDataTypeUintVec(vecSize) : glu::TYPE_UINT;
349             const glu::VarType uintType(uintBasicType, glu::PRECISION_HIGHP);
350 
351             decl.varType = uintType;
352             src << decl << ";\n";
353         }
354         else if (glu::isDataTypeBoolOrBVec(output.varType.getBasicType()))
355         {
356             const int vecSize                = glu::getDataTypeScalarSize(output.varType.getBasicType());
357             const glu::DataType intBasicType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
358             const glu::VarType intType(intBasicType, glu::PRECISION_HIGHP);
359 
360             decl.varType = intType;
361             src << decl << ";\n";
362         }
363         else if (glu::isDataTypeMatrix(output.varType.getBasicType()))
364         {
365             const int vecSize                 = glu::getDataTypeMatrixNumRows(output.varType.getBasicType());
366             const int numVecs                 = glu::getDataTypeMatrixNumColumns(output.varType.getBasicType());
367             const glu::DataType uintBasicType = glu::getDataTypeUintVec(vecSize);
368             const glu::VarType uintType(uintBasicType, glu::PRECISION_HIGHP);
369 
370             decl.varType = uintType;
371             for (int vecNdx = 0; vecNdx < numVecs; ++vecNdx)
372             {
373                 decl.name            = outVarName + "_" + de::toString(vecNdx);
374                 decl.layout.location = location + vecNdx;
375                 src << decl << ";\n";
376             }
377         }
378         else
379             src << decl << ";\n";
380     }
381 }
382 
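// Writes the computed values to the declared color outputs, applying the matching conversions: bool->int casts,
// floatBitsToUint when useIntOutputs is set, column-by-column writes for matrices, and packed_<name> sources for
// packed 16-bit results.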
383 static void generateFragShaderOutAssign(std::ostream &src, const ShaderSpec &shaderSpec, bool useIntOutputs,
384                                         const std::string &valuePrefix, const std::string &outputPrefix,
385                                         const bool isInput16Bit = false)
386 {
387     if (isInput16Bit)
388         packFloat16Bit(src, shaderSpec.outputs);
389 
390     for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end();
391          ++output)
392     {
393         const std::string packPrefix =
394             (isInput16Bit && glu::isDataTypeFloatType(output->varType.getBasicType())) ? "packed_" : "";
395 
396         if (useIntOutputs && glu::isDataTypeFloatOrVec(output->varType.getBasicType()))
397             src << "    o_" << output->name << " = floatBitsToUint(" << valuePrefix << output->name << ");\n";
398         else if (glu::isDataTypeMatrix(output->varType.getBasicType()))
399         {
400             const int numVecs = glu::getDataTypeMatrixNumColumns(output->varType.getBasicType());
401 
402             for (int vecNdx = 0; vecNdx < numVecs; ++vecNdx)
403                 if (useIntOutputs)
404                     src << "\t" << outputPrefix << output->name << "_" << vecNdx << " = floatBitsToUint(" << valuePrefix
405                         << output->name << "[" << vecNdx << "]);\n";
406                 else
407                     src << "\t" << outputPrefix << output->name << "_" << vecNdx << " = " << packPrefix << valuePrefix
408                         << output->name << "[" << vecNdx << "];\n";
409         }
410         else if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
411         {
412             const int vecSize               = glu::getDataTypeScalarSize(output->varType.getBasicType());
413             const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
414 
415             src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "("
416                 << valuePrefix << output->name << ");\n";
417         }
418         else
419             src << "\t" << outputPrefix << output->name << " = " << packPrefix << valuePrefix << output->name << ";\n";
420     }
421 }
422 
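// Fragment shader used when the stage under test runs earlier in the pipeline: it simply copies the incoming flat
// values to the color outputs.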
423 static std::string generatePassthroughFragmentShader(const ShaderSpec &shaderSpec, bool useIntOutputs,
424                                                      const std::map<std::string, int> &outLocationMap,
425                                                      const std::string &inputPrefix, const std::string &outputPrefix)
426 {
427     std::ostringstream src;
428 
429     src << "#version 450\n";
430 
431     if (!shaderSpec.globalDeclarations.empty())
432         src << shaderSpec.globalDeclarations << "\n";
433 
434     int locationNumber = 0;
435     for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end();
436          ++output, ++locationNumber)
437     {
438         if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
439         {
440             const int vecSize               = glu::getDataTypeScalarSize(output->varType.getBasicType());
441             const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
442             const glu::VarType intType(intBaseType, glu::PRECISION_HIGHP);
443 
444             src << "layout(location = " << locationNumber << ") flat in "
445                 << glu::declare(intType, inputPrefix + output->name) << ";\n";
446         }
447         else
448             src << "layout(location = " << locationNumber << ") flat in "
449                 << glu::declare(output->varType, inputPrefix + output->name) << ";\n";
450     }
451 
452     generateFragShaderOutputDecl(src, shaderSpec, useIntOutputs, outLocationMap, outputPrefix);
453 
454     src << "\nvoid main (void)\n{\n";
455 
456     generateFragShaderOutAssign(src, shaderSpec, useIntOutputs, inputPrefix, outputPrefix);
457 
458     src << "}\n";
459 
460     return src.str();
461 }
462 
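// Geometry shader that runs shaderSpec.source: it reads the flat inputs from the single incoming point, executes the
// operation, and re-emits one point carrying the results (bool outputs converted to int).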
463 static std::string generateGeometryShader(const ShaderSpec &shaderSpec, const std::string &inputPrefix,
464                                           const std::string &outputPrefix, const bool pointSizeSupported)
465 {
466     DE_ASSERT(!inputPrefix.empty() && !outputPrefix.empty());
467 
468     std::ostringstream src;
469 
470     src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
471 
472     if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
473         src << "#extension GL_EXT_geometry_shader : require\n";
474 
475     if (!shaderSpec.globalDeclarations.empty())
476         src << shaderSpec.globalDeclarations << "\n";
477 
478     src << "layout(points) in;\n"
479         << "layout(points, max_vertices = 1) out;\n";
480 
481     int locationNumber = 0;
482     for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end();
483          ++input, ++locationNumber)
484         src << "layout(location = " << locationNumber << ") flat in "
485             << glu::declare(input->varType, inputPrefix + input->name) << "[];\n";
486 
487     locationNumber = 0;
488     for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end();
489          ++output, ++locationNumber)
490     {
491         DE_ASSERT(output->varType.isBasicType());
492 
493         if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
494         {
495             const int vecSize               = glu::getDataTypeScalarSize(output->varType.getBasicType());
496             const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
497             const glu::VarType intType(intBaseType, glu::PRECISION_HIGHP);
498 
499             src << "layout(location = " << locationNumber << ") flat out "
500                 << glu::declare(intType, outputPrefix + output->name) << ";\n";
501         }
502         else
503             src << "layout(location = " << locationNumber << ") flat out "
504                 << glu::declare(output->varType, outputPrefix + output->name) << ";\n";
505     }
506 
507     src << "\n"
508         << "void main (void)\n"
509         << "{\n"
510         << "    gl_Position = gl_in[0].gl_Position;\n"
511         << (pointSizeSupported ? "    gl_PointSize = gl_in[0].gl_PointSize;\n\n" : "");
512 
513     // Fetch input variables
514     for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
515         src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << "[0];\n";
516 
517     // Declare local output variables.
518     for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end();
519          ++output)
520         src << "\t" << glu::declare(output->varType, output->name) << ";\n";
521 
522     src << "\n";
523 
524     // Operation - indented to correct level.
525     {
526         std::istringstream opSrc(shaderSpec.source);
527         std::string line;
528 
529         while (std::getline(opSrc, line))
530             src << "\t" << line << "\n";
531     }
532 
533     // Assignments to outputs.
534     for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end();
535          ++output)
536     {
537         if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
538         {
539             const int vecSize               = glu::getDataTypeScalarSize(output->varType.getBasicType());
540             const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
541 
542             src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "("
543                 << output->name << ");\n";
544         }
545         else
546             src << "\t" << outputPrefix << output->name << " = " << output->name << ";\n";
547     }
548 
549     src << "    EmitVertex();\n"
550         << "    EndPrimitive();\n"
551         << "}\n";
552 
553     return src.str();
554 }
555 
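// Fragment shader that runs shaderSpec.source per fragment and writes the results to the color outputs declared by
// generateFragShaderOutputDecl.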
556 static std::string generateFragmentShader(const ShaderSpec &shaderSpec, bool useIntOutputs,
557                                           const std::map<std::string, int> &outLocationMap,
558                                           const std::string &inputPrefix, const std::string &outputPrefix)
559 {
560     std::ostringstream src;
561     src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
562     if (!shaderSpec.globalDeclarations.empty())
563         src << shaderSpec.globalDeclarations << "\n";
564 
565     int locationNumber = 0;
566     for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end();
567          ++input, ++locationNumber)
568     {
569         src << "layout(location = " << locationNumber << ") flat in "
570             << glu::declare(input->varType, inputPrefix + input->name) << ";\n";
571     }
572 
573     generateFragShaderOutputDecl(src, shaderSpec, useIntOutputs, outLocationMap, outputPrefix);
574 
575     src << "\nvoid main (void)\n{\n";
576 
577     // Declare & fetch local input variables
578     for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
579     {
580         if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(input->varType.getBasicType()))
581         {
582             const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(input->varType.getBasicType()));
583             src << "\t" << tname << " " << input->name << " = " << tname << "(" << inputPrefix << input->name << ");\n";
584         }
585         else
586             src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << ";\n";
587     }
588 
589     // Declare output variables
590     for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end();
591          ++output)
592     {
593         if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
594         {
595             const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(output->varType.getBasicType()));
596             src << "\t" << tname << " " << output->name << ";\n";
597             const char *tname2 = glu::getDataTypeName(output->varType.getBasicType());
598             src << "\t" << tname2 << " "
599                 << "packed_" << output->name << ";\n";
600         }
601         else
602             src << "\t" << glu::declare(output->varType, output->name) << ";\n";
603     }
604 
605     // Operation - indented to correct level.
606     {
607         std::istringstream opSrc(shaderSpec.source);
608         std::string line;
609 
610         while (std::getline(opSrc, line))
611             src << "\t" << line << "\n";
612     }
613 
614     generateFragShaderOutAssign(src, shaderSpec, useIntOutputs, "", outputPrefix, shaderSpec.packFloat16Bit);
615 
616     src << "}\n";
617 
618     return src.str();
619 }
620 
621 // FragmentOutExecutor
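// Executes the shader under test by rendering one single-pixel point per input value and capturing the results as
// fragment shader color outputs.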
622 
623 class FragmentOutExecutor : public ShaderExecutor
624 {
625 public:
626     FragmentOutExecutor(Context &context, glu::ShaderType shaderType, const ShaderSpec &shaderSpec,
627                         VkDescriptorSetLayout extraResourcesLayout);
628     virtual ~FragmentOutExecutor(void);
629 
630     virtual void execute(int numValues, const void *const *inputs, void *const *outputs,
631                          VkDescriptorSet extraResources);
632 
633 protected:
634     const glu::ShaderType m_shaderType;
635     const FragmentOutputLayout m_outputLayout;
636 
637 private:
638     void bindAttributes(int numValues, const void *const *inputs);
639 
640     void addAttribute(uint32_t bindingLocation, VkFormat format, uint32_t sizePerElement, uint32_t count,
641                       const void *dataPtr);
642     // reinit render data members
643     // Reinitialize render data members
644 
645     const VkDescriptorSetLayout m_extraResourcesLayout;
646 
647     std::vector<VkVertexInputBindingDescription> m_vertexBindingDescriptions;
648     std::vector<VkVertexInputAttributeDescription> m_vertexAttributeDescriptions;
649     std::vector<VkBufferSp> m_vertexBuffers;
650     std::vector<AllocationSp> m_vertexBufferAllocs;
651 };
652 
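// Assigns each output symbol a starting color location and records one locationSymbols entry per location it
// occupies (multi-location types such as matrices take several consecutive locations).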
653 static FragmentOutputLayout computeFragmentOutputLayout(const std::vector<Symbol> &symbols)
654 {
655     FragmentOutputLayout ret;
656     int location = 0;
657 
658     for (std::vector<Symbol>::const_iterator it = symbols.begin(); it != symbols.end(); ++it)
659     {
660         const int numLocations = glu::getDataTypeNumLocations(it->varType.getBasicType());
661 
662         TCU_CHECK_INTERNAL(!de::contains(ret.locationMap, it->name));
663         de::insert(ret.locationMap, it->name, location);
664         location += numLocations;
665 
666         for (int ndx = 0; ndx < numLocations; ++ndx)
667             ret.locationSymbols.push_back(&*it);
668     }
669 
670     return ret;
671 }
672 
673 FragmentOutExecutor::FragmentOutExecutor(Context &context, glu::ShaderType shaderType, const ShaderSpec &shaderSpec,
674                                          VkDescriptorSetLayout extraResourcesLayout)
675     : ShaderExecutor(context, shaderSpec)
676     , m_shaderType(shaderType)
677     , m_outputLayout(computeFragmentOutputLayout(m_shaderSpec.outputs))
678     , m_extraResourcesLayout(extraResourcesLayout)
679 {
680     const VkPhysicalDevice physicalDevice = m_context.getPhysicalDevice();
681     const InstanceInterface &vki          = m_context.getInstanceInterface();
682 
683     // Input attributes
684     for (int inputNdx = 0; inputNdx < (int)m_shaderSpec.inputs.size(); inputNdx++)
685     {
686         const Symbol &symbol                      = m_shaderSpec.inputs[inputNdx];
687         const glu::DataType basicType             = symbol.varType.getBasicType();
688         const VkFormat format                     = getAttributeFormat(basicType);
689         const VkFormatProperties formatProperties = getPhysicalDeviceFormatProperties(vki, physicalDevice, format);
690         if ((formatProperties.bufferFeatures & VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT) == 0)
691             TCU_THROW(NotSupportedError, "format not supported by device as vertex buffer attribute format");
692     }
693 }
694 
695 FragmentOutExecutor::~FragmentOutExecutor(void)
696 {
697 }
698 
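// Maps each value index to the clip-space center of a distinct pixel so that exactly one fragment is shaded per input value.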
699 static std::vector<tcu::Vec2> computeVertexPositions(int numValues, const tcu::IVec2 &renderSize)
700 {
701     std::vector<tcu::Vec2> positions(numValues);
702     for (int valNdx = 0; valNdx < numValues; valNdx++)
703     {
704         const int ix   = valNdx % renderSize.x();
705         const int iy   = valNdx / renderSize.x();
706         const float fx = -1.0f + 2.0f * ((float(ix) + 0.5f) / float(renderSize.x()));
707         const float fy = -1.0f + 2.0f * ((float(iy) + 0.5f) / float(renderSize.y()));
708 
709         positions[valNdx] = tcu::Vec2(fx, fy);
710     }
711 
712     return positions;
713 }
714 
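// Chooses the texture format used to read back an output of the given type; float results are read back as raw
// 32-bit unsigned integers when useIntOutputs is set.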
715 static tcu::TextureFormat getRenderbufferFormatForOutput(const glu::VarType &outputType, bool useIntOutputs)
716 {
717     const tcu::TextureFormat::ChannelOrder channelOrderMap[] = {tcu::TextureFormat::R, tcu::TextureFormat::RG,
718                                                                 tcu::TextureFormat::RGBA, // No RGB variants available.
719                                                                 tcu::TextureFormat::RGBA};
720 
721     const glu::DataType basicType = outputType.getBasicType();
722     const int numComps            = glu::getDataTypeNumComponents(basicType);
723     tcu::TextureFormat::ChannelType channelType;
724 
725     switch (glu::getDataTypeScalarType(basicType))
726     {
727     case glu::TYPE_UINT:
728         channelType = tcu::TextureFormat::UNSIGNED_INT32;
729         break;
730     case glu::TYPE_INT:
731         channelType = tcu::TextureFormat::SIGNED_INT32;
732         break;
733     case glu::TYPE_BOOL:
734         channelType = tcu::TextureFormat::SIGNED_INT32;
735         break;
736     case glu::TYPE_FLOAT:
737         channelType = useIntOutputs ? tcu::TextureFormat::UNSIGNED_INT32 : tcu::TextureFormat::FLOAT;
738         break;
739     case glu::TYPE_FLOAT16:
740         channelType = useIntOutputs ? tcu::TextureFormat::UNSIGNED_INT32 : tcu::TextureFormat::HALF_FLOAT;
741         break;
742     default:
743         throw tcu::InternalError("Invalid output type");
744     }
745 
746     DE_ASSERT(de::inRange<int>(numComps, 1, DE_LENGTH_OF_ARRAY(channelOrderMap)));
747 
748     return tcu::TextureFormat(channelOrderMap[numComps - 1], channelType);
749 }
750 
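// Maps a GLU data type to the vertex attribute format used to feed it; matrix types use the format of a single column.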
751 static VkFormat getAttributeFormat(const glu::DataType dataType)
752 {
753     switch (dataType)
754     {
755     case glu::TYPE_FLOAT16:
756         return VK_FORMAT_R16_SFLOAT;
757     case glu::TYPE_FLOAT16_VEC2:
758         return VK_FORMAT_R16G16_SFLOAT;
759     case glu::TYPE_FLOAT16_VEC3:
760         return VK_FORMAT_R16G16B16_SFLOAT;
761     case glu::TYPE_FLOAT16_VEC4:
762         return VK_FORMAT_R16G16B16A16_SFLOAT;
763 
764     case glu::TYPE_FLOAT:
765         return VK_FORMAT_R32_SFLOAT;
766     case glu::TYPE_FLOAT_VEC2:
767         return VK_FORMAT_R32G32_SFLOAT;
768     case glu::TYPE_FLOAT_VEC3:
769         return VK_FORMAT_R32G32B32_SFLOAT;
770     case glu::TYPE_FLOAT_VEC4:
771         return VK_FORMAT_R32G32B32A32_SFLOAT;
772 
773     case glu::TYPE_INT:
774         return VK_FORMAT_R32_SINT;
775     case glu::TYPE_INT_VEC2:
776         return VK_FORMAT_R32G32_SINT;
777     case glu::TYPE_INT_VEC3:
778         return VK_FORMAT_R32G32B32_SINT;
779     case glu::TYPE_INT_VEC4:
780         return VK_FORMAT_R32G32B32A32_SINT;
781 
782     case glu::TYPE_UINT:
783         return VK_FORMAT_R32_UINT;
784     case glu::TYPE_UINT_VEC2:
785         return VK_FORMAT_R32G32_UINT;
786     case glu::TYPE_UINT_VEC3:
787         return VK_FORMAT_R32G32B32_UINT;
788     case glu::TYPE_UINT_VEC4:
789         return VK_FORMAT_R32G32B32A32_UINT;
790 
791     case glu::TYPE_FLOAT_MAT2:
792         return VK_FORMAT_R32G32_SFLOAT;
793     case glu::TYPE_FLOAT_MAT2X3:
794         return VK_FORMAT_R32G32B32_SFLOAT;
795     case glu::TYPE_FLOAT_MAT2X4:
796         return VK_FORMAT_R32G32B32A32_SFLOAT;
797     case glu::TYPE_FLOAT_MAT3X2:
798         return VK_FORMAT_R32G32_SFLOAT;
799     case glu::TYPE_FLOAT_MAT3:
800         return VK_FORMAT_R32G32B32_SFLOAT;
801     case glu::TYPE_FLOAT_MAT3X4:
802         return VK_FORMAT_R32G32B32A32_SFLOAT;
803     case glu::TYPE_FLOAT_MAT4X2:
804         return VK_FORMAT_R32G32_SFLOAT;
805     case glu::TYPE_FLOAT_MAT4X3:
806         return VK_FORMAT_R32G32B32_SFLOAT;
807     case glu::TYPE_FLOAT_MAT4:
808         return VK_FORMAT_R32G32B32A32_SFLOAT;
809     default:
810         DE_ASSERT(false);
811         return VK_FORMAT_UNDEFINED;
812     }
813 }
814 
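// Appends a vertex binding/attribute description pair at the given location and uploads 'count' elements of input
// data into a new host-visible vertex buffer.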
815 void FragmentOutExecutor::addAttribute(uint32_t bindingLocation, VkFormat format, uint32_t sizePerElement,
816                                        uint32_t count, const void *dataPtr)
817 {
818     // Portability requires the stride to be a multiple of minVertexInputBindingStrideAlignment.
819     // This value is usually 4 and current tests meet this requirement, but
820     // if this changes in the future then this limit should be verified in checkSupport.
821 #ifndef CTS_USES_VULKANSC
822     if (m_context.isDeviceFunctionalitySupported("VK_KHR_portability_subset") &&
823         ((sizePerElement % m_context.getPortabilitySubsetProperties().minVertexInputBindingStrideAlignment) != 0))
824     {
825         DE_FATAL("stride is not a multiple of minVertexInputBindingStrideAlignment");
826     }
827 #endif // CTS_USES_VULKANSC
828 
829     // Add binding specification
830     const uint32_t binding                                   = (uint32_t)m_vertexBindingDescriptions.size();
831     const VkVertexInputBindingDescription bindingDescription = {binding, sizePerElement, VK_VERTEX_INPUT_RATE_VERTEX};
832 
833     m_vertexBindingDescriptions.push_back(bindingDescription);
834 
835     // Add location and format specification
836     const VkVertexInputAttributeDescription attributeDescription = {
837         bindingLocation, // uint32_t location;
838         binding,         // uint32_t binding;
839         format,          // VkFormat format;
840         0u,              // uint32_t offset;
841     };
842 
843     m_vertexAttributeDescriptions.push_back(attributeDescription);
844 
845     // Upload data to buffer
846     const VkDevice vkDevice         = m_context.getDevice();
847     const DeviceInterface &vk       = m_context.getDeviceInterface();
848     const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
849 
850     const VkDeviceSize inputSize                = sizePerElement * count;
851     const VkBufferCreateInfo vertexBufferParams = {
852         VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
853         DE_NULL,                              // const void* pNext;
854         0u,                                   // VkBufferCreateFlags flags;
855         inputSize,                            // VkDeviceSize size;
856         VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,    // VkBufferUsageFlags usage;
857         VK_SHARING_MODE_EXCLUSIVE,            // VkSharingMode sharingMode;
858         1u,                                   // uint32_t queueFamilyCount;
859         &queueFamilyIndex                     // const uint32_t* pQueueFamilyIndices;
860     };
861 
862     Move<VkBuffer> buffer         = createBuffer(vk, vkDevice, &vertexBufferParams);
863     de::MovePtr<Allocation> alloc = m_context.getDefaultAllocator().allocate(
864         getBufferMemoryRequirements(vk, vkDevice, *buffer), MemoryRequirement::HostVisible);
865 
866     VK_CHECK(vk.bindBufferMemory(vkDevice, *buffer, alloc->getMemory(), alloc->getOffset()));
867 
868     deMemcpy(alloc->getHostPtr(), dataPtr, (size_t)inputSize);
869     flushAlloc(vk, vkDevice, *alloc);
870 
871     m_vertexBuffers.push_back(de::SharedPtr<Unique<VkBuffer>>(new Unique<VkBuffer>(buffer)));
872     m_vertexBufferAllocs.push_back(AllocationSp(alloc.release()));
873 }
874 
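// Creates a vertex attribute (and backing buffer) for every input symbol; matrix inputs are bound as one attribute per column.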
875 void FragmentOutExecutor::bindAttributes(int numValues, const void *const *inputs)
876 {
877     // Input attributes
878     for (int inputNdx = 0; inputNdx < (int)m_shaderSpec.inputs.size(); inputNdx++)
879     {
880         const Symbol &symbol          = m_shaderSpec.inputs[inputNdx];
881         const void *ptr               = inputs[inputNdx];
882         const glu::DataType basicType = symbol.varType.getBasicType();
883         const int vecSize             = glu::getDataTypeScalarSize(basicType);
884         const VkFormat format         = getAttributeFormat(basicType);
885         int elementSize               = 0;
886         int numAttrsToAdd             = 1;
887 
888         if (glu::isDataTypeDoubleOrDVec(basicType))
889             elementSize = sizeof(double);
890         if (glu::isDataTypeFloatOrVec(basicType))
891             elementSize = sizeof(float);
892         else if (glu::isDataTypeFloat16OrVec(basicType))
893             elementSize = sizeof(uint16_t);
894         else if (glu::isDataTypeIntOrIVec(basicType))
895             elementSize = sizeof(int);
896         else if (glu::isDataTypeUintOrUVec(basicType))
897             elementSize = sizeof(uint32_t);
898         else if (glu::isDataTypeMatrix(basicType))
899         {
900             int numRows = glu::getDataTypeMatrixNumRows(basicType);
901             int numCols = glu::getDataTypeMatrixNumColumns(basicType);
902 
903             elementSize   = numRows * numCols * (int)sizeof(float);
904             numAttrsToAdd = numCols;
905         }
906         else
907             DE_ASSERT(false);
908 
909         // Add attributes; in the case of a matrix, every column is bound as a separate attribute
910         for (int attrNdx = 0; attrNdx < numAttrsToAdd; attrNdx++)
911         {
912             addAttribute((uint32_t)m_vertexBindingDescriptions.size(), format, elementSize * vecSize, numValues, ptr);
913         }
914     }
915 }
916 
917 void FragmentOutExecutor::clearRenderData(void)
918 {
919     m_vertexBindingDescriptions.clear();
920     m_vertexAttributeDescriptions.clear();
921     m_vertexBuffers.clear();
922     m_vertexBufferAllocs.clear();
923 }
924 
925 static Move<VkDescriptorSetLayout> createEmptyDescriptorSetLayout(const DeviceInterface &vkd, VkDevice device)
926 {
927     const VkDescriptorSetLayoutCreateInfo createInfo = {
928         VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, DE_NULL, (VkDescriptorSetLayoutCreateFlags)0, 0u, DE_NULL,
929     };
930     return createDescriptorSetLayout(vkd, device, &createInfo);
931 }
932 
933 static Move<VkDescriptorPool> createEmptyDescriptorPool(const DeviceInterface &vkd, VkDevice device)
934 {
935     const VkDescriptorPoolSize emptySize = {
936         VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
937         1u,
938     };
939     const VkDescriptorPoolCreateInfo createInfo = {
940         VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
941         DE_NULL,
942         (VkDescriptorPoolCreateFlags)VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
943         1u,
944         1u,
945         &emptySize};
946     return createDescriptorPool(vkd, device, &createInfo);
947 }
948 
949 static Move<VkDescriptorSet> allocateSingleDescriptorSet(const DeviceInterface &vkd, VkDevice device,
950                                                          VkDescriptorPool pool, VkDescriptorSetLayout layout)
951 {
952     const VkDescriptorSetAllocateInfo allocInfo = {
953         VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, DE_NULL, pool, 1u, &layout,
954     };
955     return allocateDescriptorSet(vkd, device, &allocInfo);
956 }
957 
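// Renders numValues single-pixel points into one color attachment per output location; the shader under test runs in
// the selected stage and its results end up in the color images, which are transitioned to TRANSFER_SRC for readback.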
958 void FragmentOutExecutor::execute(int numValues, const void *const *inputs, void *const *outputs,
959                                   VkDescriptorSet extraResources)
960 {
961     const VkDevice vkDevice         = m_context.getDevice();
962     const DeviceInterface &vk       = m_context.getDeviceInterface();
963     const VkQueue queue             = m_context.getUniversalQueue();
964     const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
965     Allocator &memAlloc             = m_context.getDefaultAllocator();
966 
967     const uint32_t renderSizeX = de::min(static_cast<uint32_t>(128), (uint32_t)numValues);
968     const uint32_t renderSizeY =
969         ((uint32_t)numValues / renderSizeX) + (((uint32_t)numValues % renderSizeX != 0) ? 1u : 0u);
970     const tcu::UVec2 renderSize(renderSizeX, renderSizeY);
971     std::vector<tcu::Vec2> positions;
972 
973     const bool useGeometryShader = m_shaderType == glu::SHADERTYPE_GEOMETRY;
974 
975     std::vector<VkImageSp> colorImages;
976     std::vector<VkImageMemoryBarrier> colorImagePreRenderBarriers;
977     std::vector<VkImageMemoryBarrier> colorImagePostRenderBarriers;
978     std::vector<AllocationSp> colorImageAllocs;
979     std::vector<VkAttachmentDescription> attachments;
980     std::vector<VkClearValue> attachmentClearValues;
981     std::vector<VkImageViewSp> colorImageViews;
982 
983     std::vector<VkPipelineColorBlendAttachmentState> colorBlendAttachmentStates;
984     std::vector<VkAttachmentReference> colorAttachmentReferences;
985 
986     Move<VkRenderPass> renderPass;
987     Move<VkFramebuffer> framebuffer;
988     Move<VkPipelineLayout> pipelineLayout;
989     Move<VkPipeline> graphicsPipeline;
990 
991     Move<VkShaderModule> vertexShaderModule;
992     Move<VkShaderModule> geometryShaderModule;
993     Move<VkShaderModule> fragmentShaderModule;
994 
995     Move<VkCommandPool> cmdPool;
996     Move<VkCommandBuffer> cmdBuffer;
997 
998     Unique<VkDescriptorSetLayout> emptyDescriptorSetLayout(createEmptyDescriptorSetLayout(vk, vkDevice));
999     Unique<VkDescriptorPool> emptyDescriptorPool(createEmptyDescriptorPool(vk, vkDevice));
1000     Unique<VkDescriptorSet> emptyDescriptorSet(
1001         allocateSingleDescriptorSet(vk, vkDevice, *emptyDescriptorPool, *emptyDescriptorSetLayout));
1002 
1003     clearRenderData();
1004 
1005     // Compute positions - 1px points are used to drive fragment shading.
1006     positions = computeVertexPositions(numValues, renderSize.cast<int>());
1007 
1008     // Bind attributes
1009     addAttribute(0u, VK_FORMAT_R32G32_SFLOAT, sizeof(tcu::Vec2), (uint32_t)positions.size(), &positions[0]);
1010     bindAttributes(numValues, inputs);
1011 
1012     // Create color images
1013     {
1014         const VkPipelineColorBlendAttachmentState colorBlendAttachmentState = {
1015             VK_FALSE,             // VkBool32 blendEnable;
1016             VK_BLEND_FACTOR_ONE,  // VkBlendFactor srcColorBlendFactor;
1017             VK_BLEND_FACTOR_ZERO, // VkBlendFactor dstColorBlendFactor;
1018             VK_BLEND_OP_ADD,      // VkBlendOp blendOpColor;
1019             VK_BLEND_FACTOR_ONE,  // VkBlendFactor srcAlphaBlendFactor;
1020             VK_BLEND_FACTOR_ZERO, // VkBlendFactor dstAlphaBlendFactor;
1021             VK_BLEND_OP_ADD,      // VkBlendOp blendOpAlpha;
1022             (VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT |
1023              VK_COLOR_COMPONENT_A_BIT) // VkColorComponentFlags colorWriteMask;
1024         };
1025 
1026         for (int outNdx = 0; outNdx < (int)m_outputLayout.locationSymbols.size(); ++outNdx)
1027         {
1028             const bool isDouble   = glu::isDataTypeDoubleOrDVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
1029             const bool isFloat    = isDataTypeFloatOrVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
1030             const bool isFloat16b = glu::isDataTypeFloat16OrVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
1031             const bool isSigned   = isDataTypeIntOrIVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
1032             const bool isBool     = isDataTypeBoolOrBVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
1033             const VkFormat colorFormat =
1034                 (isDouble ? VK_FORMAT_R64G64B64A64_SFLOAT :
1035                             (isFloat16b ? VK_FORMAT_R16G16B16A16_SFLOAT :
1036                                           (isFloat ? VK_FORMAT_R32G32B32A32_SFLOAT :
1037                                                      (isSigned || isBool ? VK_FORMAT_R32G32B32A32_SINT :
1038                                                                            VK_FORMAT_R32G32B32A32_UINT))));
1039 
1040             {
1041                 const VkFormatProperties formatProperties = getPhysicalDeviceFormatProperties(
1042                     m_context.getInstanceInterface(), m_context.getPhysicalDevice(), colorFormat);
1043                 if ((formatProperties.optimalTilingFeatures & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT) == 0)
1044                     TCU_THROW(NotSupportedError, "Image format doesn't support COLOR_ATTACHMENT_BIT");
1045             }
1046 
1047             const VkImageCreateInfo colorImageParams = {
1048                 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,                                   // VkStructureType sType;
1049                 DE_NULL,                                                               // const void* pNext;
1050                 0u,                                                                    // VkImageCreateFlags flags;
1051                 VK_IMAGE_TYPE_2D,                                                      // VkImageType imageType;
1052                 colorFormat,                                                           // VkFormat format;
1053                 {renderSize.x(), renderSize.y(), 1u},                                  // VkExtent3D extent;
1054                 1u,                                                                    // uint32_t mipLevels;
1055                 1u,                                                                    // uint32_t arraySize;
1056                 VK_SAMPLE_COUNT_1_BIT,                                                 // VkSampleCountFlagBits samples;
1057                 VK_IMAGE_TILING_OPTIMAL,                                               // VkImageTiling tiling;
1058                 VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT, // VkImageUsageFlags usage;
1059                 VK_SHARING_MODE_EXCLUSIVE,                                             // VkSharingMode sharingMode;
1060                 1u,                                                                    // uint32_t queueFamilyCount;
1061                 &queueFamilyIndex,         // const uint32_t* pQueueFamilyIndices;
1062                 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
1063             };
1064 
1065             const VkAttachmentDescription colorAttachmentDescription = {
1066                 0u,                                       // VkAttachmentDescriptionFlags flags;
1067                 colorFormat,                              // VkFormat format;
1068                 VK_SAMPLE_COUNT_1_BIT,                    // VkSampleCountFlagBits samples;
1069                 VK_ATTACHMENT_LOAD_OP_CLEAR,              // VkAttachmentLoadOp loadOp;
1070                 VK_ATTACHMENT_STORE_OP_STORE,             // VkAttachmentStoreOp storeOp;
1071                 VK_ATTACHMENT_LOAD_OP_DONT_CARE,          // VkAttachmentLoadOp stencilLoadOp;
1072                 VK_ATTACHMENT_STORE_OP_DONT_CARE,         // VkAttachmentStoreOp stencilStoreOp;
1073                 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // VkImageLayout initialLayout;
1074                 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // VkImageLayout finalLayout;
1075             };
1076 
1077             Move<VkImage> colorImage = createImage(vk, vkDevice, &colorImageParams);
1078             colorImages.push_back(de::SharedPtr<Unique<VkImage>>(new Unique<VkImage>(colorImage)));
1079             attachmentClearValues.push_back(getDefaultClearColor());
1080 
1081             // Allocate and bind color image memory
1082             {
1083                 de::MovePtr<Allocation> colorImageAlloc = memAlloc.allocate(
1084                     getImageMemoryRequirements(vk, vkDevice, *((const VkImage *)colorImages.back().get())),
1085                     MemoryRequirement::Any);
1086                 VK_CHECK(vk.bindImageMemory(vkDevice, colorImages.back().get()->get(), colorImageAlloc->getMemory(),
1087                                             colorImageAlloc->getOffset()));
1088                 colorImageAllocs.push_back(de::SharedPtr<Allocation>(colorImageAlloc.release()));
1089 
1090                 attachments.push_back(colorAttachmentDescription);
1091                 colorBlendAttachmentStates.push_back(colorBlendAttachmentState);
1092 
1093                 const VkAttachmentReference colorAttachmentReference = {
1094                     (uint32_t)(colorImages.size() - 1),      // uint32_t attachment;
1095                     VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL // VkImageLayout layout;
1096                 };
1097 
1098                 colorAttachmentReferences.push_back(colorAttachmentReference);
1099             }
1100 
1101             // Create color attachment view
1102             {
1103                 const VkImageViewCreateInfo colorImageViewParams = {
1104                     VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, // VkStructureType sType;
1105                     DE_NULL,                                  // const void* pNext;
1106                     0u,                                       // VkImageViewCreateFlags flags;
1107                     colorImages.back().get()->get(),          // VkImage image;
1108                     VK_IMAGE_VIEW_TYPE_2D,                    // VkImageViewType viewType;
1109                     colorFormat,                              // VkFormat format;
1110                     {
1111                         VK_COMPONENT_SWIZZLE_R, // VkComponentSwizzle r;
1112                         VK_COMPONENT_SWIZZLE_G, // VkComponentSwizzle g;
1113                         VK_COMPONENT_SWIZZLE_B, // VkComponentSwizzle b;
1114                         VK_COMPONENT_SWIZZLE_A  // VkComponentSwizzle a;
1115                     },                          // VkComponentMapping components;
1116                     {
1117                         VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags aspectMask;
1118                         0u,                        // uint32_t baseMipLevel;
1119                         1u,                        // uint32_t mipLevels;
1120                         0u,                        // uint32_t baseArraySlice;
1121                         1u                         // uint32_t arraySize;
1122                     }                              // VkImageSubresourceRange subresourceRange;
1123                 };
1124 
1125                 Move<VkImageView> colorImageView = createImageView(vk, vkDevice, &colorImageViewParams);
1126                 colorImageViews.push_back(de::SharedPtr<Unique<VkImageView>>(new Unique<VkImageView>(colorImageView)));
1127 
1128                 const VkImageMemoryBarrier colorImagePreRenderBarrier = {
1129                     VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,                                       // sType
1130                     DE_NULL,                                                                      // pNext
1131                     0u,                                                                           // srcAccessMask
1132                     (VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT), // dstAccessMask
1133                     VK_IMAGE_LAYOUT_UNDEFINED,                                                    // oldLayout
1134                     VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,                                     // newLayout
1135                     VK_QUEUE_FAMILY_IGNORED,                                                      // srcQueueFamilyIndex
1136                     VK_QUEUE_FAMILY_IGNORED,                                                      // dstQueueFamilyIndex
1137                     colorImages.back().get()->get(),                                              // image
1138                     {
1139                         VK_IMAGE_ASPECT_COLOR_BIT, // aspectMask
1140                         0u,                        // baseMipLevel
1141                         1u,                        // levelCount
1142                         0u,                        // baseArrayLayer
1143                         1u,                        // layerCount
1144                     }                              // subresourceRange
1145                 };
1146                 colorImagePreRenderBarriers.push_back(colorImagePreRenderBarrier);
1147 
1148                 const VkImageMemoryBarrier colorImagePostRenderBarrier = {
1149                     VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,                                       // sType
1150                     DE_NULL,                                                                      // pNext
1151                     (VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT), // srcAccessMask
1152                     VK_ACCESS_TRANSFER_READ_BIT,                                                  // dstAccessMask
1153                     VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,                                     // oldLayout
1154                     VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,                                         // newLayout
1155                     VK_QUEUE_FAMILY_IGNORED,                                                      // srcQueueFamilyIndex
1156                     VK_QUEUE_FAMILY_IGNORED,                                                      // dstQueueFamilyIndex
1157                     colorImages.back().get()->get(),                                              // image
1158                     {
1159                         VK_IMAGE_ASPECT_COLOR_BIT, // aspectMask
1160                         0u,                        // baseMipLevel
1161                         1u,                        // levelCount
1162                         0u,                        // baseArrayLayer
1163                         1u,                        // layerCount
1164                     }                              // subresourceRange
1165                 };
1166                 colorImagePostRenderBarriers.push_back(colorImagePostRenderBarrier);
1167             }
1168         }
1169     }
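    // Each output location gets its own color image, image view and pair of layout-transition barriers; the
    // images start in VK_IMAGE_LAYOUT_UNDEFINED and are only transitioned when the command buffer recorded
    // below actually executes.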
1170 
1171     // Create render pass
1172     {
1173         const VkSubpassDescription subpassDescription = {
1174             0u,                              // VkSubpassDescriptionFlags flags;
1175             VK_PIPELINE_BIND_POINT_GRAPHICS, // VkPipelineBindPoint pipelineBindPoint;
1176             0u,                              // uint32_t inputAttachmentCount;
1177             DE_NULL,                         // const VkAttachmentReference* pInputAttachments;
1178             (uint32_t)colorImages.size(),    // uint32_t colorAttachmentCount;
1179             &colorAttachmentReferences[0],   // const VkAttachmentReference* pColorAttachments;
1180             DE_NULL,                         // const VkAttachmentReference* pResolveAttachments;
1181             DE_NULL,                         // const VkAttachmentReference* pDepthStencilAttachment;
1182             0u,                              // uint32_t preserveAttachmentCount;
1183             DE_NULL                          // const uint32_t* pPreserveAttachments;
1184         };
1185 
1186         const VkRenderPassCreateInfo renderPassParams = {
1187             VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // VkStructureType sType;
1188             DE_NULL,                                   // const void* pNext;
1189             (VkRenderPassCreateFlags)0,                // VkRenderPassCreateFlags flags;
1190             (uint32_t)attachments.size(),              // uint32_t attachmentCount;
1191             &attachments[0],                           // const VkAttachmentDescription* pAttachments;
1192             1u,                                        // uint32_t subpassCount;
1193             &subpassDescription,                       // const VkSubpassDescription* pSubpasses;
1194             0u,                                        // uint32_t dependencyCount;
1195             DE_NULL                                    // const VkSubpassDependency* pDependencies;
1196         };
1197 
1198         renderPass = createRenderPass(vk, vkDevice, &renderPassParams);
1199     }
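    // The render pass is a single subpass writing all color attachments, with no depth/stencil attachment and
    // no explicit subpass dependencies; ordering against the transfer readback is handled with the explicit
    // pipeline barriers recorded into the command buffer further below.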
1200 
1201     // Create framebuffer
1202     {
1203         std::vector<VkImageView> views(colorImageViews.size());
1204         for (size_t i = 0; i < colorImageViews.size(); i++)
1205         {
1206             views[i] = colorImageViews[i].get()->get();
1207         }
1208 
1209         const VkFramebufferCreateInfo framebufferParams = {
1210             VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, // VkStructureType sType;
1211             DE_NULL,                                   // const void* pNext;
1212             0u,                                        // VkFramebufferCreateFlags flags;
1213             *renderPass,                               // VkRenderPass renderPass;
1214             (uint32_t)views.size(),                    // uint32_t attachmentCount;
1215             &views[0],                                 // const VkImageView* pAttachments;
1216             (uint32_t)renderSize.x(),                  // uint32_t width;
1217             (uint32_t)renderSize.y(),                  // uint32_t height;
1218             1u                                         // uint32_t layers;
1219         };
1220 
1221         framebuffer = createFramebuffer(vk, vkDevice, &framebufferParams);
1222     }
1223 
1224     // Create pipeline layout
1225     {
1226         const VkDescriptorSetLayout setLayouts[]              = {*emptyDescriptorSetLayout, m_extraResourcesLayout};
1227         const VkPipelineLayoutCreateInfo pipelineLayoutParams = {
1228             VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // VkStructureType sType;
1229             DE_NULL,                                       // const void* pNext;
1230             (VkPipelineLayoutCreateFlags)0,                // VkPipelineLayoutCreateFlags flags;
1231             (m_extraResourcesLayout != 0 ? 2u : 0u),       // uint32_t setLayoutCount;
1232             setLayouts,                                    // const VkDescriptorSetLayout* pSetLayouts;
1233             0u,                                            // uint32_t pushConstantRangeCount;
1234             DE_NULL                                        // const VkPushConstantRange* pPushConstantRanges;
1235         };
1236 
1237         pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
1238     }
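    // Set 0 is an empty placeholder layout so that the caller-provided extra resources, if any, occupy set 1
    // (matching the descriptor sets bound during command buffer recording below); when no extra resources
    // layout is supplied, setLayoutCount stays zero and no sets are bound at all.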
1239 
1240     // Create shaders
1241     {
1242         vertexShaderModule   = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("vert"), 0);
1243         fragmentShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("frag"), 0);
1244 
1245         if (useGeometryShader)
1246         {
1247             if (m_context.getDeviceFeatures().shaderTessellationAndGeometryPointSize)
1248                 geometryShaderModule =
1249                     createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("geom_point_size"), 0);
1250             else
1251                 geometryShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("geom"), 0);
1252         }
1253     }
1254 
1255     // Create pipeline
1256     {
1257         const VkPipelineVertexInputStateCreateInfo vertexInputStateParams = {
1258             VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
1259             DE_NULL,                                                   // const void* pNext;
1260             (VkPipelineVertexInputStateCreateFlags)0,                  // VkPipelineVertexInputStateCreateFlags flags;
1261             (uint32_t)m_vertexBindingDescriptions.size(),              // uint32_t vertexBindingDescriptionCount;
1262             &m_vertexBindingDescriptions[0], // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
1263             (uint32_t)m_vertexAttributeDescriptions.size(), // uint32_t vertexAttributeDescriptionCount;
1264             &m_vertexAttributeDescriptions[0], // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions;
1265         };
1266 
1267         const std::vector<VkViewport> viewports(1, makeViewport(renderSize));
1268         const std::vector<VkRect2D> scissors(1, makeRect2D(renderSize));
1269 
1270         const VkPipelineColorBlendStateCreateInfo colorBlendStateParams = {
1271             VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, // VkStructureType sType;
1272             DE_NULL,                                                  // const void* pNext;
1273             (VkPipelineColorBlendStateCreateFlags)0,                  // VkPipelineColorBlendStateCreateFlags flags;
1274             VK_FALSE,                                                 // VkBool32 logicOpEnable;
1275             VK_LOGIC_OP_COPY,                                         // VkLogicOp logicOp;
1276             (uint32_t)colorBlendAttachmentStates.size(),              // uint32_t attachmentCount;
1277             &colorBlendAttachmentStates[0], // const VkPipelineColorBlendAttachmentState* pAttachments;
1278             {0.0f, 0.0f, 0.0f, 0.0f}        // float blendConstants[4];
1279         };
1280 
1281         graphicsPipeline = makeGraphicsPipeline(
1282             vk,                  // const DeviceInterface&                        vk
1283             vkDevice,            // const VkDevice                                device
1284             *pipelineLayout,     // const VkPipelineLayout                        pipelineLayout
1285             *vertexShaderModule, // const VkShaderModule                          vertexShaderModule
1286             DE_NULL,             // const VkShaderModule                          tessellationControlShaderModule
1287             DE_NULL,             // const VkShaderModule                          tessellationEvalShaderModule
1288             useGeometryShader ? *geometryShaderModule :
1289                                 DE_NULL,      // const VkShaderModule                          geometryShaderModule
1290             *fragmentShaderModule,            // const VkShaderModule                          fragmentShaderModule
1291             *renderPass,                      // const VkRenderPass                            renderPass
1292             viewports,                        // const std::vector<VkViewport>&                viewports
1293             scissors,                         // const std::vector<VkRect2D>&                  scissors
1294             VK_PRIMITIVE_TOPOLOGY_POINT_LIST, // const VkPrimitiveTopology                     topology
1295             0u,                               // const uint32_t                                subpass
1296             0u,                               // const uint32_t                                patchControlPoints
1297             &vertexInputStateParams, // const VkPipelineVertexInputStateCreateInfo*   vertexInputStateCreateInfo
1298             DE_NULL,                 // const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo
1299             DE_NULL,                 // const VkPipelineMultisampleStateCreateInfo*   multisampleStateCreateInfo
1300             DE_NULL,                 // const VkPipelineDepthStencilStateCreateInfo*  depthStencilStateCreateInfo
1301             &colorBlendStateParams); // const VkPipelineColorBlendStateCreateInfo*    colorBlendStateCreateInfo
1302     }
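    // The pipeline draws VK_PRIMITIVE_TOPOLOGY_POINT_LIST with one vertex per input value; assuming the
    // position data prepared earlier in this function places each point on its own pixel, every invocation's
    // outputs end up in distinct texels of the color attachments read back below.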
1303 
1304     // Create command pool
1305     cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
1306 
1307     // Create command buffer
1308     {
1309         cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1310 
1311         beginCommandBuffer(vk, *cmdBuffer);
1312 
1313         vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
1314                               vk::VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0, 0,
1315                               (const VkMemoryBarrier *)DE_NULL, 0, (const VkBufferMemoryBarrier *)DE_NULL,
1316                               (uint32_t)colorImagePreRenderBarriers.size(),
1317                               colorImagePreRenderBarriers.empty() ? DE_NULL : &colorImagePreRenderBarriers[0]);
1318         beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, renderSize.x(), renderSize.y()),
1319                         (uint32_t)attachmentClearValues.size(), &attachmentClearValues[0]);
1320 
1321         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *graphicsPipeline);
1322 
1323         if (m_extraResourcesLayout != 0)
1324         {
1325             DE_ASSERT(extraResources != 0);
1326             const VkDescriptorSet descriptorSets[] = {*emptyDescriptorSet, extraResources};
1327             vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u,
1328                                      DE_LENGTH_OF_ARRAY(descriptorSets), descriptorSets, 0u, DE_NULL);
1329         }
1330         else
1331             DE_ASSERT(extraResources == 0);
1332 
1333         const uint32_t numberOfVertexAttributes = (uint32_t)m_vertexBuffers.size();
1334 
1335         std::vector<VkDeviceSize> offsets(numberOfVertexAttributes, 0);
1336 
1337         std::vector<VkBuffer> buffers(numberOfVertexAttributes);
1338         for (size_t i = 0; i < numberOfVertexAttributes; i++)
1339         {
1340             buffers[i] = m_vertexBuffers[i].get()->get();
1341         }
1342 
1343         vk.cmdBindVertexBuffers(*cmdBuffer, 0, numberOfVertexAttributes, &buffers[0], &offsets[0]);
1344         vk.cmdDraw(*cmdBuffer, (uint32_t)positions.size(), 1u, 0u, 0u);
1345 
1346         endRenderPass(vk, *cmdBuffer);
1347         vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
1348                               vk::VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0, 0,
1349                               (const VkMemoryBarrier *)DE_NULL, 0, (const VkBufferMemoryBarrier *)DE_NULL,
1350                               (uint32_t)colorImagePostRenderBarriers.size(),
1351                               colorImagePostRenderBarriers.empty() ? DE_NULL : &colorImagePostRenderBarriers[0]);
1352 
1353         endCommandBuffer(vk, *cmdBuffer);
1354     }
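    // Recording order: transition all attachments to COLOR_ATTACHMENT_OPTIMAL, render the points in a single
    // render pass, then transition the attachments to TRANSFER_SRC_OPTIMAL so the per-output readback copies
    // (recorded in separate command buffers below) can source them once this submission has completed.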
1355 
1356     // Execute Draw
1357     submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());
1358 
1359     // Read back result and output
1360     {
1361         const VkDeviceSize imageSizeBytes = (VkDeviceSize)(4 * sizeof(uint32_t) * renderSize.x() * renderSize.y());
1362         const VkBufferCreateInfo readImageBufferParams = {
1363             VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
1364             DE_NULL,                              // const void* pNext;
1365             0u,                                   // VkBufferCreateFlags flags;
1366             imageSizeBytes,                       // VkDeviceSize size;
1367             VK_BUFFER_USAGE_TRANSFER_DST_BIT,     // VkBufferUsageFlags usage;
1368             VK_SHARING_MODE_EXCLUSIVE,            // VkSharingMode sharingMode;
1369             1u,                                   // uint32_t queueFamilyIndexCount;
1370             &queueFamilyIndex,                    // const uint32_t* pQueueFamilyIndices;
1371         };
1372 
1373         // Command pool and copy region shared by the per-output readback passes below
1374         Move<VkCommandPool> copyCmdPool =
1375             createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
1376 
1377         const VkBufferImageCopy copyParams = {
1378             0u,                       // VkDeviceSize bufferOffset;
1379             (uint32_t)renderSize.x(), // uint32_t bufferRowLength;
1380             (uint32_t)renderSize.y(), // uint32_t bufferImageHeight;
1381             {
1382                 VK_IMAGE_ASPECT_COLOR_BIT,       // VkImageAspectFlags aspectMask;
1383                 0u,                              // uint32_t mipLevel;
1384                 0u,                              // uint32_t baseArrayLayer;
1385                 1u,                              // uint32_t layerCount;
1386             },                                   // VkImageSubresourceLayers imageSubresource;
1387             {0u, 0u, 0u},                        // VkOffset3D imageOffset;
1388             {renderSize.x(), renderSize.y(), 1u} // VkExtent3D imageExtent;
1389         };
1390 
1391         // Read back pixels.
1392         for (int outNdx = 0; outNdx < (int)m_shaderSpec.outputs.size(); ++outNdx)
1393         {
1394             const Symbol &output  = m_shaderSpec.outputs[outNdx];
1395             const int outSize     = output.varType.getScalarSize();
1396             const int outVecSize  = glu::getDataTypeNumComponents(output.varType.getBasicType());
1397             const int outNumLocs  = glu::getDataTypeNumLocations(output.varType.getBasicType());
1398             const int outLocation = de::lookup(m_outputLayout.locationMap, output.name);
1399 
1400             for (int locNdx = 0; locNdx < outNumLocs; ++locNdx)
1401             {
1402                 tcu::TextureLevel tmpBuf;
1403                 const tcu::TextureFormat format = getRenderbufferFormatForOutput(output.varType, false);
1404                 const tcu::TextureFormat readFormat(tcu::TextureFormat::RGBA, format.type);
1405                 const Unique<VkBuffer> readImageBuffer(createBuffer(vk, vkDevice, &readImageBufferParams));
1406                 const de::UniquePtr<Allocation> readImageBufferMemory(memAlloc.allocate(
1407                     getBufferMemoryRequirements(vk, vkDevice, *readImageBuffer), MemoryRequirement::HostVisible));
1408 
1409                 VK_CHECK(vk.bindBufferMemory(vkDevice, *readImageBuffer, readImageBufferMemory->getMemory(),
1410                                              readImageBufferMemory->getOffset()));
1411 
1412                 // Copy image to buffer
1413                 {
1414 
1415                     Move<VkCommandBuffer> copyCmdBuffer =
1416                         allocateCommandBuffer(vk, vkDevice, *copyCmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1417 
1418                     beginCommandBuffer(vk, *copyCmdBuffer);
1419                     vk.cmdCopyImageToBuffer(*copyCmdBuffer, colorImages[outLocation + locNdx].get()->get(),
1420                                             VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *readImageBuffer, 1u, &copyParams);
1421 
1422                     // Insert a barrier so data written by the transfer is available to the host
1423                     {
1424                         const VkBufferMemoryBarrier barrier = {
1425                             VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // VkStructureType    sType;
1426                             DE_NULL,                                 // const void*        pNext;
1427                             VK_ACCESS_TRANSFER_WRITE_BIT,            // VkAccessFlags      srcAccessMask;
1428                             VK_ACCESS_HOST_READ_BIT,                 // VkAccessFlags      dstAccessMask;
1429                             VK_QUEUE_FAMILY_IGNORED,                 // uint32_t           srcQueueFamilyIndex;
1430                             VK_QUEUE_FAMILY_IGNORED,                 // uint32_t           dstQueueFamilyIndex;
1431                             *readImageBuffer,                        // VkBuffer           buffer;
1432                             0,                                       // VkDeviceSize       offset;
1433                             VK_WHOLE_SIZE,                           // VkDeviceSize       size;
1434                         };
1435 
1436                         vk.cmdPipelineBarrier(*copyCmdBuffer, vk::VK_PIPELINE_STAGE_TRANSFER_BIT,
1437                                               vk::VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0,
1438                                               (const VkMemoryBarrier *)DE_NULL, 1, &barrier, 0,
1439                                               (const VkImageMemoryBarrier *)DE_NULL);
1440                     }
1441 
1442                     endCommandBuffer(vk, *copyCmdBuffer);
1443 
1444                     submitCommandsAndWait(vk, vkDevice, queue, copyCmdBuffer.get());
1445                 }
1446 
1447                 invalidateAlloc(vk, vkDevice, *readImageBufferMemory);
1448 
1449                 tmpBuf.setStorage(readFormat, renderSize.x(), renderSize.y());
1450 
1451                 const tcu::TextureFormat resultFormat(tcu::TextureFormat::RGBA, format.type);
1452                 const tcu::ConstPixelBufferAccess resultAccess(resultFormat, renderSize.x(), renderSize.y(), 1,
1453                                                                readImageBufferMemory->getHostPtr());
1454 
1455                 tcu::copy(tmpBuf.getAccess(), resultAccess);
1456 
1457                 if (isOutput16Bit(static_cast<size_t>(outNdx)))
1458                 {
1459                     uint16_t *dstPtrBase = static_cast<uint16_t *>(outputs[outNdx]);
1460                     if (outSize == 4 && outNumLocs == 1)
1461                         deMemcpy(dstPtrBase, tmpBuf.getAccess().getDataPtr(),
1462                                  numValues * outVecSize * sizeof(uint16_t));
1463                     else
1464                     {
1465                         for (int valNdx = 0; valNdx < numValues; valNdx++)
1466                         {
1467                             const uint16_t *srcPtr = (const uint16_t *)tmpBuf.getAccess().getDataPtr() + valNdx * 4;
1468                             uint16_t *dstPtr       = &dstPtrBase[outSize * valNdx + outVecSize * locNdx];
1469                             deMemcpy(dstPtr, srcPtr, outVecSize * sizeof(uint16_t));
1470                         }
1471                     }
1472                 }
1473                 else
1474                 {
1475                     uint32_t *dstPtrBase = static_cast<uint32_t *>(outputs[outNdx]);
1476                     if (outSize == 4 && outNumLocs == 1)
1477                         deMemcpy(dstPtrBase, tmpBuf.getAccess().getDataPtr(),
1478                                  numValues * outVecSize * sizeof(uint32_t));
1479                     else
1480                     {
1481                         for (int valNdx = 0; valNdx < numValues; valNdx++)
1482                         {
1483                             const uint32_t *srcPtr = (const uint32_t *)tmpBuf.getAccess().getDataPtr() + valNdx * 4;
1484                             uint32_t *dstPtr       = &dstPtrBase[outSize * valNdx + outVecSize * locNdx];
1485                             deMemcpy(dstPtr, srcPtr, outVecSize * sizeof(uint32_t));
1486                         }
1487                     }
1488                 }
1489             }
1490         }
1491     }
1492 }
1493 
1494 // VertexShaderExecutor
1495 
1496 class VertexShaderExecutor : public FragmentOutExecutor
1497 {
1498 public:
1499     VertexShaderExecutor(Context &context, const ShaderSpec &shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
1500     virtual ~VertexShaderExecutor(void);
1501 
1502     static void generateSources(const ShaderSpec &shaderSpec, SourceCollections &dst);
1503 };
1504 
VertexShaderExecutor(Context & context,const ShaderSpec & shaderSpec,VkDescriptorSetLayout extraResourcesLayout)1505 VertexShaderExecutor::VertexShaderExecutor(Context &context, const ShaderSpec &shaderSpec,
1506                                            VkDescriptorSetLayout extraResourcesLayout)
1507     : FragmentOutExecutor(context, glu::SHADERTYPE_VERTEX, shaderSpec, extraResourcesLayout)
1508 {
1509 }
1510 
~VertexShaderExecutor(void)1511 VertexShaderExecutor::~VertexShaderExecutor(void)
1512 {
1513 }
1514 
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)1515 void VertexShaderExecutor::generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection)
1516 {
1517     const FragmentOutputLayout outputLayout(computeFragmentOutputLayout(shaderSpec.outputs));
1518 
1519     programCollection.glslSources.add("vert")
1520         << glu::VertexSource(generateVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;
1521     /* \todo [2015-09-11 hegedusd] set useIntOutputs parameter if needed. */
1522     programCollection.glslSources.add("frag") << glu::FragmentSource(generatePassthroughFragmentShader(
1523                                                      shaderSpec, false, outputLayout.locationMap, "vtx_out_", "o_"))
1524                                               << shaderSpec.buildOptions;
1525 }
1526 
1527 // GeometryShaderExecutor
1528 
1529 class GeometryShaderExecutor : public FragmentOutExecutor
1530 {
1531 public:
1532     GeometryShaderExecutor(Context &context, const ShaderSpec &shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
1533     virtual ~GeometryShaderExecutor(void);
1534 
1535     static void generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection);
1536 };
1537 
GeometryShaderExecutor(Context & context,const ShaderSpec & shaderSpec,VkDescriptorSetLayout extraResourcesLayout)1538 GeometryShaderExecutor::GeometryShaderExecutor(Context &context, const ShaderSpec &shaderSpec,
1539                                                VkDescriptorSetLayout extraResourcesLayout)
1540     : FragmentOutExecutor(context, glu::SHADERTYPE_GEOMETRY, shaderSpec, extraResourcesLayout)
1541 {
1542     const VkPhysicalDeviceFeatures &features = context.getDeviceFeatures();
1543 
1544     if (!features.geometryShader)
1545         TCU_THROW(NotSupportedError, "Geometry shader type not supported by device");
1546 }
1547 
~GeometryShaderExecutor(void)1548 GeometryShaderExecutor::~GeometryShaderExecutor(void)
1549 {
1550 }
1551 
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)1552 void GeometryShaderExecutor::generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection)
1553 {
1554     const FragmentOutputLayout outputLayout(computeFragmentOutputLayout(shaderSpec.outputs));
1555 
1556     programCollection.glslSources.add("vert")
1557         << glu::VertexSource(generatePassthroughVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;
1558 
1559     programCollection.glslSources.add("geom")
1560         << glu::GeometrySource(generateGeometryShader(shaderSpec, "vtx_out_", "geom_out_", false))
1561         << shaderSpec.buildOptions;
1562     programCollection.glslSources.add("geom_point_size")
1563         << glu::GeometrySource(generateGeometryShader(shaderSpec, "vtx_out_", "geom_out_", true))
1564         << shaderSpec.buildOptions;
1565 
1566     /* \todo [2015-09-18 rsipka] set useIntOutputs parameter if needed. */
1567     programCollection.glslSources.add("frag") << glu::FragmentSource(generatePassthroughFragmentShader(
1568                                                      shaderSpec, false, outputLayout.locationMap, "geom_out_", "o_"))
1569                                               << shaderSpec.buildOptions;
1570 }
1571 
1572 // FragmentShaderExecutor
1573 
1574 class FragmentShaderExecutor : public FragmentOutExecutor
1575 {
1576 public:
1577     FragmentShaderExecutor(Context &context, const ShaderSpec &shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
1578     virtual ~FragmentShaderExecutor(void);
1579 
1580     static void generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection);
1581 };
1582 
FragmentShaderExecutor(Context & context,const ShaderSpec & shaderSpec,VkDescriptorSetLayout extraResourcesLayout)1583 FragmentShaderExecutor::FragmentShaderExecutor(Context &context, const ShaderSpec &shaderSpec,
1584                                                VkDescriptorSetLayout extraResourcesLayout)
1585     : FragmentOutExecutor(context, glu::SHADERTYPE_FRAGMENT, shaderSpec, extraResourcesLayout)
1586 {
1587 }
1588 
~FragmentShaderExecutor(void)1589 FragmentShaderExecutor::~FragmentShaderExecutor(void)
1590 {
1591 }
1592 
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)1593 void FragmentShaderExecutor::generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection)
1594 {
1595     const FragmentOutputLayout outputLayout(computeFragmentOutputLayout(shaderSpec.outputs));
1596 
1597     programCollection.glslSources.add("vert")
1598         << glu::VertexSource(generatePassthroughVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;
1599     /* \todo [2015-09-11 hegedusd] set useIntOutputs parameter if needed. */
1600     programCollection.glslSources.add("frag")
1601         << glu::FragmentSource(generateFragmentShader(shaderSpec, false, outputLayout.locationMap, "vtx_out_", "o_"))
1602         << shaderSpec.buildOptions;
1603 }
1604 
1605 // Shared utilities for compute and tess executors
1606 
getVecStd430ByteAlignment(glu::DataType type)1607 static uint32_t getVecStd430ByteAlignment(glu::DataType type)
1608 {
1609     uint32_t baseSize;
1610 
1611     switch (glu::getDataTypeScalarType(type))
1612     {
1613     case glu::TYPE_FLOAT16:
1614         baseSize = 2u;
1615         break;
1616     case glu::TYPE_DOUBLE:
1617         baseSize = 8u;
1618         break;
1619     default:
1620         baseSize = 4u;
1621         break;
1622     }
1623 
1624     switch (glu::getDataTypeScalarSize(type))
1625     {
1626     case 1:
1627         return baseSize;
1628     case 2:
1629         return baseSize * 2u;
1630     case 3: // fallthrough.
1631     case 4:
1632         return baseSize * 4u;
1633     default:
1634         DE_ASSERT(false);
1635         return 0u;
1636     }
1637 }
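// Illustrative expansion of the rule above: a 32-bit vec3 gets the vec4 alignment of 16 bytes, a 16-bit
// f16vec3 gets 8 bytes, and a double-precision dvec2 gets 16 bytes, i.e.
//   getVecStd430ByteAlignment(glu::TYPE_FLOAT_VEC3)   == 16
//   getVecStd430ByteAlignment(glu::TYPE_FLOAT16_VEC3) ==  8
//   getVecStd430ByteAlignment(glu::TYPE_DOUBLE_VEC2)  == 16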
1638 
1639 class BufferIoExecutor : public ShaderExecutor
1640 {
1641 public:
1642     BufferIoExecutor(Context &context, const ShaderSpec &shaderSpec);
1643     virtual ~BufferIoExecutor(void);
1644 
1645 protected:
1646     enum
1647     {
1648         INPUT_BUFFER_BINDING  = 0,
1649         OUTPUT_BUFFER_BINDING = 1,
1650     };
1651 
1652     void initBuffers(int numValues);
getInputBuffer(void) const1653     VkBuffer getInputBuffer(void) const
1654     {
1655         return *m_inputBuffer;
1656     }
getOutputBuffer(void) const1657     VkBuffer getOutputBuffer(void) const
1658     {
1659         return *m_outputBuffer;
1660     }
getInputStride(void) const1661     uint32_t getInputStride(void) const
1662     {
1663         return getLayoutStride(m_inputLayout);
1664     }
getOutputStride(void) const1665     uint32_t getOutputStride(void) const
1666     {
1667         return getLayoutStride(m_outputLayout);
1668     }
1669 
1670     void uploadInputBuffer(const void *const *inputPtrs, int numValues, bool packFloat16Bit);
1671     void readOutputBuffer(void *const *outputPtrs, int numValues);
1672 
1673     static void declareBufferBlocks(std::ostream &src, const ShaderSpec &spec);
1674     static void generateExecBufferIo(std::ostream &src, const ShaderSpec &spec, const char *invocationNdxName);
1675 
1676 protected:
1677     Move<VkBuffer> m_inputBuffer;
1678     Move<VkBuffer> m_outputBuffer;
1679 
1680 private:
1681     struct VarLayout
1682     {
1683         uint32_t offset;
1684         uint32_t stride;
1685         uint32_t matrixStride;
1686 
VarLayoutvkt::shaderexecutor::__anon4bec95a50111::BufferIoExecutor::VarLayout1687         VarLayout(void) : offset(0), stride(0), matrixStride(0)
1688         {
1689         }
1690     };
1691 
1692     static void computeVarLayout(const std::vector<Symbol> &symbols, std::vector<VarLayout> *layout);
1693     static uint32_t getLayoutStride(const vector<VarLayout> &layout);
1694 
1695     static void copyToBuffer(const glu::VarType &varType, const VarLayout &layout, int numValues,
1696                              const void *srcBasePtr, void *dstBasePtr, bool packFloat16Bit);
1697     static void copyFromBuffer(const glu::VarType &varType, const VarLayout &layout, int numValues,
1698                                const void *srcBasePtr, void *dstBasePtr);
1699 
1700     de::MovePtr<Allocation> m_inputAlloc;
1701     de::MovePtr<Allocation> m_outputAlloc;
1702 
1703     vector<VarLayout> m_inputLayout;
1704     vector<VarLayout> m_outputLayout;
1705 };
1706 
BufferIoExecutor(Context & context,const ShaderSpec & shaderSpec)1707 BufferIoExecutor::BufferIoExecutor(Context &context, const ShaderSpec &shaderSpec) : ShaderExecutor(context, shaderSpec)
1708 {
1709     computeVarLayout(m_shaderSpec.inputs, &m_inputLayout);
1710     computeVarLayout(m_shaderSpec.outputs, &m_outputLayout);
1711 }
1712 
~BufferIoExecutor(void)1713 BufferIoExecutor::~BufferIoExecutor(void)
1714 {
1715 }
1716 
getLayoutStride(const vector<VarLayout> & layout)1717 inline uint32_t BufferIoExecutor::getLayoutStride(const vector<VarLayout> &layout)
1718 {
1719     return layout.empty() ? 0 : layout[0].stride;
1720 }
1721 
computeVarLayout(const std::vector<Symbol> & symbols,std::vector<VarLayout> * layout)1722 void BufferIoExecutor::computeVarLayout(const std::vector<Symbol> &symbols, std::vector<VarLayout> *layout)
1723 {
1724     uint32_t maxAlignment = 0;
1725     uint32_t curOffset    = 0;
1726 
1727     DE_ASSERT(layout != DE_NULL);
1728     DE_ASSERT(layout->empty());
1729     layout->resize(symbols.size());
1730 
1731     for (size_t varNdx = 0; varNdx < symbols.size(); varNdx++)
1732     {
1733         const Symbol &symbol          = symbols[varNdx];
1734         const glu::DataType basicType = symbol.varType.getBasicType();
1735         VarLayout &layoutEntry        = (*layout)[varNdx];
1736 
1737         if (glu::isDataTypeScalarOrVector(basicType))
1738         {
1739             const uint32_t alignment = getVecStd430ByteAlignment(basicType);
1740             const uint32_t size =
1741                 (uint32_t)glu::getDataTypeScalarSize(basicType) *
1742                 (isDataTypeDoubleType(basicType) ?
1743                      (int)(sizeof(uint64_t)) :
1744                      (isDataTypeFloat16OrVec(basicType) ? (int)sizeof(uint16_t) : (int)sizeof(uint32_t)));
1745 
1746             curOffset    = (uint32_t)deAlign32((int)curOffset, (int)alignment);
1747             maxAlignment = de::max(maxAlignment, alignment);
1748 
1749             layoutEntry.offset       = curOffset;
1750             layoutEntry.matrixStride = 0;
1751 
1752             curOffset += size;
1753         }
1754         else if (glu::isDataTypeMatrix(basicType))
1755         {
1756             const int numVecs = glu::getDataTypeMatrixNumColumns(basicType);
1757             const glu::DataType vecType =
1758                 glu::getDataTypeVector(glu::getDataTypeScalarType(basicType), glu::getDataTypeMatrixNumRows(basicType));
1759             const uint32_t vecAlignment = getVecStd430ByteAlignment(vecType);
1760 
1761             curOffset    = (uint32_t)deAlign32((int)curOffset, (int)vecAlignment);
1762             maxAlignment = de::max(maxAlignment, vecAlignment);
1763 
1764             layoutEntry.offset       = curOffset;
1765             layoutEntry.matrixStride = vecAlignment;
1766 
1767             curOffset += vecAlignment * numVecs;
1768         }
1769         else
1770             DE_ASSERT(false);
1771     }
1772 
1773     {
1774         const uint32_t totalSize = (uint32_t)deAlign32(curOffset, maxAlignment);
1775 
1776         for (vector<VarLayout>::iterator varIter = layout->begin(); varIter != layout->end(); ++varIter)
1777             varIter->stride = totalSize;
1778     }
1779 }
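// Worked example of the layout computed above (hypothetical symbol list, for illustration only): for
// symbols { vec3 a; float b; } the vec3 is placed at offset 0 with 16-byte alignment, the float at offset 12,
// and the element size is rounded up to the maximum alignment, so every entry ends up with stride 16.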
1780 
declareBufferBlocks(std::ostream & src,const ShaderSpec & spec)1781 void BufferIoExecutor::declareBufferBlocks(std::ostream &src, const ShaderSpec &spec)
1782 {
1783     // Input struct
1784     if (!spec.inputs.empty())
1785     {
1786         glu::StructType inputStruct("Inputs");
1787         for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
1788             inputStruct.addMember(symIter->name.c_str(), symIter->varType);
1789         src << glu::declare(&inputStruct) << ";\n";
1790     }
1791 
1792     // Output struct
1793     {
1794         glu::StructType outputStruct("Outputs");
1795         for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1796             outputStruct.addMember(symIter->name.c_str(), symIter->varType);
1797         src << glu::declare(&outputStruct) << ";\n";
1798     }
1799 
1800     src << "\n";
1801 
1802     if (!spec.inputs.empty())
1803     {
1804         src << "layout(set = 0, binding = " << int(INPUT_BUFFER_BINDING) << ", std430) buffer InBuffer\n"
1805             << "{\n"
1806             << "    Inputs inputs[];\n"
1807             << "};\n";
1808     }
1809 
1810     src << "layout(set = 0, binding = " << int(OUTPUT_BUFFER_BINDING) << ", std430) buffer OutBuffer\n"
1811         << "{\n"
1812         << "    Outputs outputs[];\n"
1813         << "};\n"
1814         << "\n";
1815 }
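// Roughly, for a shader spec with a single float input "in0" and a single float output "out0", the
// declarations emitted above look like the following (modulo the exact formatting and precision qualifiers
// produced by glu::declare):
//
//   struct Inputs { float in0; };
//   struct Outputs { float out0; };
//
//   layout(set = 0, binding = 0, std430) buffer InBuffer  { Inputs inputs[]; };
//   layout(set = 0, binding = 1, std430) buffer OutBuffer { Outputs outputs[]; };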
1816 
generateExecBufferIo(std::ostream & src,const ShaderSpec & spec,const char * invocationNdxName)1817 void BufferIoExecutor::generateExecBufferIo(std::ostream &src, const ShaderSpec &spec, const char *invocationNdxName)
1818 {
1819     std::string tname;
1820     for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
1821     {
1822         const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
1823         if (f16BitTest)
1824         {
1825             tname = glu::getDataTypeName(getDataTypeFloat16Scalars(symIter->varType.getBasicType()));
1826         }
1827         else
1828         {
1829             tname = glu::getDataTypeName(symIter->varType.getBasicType());
1830         }
1831         src << "\t" << tname << " " << symIter->name << " = " << tname << "(inputs[" << invocationNdxName << "]."
1832             << symIter->name << ");\n";
1833     }
1834 
1835     for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1836     {
1837         const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
1838         if (f16BitTest)
1839         {
1840             tname = glu::getDataTypeName(getDataTypeFloat16Scalars(symIter->varType.getBasicType()));
1841         }
1842         else
1843         {
1844             tname = glu::getDataTypeName(symIter->varType.getBasicType());
1845         }
1846         src << "\t" << tname << " " << symIter->name << ";\n";
1847         if (f16BitTest)
1848         {
1849             const char *ttname = glu::getDataTypeName(symIter->varType.getBasicType());
1850             src << "\t" << ttname << " "
1851                 << "packed_" << symIter->name << ";\n";
1852         }
1853     }
1854 
1855     src << "\n";
1856 
1857     {
1858         std::istringstream opSrc(spec.source);
1859         std::string line;
1860 
1861         while (std::getline(opSrc, line))
1862             src << "\t" << line << "\n";
1863     }
1864 
1865     if (spec.packFloat16Bit)
1866         packFloat16Bit(src, spec.outputs);
1867 
1868     src << "\n";
1869     for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1870     {
1871         const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
1872         if (f16BitTest)
1873             src << "\toutputs[" << invocationNdxName << "]." << symIter->name << " = packed_" << symIter->name << ";\n";
1874         else
1875             src << "\toutputs[" << invocationNdxName << "]." << symIter->name << " = " << symIter->name << ";\n";
1876     }
1877 }
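// Sketch of the body emitted above for a hypothetical spec with input "float in0", output "float out0",
// source "out0 = in0 * 2.0;" and invocationNdxName "invocationNdx" (no 16-bit packing):
//
//   float in0 = float(inputs[invocationNdx].in0);
//   float out0;
//
//   out0 = in0 * 2.0;
//
//   outputs[invocationNdx].out0 = out0;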
1878 
copyToBuffer(const glu::VarType & varType,const VarLayout & layout,int numValues,const void * srcBasePtr,void * dstBasePtr,bool packFloat16Bit)1879 void BufferIoExecutor::copyToBuffer(const glu::VarType &varType, const VarLayout &layout, int numValues,
1880                                     const void *srcBasePtr, void *dstBasePtr, bool packFloat16Bit)
1881 {
1882     if (varType.isBasicType())
1883     {
1884         const glu::DataType basicType = varType.getBasicType();
1885         const bool isMatrix           = glu::isDataTypeMatrix(basicType);
1886         const int scalarSize          = glu::getDataTypeScalarSize(basicType);
1887         const int numVecs             = isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1;
1888         const int numComps            = scalarSize / numVecs;
1889         const int size                = (glu::isDataTypeDoubleType(basicType) ?
1890                                              (int)sizeof(uint64_t) :
1891                                              (glu::isDataTypeFloat16OrVec(basicType) ? (int)sizeof(uint16_t) : (int)sizeof(uint32_t)));
1892 
1893         for (int elemNdx = 0; elemNdx < numValues; elemNdx++)
1894         {
1895             for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
1896             {
1897                 const int srcOffset = size * (elemNdx * scalarSize + vecNdx * numComps);
1898                 const int dstOffset =
1899                     layout.offset + layout.stride * elemNdx + (isMatrix ? layout.matrixStride * vecNdx : 0);
1900                 const uint8_t *srcPtr = (const uint8_t *)srcBasePtr + srcOffset;
1901                 uint8_t *dstPtr       = (uint8_t *)dstBasePtr + dstOffset;
1902 
1903                 if (packFloat16Bit)
1904                 {
1905                     // Convert the float values to 16 bit and store in the lower 16 bits of 32 bit ints.
1906                     for (int cmpNdx = 0; cmpNdx < numComps; ++cmpNdx)
1907                     {
1908                         deFloat16 f16vals[2] = {};
1909                         f16vals[0]           = deFloat32To16Round(((float *)srcPtr)[cmpNdx], DE_ROUNDINGMODE_TO_ZERO);
1910                         deMemcpy(dstPtr + cmpNdx * size, &f16vals[0], size);
1911                     }
1912                 }
1913                 else
1914                 {
1915                     deMemcpy(dstPtr, srcPtr, size * numComps);
1916                 }
1917             }
1918         }
1919     }
1920     else
1921         throw tcu::InternalError("Unsupported type");
1922 }
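// Note on the 16-bit packing branch above: for packed tests the declared type is still 32-bit float, so
// "size" stays 4 bytes and each component occupies a full 32-bit slot whose first two bytes hold the rounded
// deFloat16 value and whose remaining bytes are zero. This matches the %u32-based SPIR-V input types used by
// the packed-float16 path below, where the 16-bit value is bitcast back out of the 32-bit word.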
1923 
copyFromBuffer(const glu::VarType & varType,const VarLayout & layout,int numValues,const void * srcBasePtr,void * dstBasePtr)1924 void BufferIoExecutor::copyFromBuffer(const glu::VarType &varType, const VarLayout &layout, int numValues,
1925                                       const void *srcBasePtr, void *dstBasePtr)
1926 {
1927     if (varType.isBasicType())
1928     {
1929         const glu::DataType basicType = varType.getBasicType();
1930         const bool isMatrix           = glu::isDataTypeMatrix(basicType);
1931         const int scalarSize          = glu::getDataTypeScalarSize(basicType);
1932         const int numVecs             = isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1;
1933         const int numComps            = scalarSize / numVecs;
1934 
1935         for (int elemNdx = 0; elemNdx < numValues; elemNdx++)
1936         {
1937             for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
1938             {
1939                 const int size =
1940                     (glu::isDataTypeDoubleType(basicType) ?
1941                          (int)sizeof(uint64_t) :
1942                          (glu::isDataTypeFloat16OrVec(basicType) ? (int)sizeof(uint16_t) : (int)sizeof(uint32_t)));
1943                 const int srcOffset =
1944                     layout.offset + layout.stride * elemNdx + (isMatrix ? layout.matrixStride * vecNdx : 0);
1945                 const int dstOffset   = size * (elemNdx * scalarSize + vecNdx * numComps);
1946                 const uint8_t *srcPtr = (const uint8_t *)srcBasePtr + srcOffset;
1947                 uint8_t *dstPtr       = (uint8_t *)dstBasePtr + dstOffset;
1948 
1949                 deMemcpy(dstPtr, srcPtr, size * numComps);
1950             }
1951         }
1952     }
1953     else
1954         throw tcu::InternalError("Unsupported type");
1955 }
1956 
uploadInputBuffer(const void * const * inputPtrs,int numValues,bool packFloat16Bit)1957 void BufferIoExecutor::uploadInputBuffer(const void *const *inputPtrs, int numValues, bool packFloat16Bit)
1958 {
1959     const VkDevice vkDevice   = m_context.getDevice();
1960     const DeviceInterface &vk = m_context.getDeviceInterface();
1961 
1962     const uint32_t inputStride = getLayoutStride(m_inputLayout);
1963     const int inputBufferSize  = inputStride * numValues;
1964 
1965     if (inputBufferSize == 0)
1966         return; // No inputs
1967 
1968     DE_ASSERT(m_shaderSpec.inputs.size() == m_inputLayout.size());
1969     for (size_t inputNdx = 0; inputNdx < m_shaderSpec.inputs.size(); ++inputNdx)
1970     {
1971         const glu::VarType &varType = m_shaderSpec.inputs[inputNdx].varType;
1972         const VarLayout &layout     = m_inputLayout[inputNdx];
1973 
1974         copyToBuffer(varType, layout, numValues, inputPtrs[inputNdx], m_inputAlloc->getHostPtr(), packFloat16Bit);
1975     }
1976 
1977     flushAlloc(vk, vkDevice, *m_inputAlloc);
1978 }
1979 
readOutputBuffer(void * const * outputPtrs,int numValues)1980 void BufferIoExecutor::readOutputBuffer(void *const *outputPtrs, int numValues)
1981 {
1982     const VkDevice vkDevice   = m_context.getDevice();
1983     const DeviceInterface &vk = m_context.getDeviceInterface();
1984 
1985     DE_ASSERT(numValues > 0); // At least some outputs are required.
1986 
1987     invalidateAlloc(vk, vkDevice, *m_outputAlloc);
1988 
1989     DE_ASSERT(m_shaderSpec.outputs.size() == m_outputLayout.size());
1990     for (size_t outputNdx = 0; outputNdx < m_shaderSpec.outputs.size(); ++outputNdx)
1991     {
1992         const glu::VarType &varType = m_shaderSpec.outputs[outputNdx].varType;
1993         const VarLayout &layout     = m_outputLayout[outputNdx];
1994 
1995         copyFromBuffer(varType, layout, numValues, m_outputAlloc->getHostPtr(), outputPtrs[outputNdx]);
1996     }
1997 }
1998 
initBuffers(int numValues)1999 void BufferIoExecutor::initBuffers(int numValues)
2000 {
2001     const uint32_t inputStride  = getLayoutStride(m_inputLayout);
2002     const uint32_t outputStride = getLayoutStride(m_outputLayout);
2003     // Avoid creating zero-sized buffer/memory
2004     const size_t inputBufferSize  = de::max(numValues * inputStride, 1u);
2005     const size_t outputBufferSize = numValues * outputStride;
2006 
2007     // Upload data to buffer
2008     const VkDevice vkDevice         = m_context.getDevice();
2009     const DeviceInterface &vk       = m_context.getDeviceInterface();
2010     const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
2011     Allocator &memAlloc             = m_context.getDefaultAllocator();
2012 
2013     const VkBufferCreateInfo inputBufferParams = {
2014         VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
2015         DE_NULL,                              // const void* pNext;
2016         0u,                                   // VkBufferCreateFlags flags;
2017         inputBufferSize,                      // VkDeviceSize size;
2018         VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,   // VkBufferUsageFlags usage;
2019         VK_SHARING_MODE_EXCLUSIVE,            // VkSharingMode sharingMode;
2020         1u,                                   // uint32_t queueFamilyIndexCount;
2021         &queueFamilyIndex                     // const uint32_t* pQueueFamilyIndices;
2022     };
2023 
2024     m_inputBuffer = createBuffer(vk, vkDevice, &inputBufferParams);
2025     m_inputAlloc =
2026         memAlloc.allocate(getBufferMemoryRequirements(vk, vkDevice, *m_inputBuffer), MemoryRequirement::HostVisible);
2027 
2028     VK_CHECK(vk.bindBufferMemory(vkDevice, *m_inputBuffer, m_inputAlloc->getMemory(), m_inputAlloc->getOffset()));
2029 
2030     const VkBufferCreateInfo outputBufferParams = {
2031         VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
2032         DE_NULL,                              // const void* pNext;
2033         0u,                                   // VkBufferCreateFlags flags;
2034         outputBufferSize,                     // VkDeviceSize size;
2035         VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,   // VkBufferUsageFlags usage;
2036         VK_SHARING_MODE_EXCLUSIVE,            // VkSharingMode sharingMode;
2037         1u,                                   // uint32_t queueFamilyIndexCount;
2038         &queueFamilyIndex                     // const uint32_t* pQueueFamilyIndices;
2039     };
2040 
2041     m_outputBuffer = createBuffer(vk, vkDevice, &outputBufferParams);
2042     m_outputAlloc =
2043         memAlloc.allocate(getBufferMemoryRequirements(vk, vkDevice, *m_outputBuffer), MemoryRequirement::HostVisible);
2044 
2045     VK_CHECK(vk.bindBufferMemory(vkDevice, *m_outputBuffer, m_outputAlloc->getMemory(), m_outputAlloc->getOffset()));
2046 }
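// Both buffers are backed by host-visible memory: uploadInputBuffer() writes through the mapped pointer and
// flushes, while readOutputBuffer() invalidates before reading the results, so no separate staging copies are
// needed for the I/O buffers.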
2047 
2048 // ComputeShaderExecutor
2049 
2050 class ComputeShaderExecutor : public BufferIoExecutor
2051 {
2052 public:
2053     ComputeShaderExecutor(Context &context, const ShaderSpec &shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
2054     virtual ~ComputeShaderExecutor(void);
2055 
2056     static void generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection);
2057 
2058     virtual void execute(int numValues, const void *const *inputs, void *const *outputs,
2059                          VkDescriptorSet extraResources);
2060 
2061 protected:
2062     static std::string generateComputeShader(const ShaderSpec &spec);
2063 
2064 private:
2065     const VkDescriptorSetLayout m_extraResourcesLayout;
2066 };
2067 
ComputeShaderExecutor(Context & context,const ShaderSpec & shaderSpec,VkDescriptorSetLayout extraResourcesLayout)2068 ComputeShaderExecutor::ComputeShaderExecutor(Context &context, const ShaderSpec &shaderSpec,
2069                                              VkDescriptorSetLayout extraResourcesLayout)
2070     : BufferIoExecutor(context, shaderSpec)
2071     , m_extraResourcesLayout(extraResourcesLayout)
2072 {
2073 }
2074 
~ComputeShaderExecutor(void)2075 ComputeShaderExecutor::~ComputeShaderExecutor(void)
2076 {
2077 }
2078 
getTypeSpirv(const glu::DataType type,const bool packFloat16Bit=false)2079 std::string getTypeSpirv(const glu::DataType type, const bool packFloat16Bit = false)
2080 {
2081     switch (type)
2082     {
2083     case glu::TYPE_FLOAT16:
2084         return "%f16";
2085     case glu::TYPE_FLOAT16_VEC2:
2086         return "%v2f16";
2087     case glu::TYPE_FLOAT16_VEC3:
2088         return "%v3f16";
2089     case glu::TYPE_FLOAT16_VEC4:
2090         return "%v4f16";
2091     case glu::TYPE_FLOAT:
2092         return packFloat16Bit ? "%u32" : "%f32"; // f16 values will be bitcast from ui32.
2093     case glu::TYPE_FLOAT_VEC2:
2094         return packFloat16Bit ? "%v2u32" : "%v2f32"; // f16 values will be bitcast from ui32.
2095     case glu::TYPE_FLOAT_VEC3:
2096         return packFloat16Bit ? "%v3u32" : "%v3f32"; // f16 values will be bitcast from ui32.
2097     case glu::TYPE_FLOAT_VEC4:
2098         return packFloat16Bit ? "%v4u32" : "%v4f32"; // f16 values will be bitcast from ui32.
2099     case glu::TYPE_INT:
2100         return "%i32";
2101     case glu::TYPE_INT_VEC2:
2102         return "%v2i32";
2103     case glu::TYPE_INT_VEC3:
2104         return "%v3i32";
2105     case glu::TYPE_INT_VEC4:
2106         return "%v4i32";
2107     case glu::TYPE_DOUBLE:
2108         return "%f64";
2109     case glu::TYPE_DOUBLE_VEC2:
2110         return "%v2f64";
2111     case glu::TYPE_DOUBLE_VEC3:
2112         return "%v3f64";
2113     case glu::TYPE_DOUBLE_VEC4:
2114         return "%v4f64";
2115     default:
2116         DE_ASSERT(0);
2117         return "";
2118     }
2119 }
2120 
moveBitOperation(std::string variableName,const int operationNdx)2121 std::string moveBitOperation(std::string variableName, const int operationNdx)
2122 {
2123     std::ostringstream src;
2124     src << "\n"
2125         << "%operation_move_" << operationNdx << " = OpLoad %i32 " << variableName << "\n"
2126         << "%move1_" << operationNdx << " = OpShiftLeftLogical %i32 %operation_move_" << operationNdx << " %c_i32_1\n"
2127         << "OpStore " << variableName << " %move1_" << operationNdx << "\n";
2128     return src.str();
2129 }
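// For example, moveBitOperation("%operation", 0) (argument name chosen only for illustration) expands to the
// following snippet, which shifts the stored tag value left by one, i.e. doubles it:
//
//   %operation_move_0 = OpLoad %i32 %operation
//   %move1_0 = OpShiftLeftLogical %i32 %operation_move_0 %c_i32_1
//   OpStore %operation %move1_0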
2130 
scalarComparison(const std::string operation,const int operationNdx,const glu::DataType type,const std::string & outputType,const int scalarSize)2131 std::string scalarComparison(const std::string operation, const int operationNdx, const glu::DataType type,
2132                              const std::string &outputType, const int scalarSize)
2133 {
2134     std::ostringstream src;
2135     std::string boolType;
2136 
2137     switch (type)
2138     {
2139     case glu::TYPE_FLOAT16:
2140     case glu::TYPE_FLOAT:
2141     case glu::TYPE_DOUBLE:
2142         src << "\n"
2143             << "%operation_result_" << operationNdx << " = " << operation << " %bool %in0_val %in1_val\n"
2144             << "OpSelectionMerge %IF_" << operationNdx << " None\n"
2145             << "OpBranchConditional %operation_result_" << operationNdx << " %label_IF_" << operationNdx << " %IF_"
2146             << operationNdx << "\n"
2147             << "%label_IF_" << operationNdx << " = OpLabel\n"
2148             << "%operation_val_" << operationNdx << " = OpLoad %i32 %operation\n"
2149             << "%out_val_" << operationNdx << " = OpLoad %i32 %out0\n"
2150             << "%add_if_" << operationNdx << " = OpIAdd %i32 %out_val_" << operationNdx << " %operation_val_"
2151             << operationNdx << "\n"
2152             << "OpStore %out0 %add_if_" << operationNdx << "\n"
2153             << "OpBranch %IF_" << operationNdx << "\n"
2154             << "%IF_" << operationNdx << " = OpLabel\n";
2155         return src.str();
2156     case glu::TYPE_FLOAT16_VEC2:
2157     case glu::TYPE_FLOAT_VEC2:
2158     case glu::TYPE_DOUBLE_VEC2:
2159         boolType = "%v2bool";
2160         break;
2161     case glu::TYPE_FLOAT16_VEC3:
2162     case glu::TYPE_FLOAT_VEC3:
2163     case glu::TYPE_DOUBLE_VEC3:
2164         boolType = "%v3bool";
2165         break;
2166     case glu::TYPE_FLOAT16_VEC4:
2167     case glu::TYPE_FLOAT_VEC4:
2168     case glu::TYPE_DOUBLE_VEC4:
2169         boolType = "%v4bool";
2170         break;
2171     default:
2172         DE_ASSERT(0);
2173         return "";
2174     }
2175 
2176     src << "\n"
2177         << "%operation_result_" << operationNdx << " = " << operation << " " << boolType << " %in0_val %in1_val\n"
2178         << "%ivec_result_" << operationNdx << " = OpSelect " << outputType << " %operation_result_" << operationNdx
2179         << " %c_" << &outputType[1] << "_1 %c_" << &outputType[1] << "_0\n"
2180         << "%operation_val_" << operationNdx << " = OpLoad %i32 %operation\n";
2181 
2182     src << "%operation_vec_" << operationNdx << " = OpCompositeConstruct " << outputType;
2183     for (int ndx = 0; ndx < scalarSize; ++ndx)
2184         src << " %operation_val_" << operationNdx;
2185     src << "\n";
2186 
2187     src << "%toAdd" << operationNdx << " = OpIMul " << outputType << " %ivec_result_" << operationNdx
2188         << " %operation_vec_" << operationNdx << "\n"
2189         << "%out_val_" << operationNdx << " = OpLoad " << outputType << " %out0\n"
2190 
2191         << "%add_if_" << operationNdx << " = OpIAdd " << outputType << " %out_val_" << operationNdx << " %toAdd"
2192         << operationNdx << "\n"
2193         << "OpStore %out0 %add_if_" << operationNdx << "\n";
2194 
2195     return src.str();
2196 }
2197 
generateSpirv(const ShaderSpec & spec,const bool are16Bit,const bool are64Bit,const bool isMediump)2198 std::string generateSpirv(const ShaderSpec &spec, const bool are16Bit, const bool are64Bit, const bool isMediump)
2199 {
2200     static const std::string COMPARE_OPERATIONS[] = {"OpFOrdEqual",
2201                                                      "OpFOrdGreaterThan",
2202                                                      "OpFOrdLessThan",
2203                                                      "OpFOrdGreaterThanEqual",
2204                                                      "OpFOrdLessThanEqual",
2205                                                      "OpFUnordEqual",
2206                                                      "OpFUnordGreaterThan",
2207                                                      "OpFUnordLessThan",
2208                                                      "OpFUnordGreaterThanEqual",
2209                                                      "OpFUnordLessThanEqual"};
2210 
2211     int moveBitNdx = 0;
2212     vector<std::string> inputTypes;
2213     vector<std::string> outputTypes;
2214     const std::string packType =
2215         spec.packFloat16Bit ? getTypeSpirv(getDataTypeFloat16Scalars(spec.inputs[0].varType.getBasicType())) : "";
2216 
2217     vector<bool> floatResult;
2218     for (const auto &symbol : spec.outputs)
2219         floatResult.push_back(glu::isDataTypeFloatType(symbol.varType.getBasicType()));
2220 
2221     const bool anyFloatResult = std::any_of(begin(floatResult), end(floatResult), [](bool b) { return b; });
2222 
2223     vector<bool> packFloatRes;
2224     for (const auto &floatRes : floatResult)
2225         packFloatRes.push_back(floatRes && spec.packFloat16Bit);
2226 
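    // Declare only the float widths the generated module needs: 32-bit unless every value is 16- or 64-bit,
    // 64-bit for double types, and 16-bit when using native float16 types or packed 16-bit values.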
2227     const bool useF32Types = (!are16Bit && !are64Bit);
2228     const bool useF64Types = are64Bit;
2229     const bool useF16Types = (spec.packFloat16Bit || are16Bit);
2230 
2231     for (const auto &symbol : spec.inputs)
2232         inputTypes.push_back(getTypeSpirv(symbol.varType.getBasicType(), spec.packFloat16Bit));
2233 
2234     for (const auto &symbol : spec.outputs)
2235         outputTypes.push_back(getTypeSpirv(symbol.varType.getBasicType(), spec.packFloat16Bit));
2236 
2237     DE_ASSERT(!inputTypes.empty());
2238     DE_ASSERT(!outputTypes.empty());
2239 
2240     // Assert input and output types match the expected operations.
2241     switch (spec.spirvCase)
2242     {
2243     case SPIRV_CASETYPE_COMPARE:
2244     case SPIRV_CASETYPE_FREM:
2245         DE_ASSERT(inputTypes.size() == 2);
2246         DE_ASSERT(outputTypes.size() == 1);
2247         break;
2248     case SPIRV_CASETYPE_MODFSTRUCT:
2249     case SPIRV_CASETYPE_FREXPSTRUCT:
2250         DE_ASSERT(inputTypes.size() == 1);
2251         DE_ASSERT(outputTypes.size() == 2);
2252         break;
2253     default:
2254         DE_ASSERT(false);
2255         break;
2256     }
2257 
2258     std::ostringstream src;
2259     src << "; SPIR-V\n"
2260            "; Version: 1.0\n"
2261            "; Generator: Khronos Glslang Reference Front End; 4\n"
2262            "; Bound: 114\n"
2263            "; Schema: 0\n"
2264            "OpCapability Shader\n";
2265 
2266     if (useF16Types)
2267         src << "OpCapability Float16\n";
2268 
2269     if (are16Bit)
2270         src << "OpCapability StorageBuffer16BitAccess\n"
2271                "OpCapability UniformAndStorageBuffer16BitAccess\n";
2272 
2273     if (useF64Types)
2274         src << "OpCapability Float64\n";
2275 
2276     if (are16Bit)
2277         src << "OpExtension \"SPV_KHR_16bit_storage\"\n";
2278 
2279     src << "%glslstd450 = OpExtInstImport \"GLSL.std.450\"\n"
2280            "OpMemoryModel Logical GLSL450\n"
2281            "OpEntryPoint GLCompute %BP_main \"main\" %BP_id3uNum %BP_id3uID\n"
2282            "OpExecutionMode %BP_main LocalSize 1 1 1\n"
2283            "OpDecorate %BP_id3uNum BuiltIn NumWorkgroups\n"
2284            "OpDecorate %BP_id3uID BuiltIn WorkgroupId\n";
2285 
2286     // Input offsets and stride.
2287     {
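        // Align each member offset to its padded size (vec3 is padded to vec4) and round the array stride
        // up to a multiple of the largest member size.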
2288         int offset  = 0;
2289         int ndx     = 0;
2290         int largest = 0;
2291         for (const auto &symbol : spec.inputs)
2292         {
2293             const int scalarSize = symbol.varType.getScalarSize();
2294             const int memberSize =
2295                 (scalarSize + ((scalarSize == 3) ? 1 : 0)) *
2296                 (isDataTypeDoubleType(symbol.varType.getBasicType()) ?
2297                      (int)sizeof(uint64_t) :
2298                      (isDataTypeFloat16OrVec(symbol.varType.getBasicType()) ? (int)sizeof(uint16_t) :
2299                                                                               (int)sizeof(uint32_t)));
2300             const int extraMemberBytes = (offset % memberSize);
2301 
2302             offset += ((extraMemberBytes == 0) ? 0 : (memberSize - extraMemberBytes));
2303             src << "OpMemberDecorate %SSB0_IN " << ndx << " Offset " << offset << "\n";
2304             ++ndx;
2305 
2306             if (memberSize > largest)
2307                 largest = memberSize;
2308 
2309             offset += memberSize;
2310         }
2311         DE_ASSERT(largest > 0);
2312         const int extraBytes = (offset % largest);
2313         const int stride     = offset + (extraBytes == 0 ? 0 : (largest - extraBytes));
2314         src << "OpDecorate %up_SSB0_IN ArrayStride " << stride << "\n";
2315     }
2316 
2317     src << "OpMemberDecorate %ssboIN 0 Offset 0\n"
2318            "OpDecorate %ssboIN BufferBlock\n"
2319            "OpDecorate %ssbo_src DescriptorSet 0\n"
2320            "OpDecorate %ssbo_src Binding 0\n"
2321            "\n";
2322 
2323     if (isMediump)
2324     {
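        // With mediump inputs, decorate the input members, the loaded values and (for float results)
        // the operation result and outputs with RelaxedPrecision.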
2325         for (size_t i = 0; i < inputTypes.size(); ++i)
2326         {
2327             src << "OpMemberDecorate %SSB0_IN " << i
2328                 << " RelaxedPrecision\n"
2329                    "OpDecorate %in"
2330                 << i
2331                 << " RelaxedPrecision\n"
2332                    "OpDecorate %src_val_0_"
2333                 << i
2334                 << " RelaxedPrecision\n"
2335                    "OpDecorate %in"
2336                 << i << "_val RelaxedPrecision\n";
2337         }
2338 
2339         if (anyFloatResult)
2340         {
2341             switch (spec.spirvCase)
2342             {
2343             case SPIRV_CASETYPE_FREM:
2344                 src << "OpDecorate %frem_result RelaxedPrecision\n";
2345                 break;
2346             case SPIRV_CASETYPE_MODFSTRUCT:
2347                 src << "OpDecorate %modfstruct_result RelaxedPrecision\n";
2348                 break;
2349             case SPIRV_CASETYPE_FREXPSTRUCT:
2350                 src << "OpDecorate %frexpstruct_result RelaxedPrecision\n";
2351                 break;
2352             default:
2353                 DE_ASSERT(false);
2354                 break;
2355             }
2356 
2357             for (size_t i = 0; i < outputTypes.size(); ++i)
2358             {
2359                 src << "OpMemberDecorate %SSB0_OUT " << i << " RelaxedPrecision\n";
2360                 src << "OpDecorate %out_val_final_" << i << " RelaxedPrecision\n";
2361                 src << "OpDecorate %out" << i << " RelaxedPrecision\n";
2362             }
2363         }
2364     }
2365 
2366     // Output offsets and stride.
2367     {
2368         int offset  = 0;
2369         int ndx     = 0;
2370         int largest = 0;
2371         for (const auto &symbol : spec.outputs)
2372         {
2373             const int scalarSize = symbol.varType.getScalarSize();
2374             const int memberSize =
2375                 (scalarSize + ((scalarSize == 3) ? 1 : 0)) *
2376                 (isDataTypeDoubleType(symbol.varType.getBasicType()) ?
2377                      (int)sizeof(uint64_t) :
2378                      (isDataTypeFloat16OrVec(symbol.varType.getBasicType()) ? (int)sizeof(uint16_t) :
2379                                                                               (int)sizeof(uint32_t)));
2380             const int extraMemberBytes = (offset % memberSize);
2381 
2382             offset += ((extraMemberBytes == 0) ? 0 : (memberSize - extraMemberBytes));
2383             src << "OpMemberDecorate %SSB0_OUT " << ndx << " Offset " << offset << "\n";
2384             ++ndx;
2385 
2386             if (memberSize > largest)
2387                 largest = memberSize;
2388 
2389             offset += memberSize;
2390         }
2391         DE_ASSERT(largest > 0);
2392         const int extraBytes = (offset % largest);
2393         const int stride     = offset + ((extraBytes == 0) ? 0 : (largest - extraBytes));
2394         src << "OpDecorate %up_SSB0_OUT ArrayStride " << stride << "\n";
2395     }
2396 
2397     src << "OpMemberDecorate %ssboOUT 0 Offset 0\n"
2398            "OpDecorate %ssboOUT BufferBlock\n"
2399            "OpDecorate %ssbo_dst DescriptorSet 0\n"
2400            "OpDecorate %ssbo_dst Binding 1\n"
2401            "\n"
2402            "%void  = OpTypeVoid\n"
2403            "%bool  = OpTypeBool\n"
2404            "%v2bool = OpTypeVector %bool 2\n"
2405            "%v3bool = OpTypeVector %bool 3\n"
2406            "%v4bool = OpTypeVector %bool 4\n"
2407            "%u32   = OpTypeInt 32 0\n";
2408 
2409     if (useF32Types)
2410         src << "%f32   = OpTypeFloat 32\n"
2411                "%v2f32 = OpTypeVector %f32 2\n"
2412                "%v3f32 = OpTypeVector %f32 3\n"
2413                "%v4f32 = OpTypeVector %f32 4\n";
2414 
2415     if (useF64Types)
2416         src << "%f64   = OpTypeFloat 64\n"
2417                "%v2f64 = OpTypeVector %f64 2\n"
2418                "%v3f64 = OpTypeVector %f64 3\n"
2419                "%v4f64 = OpTypeVector %f64 4\n";
2420 
2421     if (useF16Types)
2422         src << "%f16   = OpTypeFloat 16\n"
2423                "%v2f16 = OpTypeVector %f16 2\n"
2424                "%v3f16 = OpTypeVector %f16 3\n"
2425                "%v4f16 = OpTypeVector %f16 4\n";
2426 
2427     src << "%i32   = OpTypeInt 32 1\n"
2428            "%v2i32 = OpTypeVector %i32 2\n"
2429            "%v3i32 = OpTypeVector %i32 3\n"
2430            "%v4i32 = OpTypeVector %i32 4\n"
2431            "%v2u32 = OpTypeVector %u32 2\n"
2432            "%v3u32 = OpTypeVector %u32 3\n"
2433            "%v4u32 = OpTypeVector %u32 4\n"
2434            "\n"
2435            "%ip_u32   = OpTypePointer Input %u32\n"
2436            "%ip_v3u32 = OpTypePointer Input %v3u32\n"
2437            "%up_float = OpTypePointer Uniform "
2438         << inputTypes[0]
2439         << "\n"
2440            "\n"
2441            "%fp_operation = OpTypePointer Function %i32\n"
2442            "%voidf        = OpTypeFunction %void\n"
2443            "%fp_u32       = OpTypePointer Function %u32\n"
2444            "%fp_it1       = OpTypePointer Function "
2445         << inputTypes[0] << "\n";
2446 
2447     for (size_t i = 0; i < outputTypes.size(); ++i)
2448     {
2449         src << "%fp_out_" << i << "     = OpTypePointer Function " << outputTypes[i] << "\n"
2450             << "%up_out_" << i << "     = OpTypePointer Uniform " << outputTypes[i] << "\n";
2451     }
2452 
2453     if (spec.packFloat16Bit)
2454         src << "%fp_f16  = OpTypePointer Function " << packType << "\n";
2455 
2456     src << "%BP_id3uID = OpVariable %ip_v3u32 Input\n"
2457            "%BP_id3uNum = OpVariable %ip_v3u32 Input\n"
2458            "\n"
2459            "%c_u32_0 = OpConstant %u32 0\n"
2460            "%c_u32_1 = OpConstant %u32 1\n"
2461            "%c_u32_2 = OpConstant %u32 2\n"
2462            "%c_i32_0 = OpConstant %i32 0\n"
2463            "%c_i32_1 = OpConstant %i32 1\n"
2464            "\n";
2465 
2466     if (useF32Types)
2467         src << "%c_f32_0 = OpConstant %f32 0\n"
2468                "%c_f32_1 = OpConstant %f32 1\n";
2469 
2470     if (useF16Types)
2471         src << "%c_f16_0 = OpConstant %f16 0\n"
2472                "%c_f16_1 = OpConstant %f16 1\n"
2473                "%c_f16_minus1 = OpConstant %f16 -0x1p+0\n";
2474 
2475     if (useF64Types)
2476         src << "%c_f64_0 = OpConstant %f64 0\n"
2477                "%c_f64_1 = OpConstant %f64 1\n";
2478 
2479     src << "\n"
2480            "%c_v2i32_0 = OpConstantComposite %v2i32 %c_i32_0 %c_i32_0\n"
2481            "%c_v2i32_1 = OpConstantComposite %v2i32 %c_i32_1 %c_i32_1\n"
2482            "%c_v3i32_0 = OpConstantComposite %v3i32 %c_i32_0 %c_i32_0 %c_i32_0\n"
2483            "%c_v3i32_1 = OpConstantComposite %v3i32 %c_i32_1 %c_i32_1 %c_i32_1\n"
2484            "%c_v4i32_0 = OpConstantComposite %v4i32 %c_i32_0 %c_i32_0 %c_i32_0 %c_i32_0\n"
2485            "%c_v4i32_1 = OpConstantComposite %v4i32 %c_i32_1 %c_i32_1 %c_i32_1 %c_i32_1\n"
2486            "\n";
2487 
2488     if (useF32Types)
2489         src << "%c_v2f32_0 = OpConstantComposite %v2f32 %c_f32_0 %c_f32_0\n"
2490                "%c_v2f32_1 = OpConstantComposite %v2f32 %c_f32_1 %c_f32_1\n"
2491                "%c_v3f32_0 = OpConstantComposite %v3f32 %c_f32_0 %c_f32_0 %c_f32_0\n"
2492                "%c_v3f32_1 = OpConstantComposite %v3f32 %c_f32_1 %c_f32_1 %c_f32_1\n"
2493                "%c_v4f32_0 = OpConstantComposite %v4f32 %c_f32_0 %c_f32_0 %c_f32_0 %c_f32_0\n"
2494                "%c_v4f32_1 = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n";
2495 
2496     if (useF16Types)
2497         src << "%c_v2f16_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
2498                "%c_v2f16_1 = OpConstantComposite %v2f16 %c_f16_1 %c_f16_1\n"
2499                "%c_v3f16_0 = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0 %c_f16_0\n"
2500                "%c_v3f16_1 = OpConstantComposite %v3f16 %c_f16_1 %c_f16_1 %c_f16_1\n"
2501                "%c_v4f16_0 = OpConstantComposite %v4f16 %c_f16_0 %c_f16_0 %c_f16_0 %c_f16_0\n"
2502                "%c_v4f16_1 = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_1\n";
2503 
2504     if (useF64Types)
2505         src << "%c_v2f64_0 = OpConstantComposite %v2f64 %c_f64_0 %c_f64_0\n"
2506                "%c_v2f64_1 = OpConstantComposite %v2f64 %c_f64_1 %c_f64_1\n"
2507                "%c_v3f64_0 = OpConstantComposite %v3f64 %c_f64_0 %c_f64_0 %c_f64_0\n"
2508                "%c_v3f64_1 = OpConstantComposite %v3f64 %c_f64_1 %c_f64_1 %c_f64_1\n"
2509                "%c_v4f64_0 = OpConstantComposite %v4f64 %c_f64_0 %c_f64_0 %c_f64_0 %c_f64_0\n"
2510                "%c_v4f64_1 = OpConstantComposite %v4f64 %c_f64_1 %c_f64_1 %c_f64_1 %c_f64_1\n"
2511                "\n";
2512 
2513     // Input struct.
2514     {
2515         src << "%SSB0_IN    = OpTypeStruct";
2516         for (const auto &t : inputTypes)
2517             src << " " << t;
2518         src << "\n";
2519     }
2520 
2521     src << "%up_SSB0_IN = OpTypeRuntimeArray %SSB0_IN\n"
2522            "%ssboIN     = OpTypeStruct %up_SSB0_IN\n"
2523            "%up_ssboIN  = OpTypePointer Uniform %ssboIN\n"
2524            "%ssbo_src   = OpVariable %up_ssboIN Uniform\n"
2525            "\n";
2526 
2527     // Output struct.
2528     {
2529         src << "%SSB0_OUT    = OpTypeStruct";
2530         for (const auto &t : outputTypes)
2531             src << " " << t;
2532         src << "\n";
2533     }
2534 
2535     std::string modfStructMemberType;
2536     std::string frexpStructFirstMemberType;
2537     if (spec.spirvCase == SPIRV_CASETYPE_MODFSTRUCT)
2538     {
2539         modfStructMemberType = (packFloatRes[0] ? packType : outputTypes[0]);
2540         src << "%modfstruct_ret_t = OpTypeStruct " << modfStructMemberType << " " << modfStructMemberType << "\n";
2541     }
2542     else if (spec.spirvCase == SPIRV_CASETYPE_FREXPSTRUCT)
2543     {
2544         frexpStructFirstMemberType = (packFloatRes[0] ? packType : outputTypes[0]);
2545         src << "%frexpstruct_ret_t = OpTypeStruct " << frexpStructFirstMemberType << " " << outputTypes[1] << "\n";
2546     }
2547 
2548     src << "%up_SSB0_OUT = OpTypeRuntimeArray %SSB0_OUT\n"
2549            "%ssboOUT     = OpTypeStruct %up_SSB0_OUT\n"
2550            "%up_ssboOUT  = OpTypePointer Uniform %ssboOUT\n"
2551            "%ssbo_dst    = OpVariable %up_ssboOUT Uniform\n"
2552            "\n"
2553            "%BP_main = OpFunction %void None %voidf\n"
2554            "%BP_label = OpLabel\n"
2555            "%invocationNdx = OpVariable %fp_u32 Function\n";
2556 
2557     // Note: here we assume all inputs have the same type.
2558     for (size_t i = 0; i < inputTypes.size(); ++i)
2559         src << "%in" << i << " = OpVariable " << (spec.packFloat16Bit ? "%fp_f16" : "%fp_it1") << " Function\n";
2560 
2561     for (size_t i = 0; i < outputTypes.size(); ++i)
2562         src << "%out" << i << " = OpVariable "
2563             << (packFloatRes[i] ? std::string("%fp_f16") : std::string("%fp_out_") + de::toString(i)) << " Function\n";
2564 
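    // Compute a flat invocation index from the workgroup ID:
    // invocationNdx = numWorkgroups.x * numWorkgroups.y * id.z + numWorkgroups.x * id.y + id.x.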
2565     src << "%operation = OpVariable %fp_operation Function\n"
2566            "%BP_id_0_ptr  = OpAccessChain %ip_u32 %BP_id3uID %c_u32_0\n"
2567            "%BP_id_1_ptr  = OpAccessChain %ip_u32 %BP_id3uID %c_u32_1\n"
2568            "%BP_id_2_ptr  = OpAccessChain %ip_u32 %BP_id3uID %c_u32_2\n"
2569            "%BP_num_0_ptr  = OpAccessChain %ip_u32 %BP_id3uNum %c_u32_0\n"
2570            "%BP_num_1_ptr  = OpAccessChain %ip_u32 %BP_id3uNum %c_u32_1\n"
2571            "%BP_id_0_val = OpLoad %u32 %BP_id_0_ptr\n"
2572            "%BP_id_1_val = OpLoad %u32 %BP_id_1_ptr\n"
2573            "%BP_id_2_val = OpLoad %u32 %BP_id_2_ptr\n"
2574            "%BP_num_0_val = OpLoad %u32 %BP_num_0_ptr\n"
2575            "%BP_num_1_val = OpLoad %u32 %BP_num_1_ptr\n"
2576            "\n"
2577            "%mul_1 = OpIMul %u32 %BP_num_0_val %BP_num_1_val\n"
2578            "%mul_2 = OpIMul %u32 %mul_1 %BP_id_2_val\n"
2579            "%mul_3 = OpIMul %u32 %BP_num_0_val %BP_id_1_val\n"
2580            "%add_1 = OpIAdd %u32 %mul_2 %mul_3\n"
2581            "%add_2 = OpIAdd %u32 %add_1 %BP_id_0_val\n"
2582            "OpStore %invocationNdx %add_2\n"
2583            "%invocationNdx_val = OpLoad %u32 %invocationNdx\n";
2584 
2585     // Load input values.
2586     for (size_t inputNdx = 0; inputNdx < inputTypes.size(); ++inputNdx)
2587     {
2588         src << "\n"
2589             << "%src_ptr_0_" << inputNdx << " = OpAccessChain %up_float %ssbo_src %c_i32_0 %invocationNdx_val %c_i32_"
2590             << inputNdx << "\n"
2591             << "%src_val_0_" << inputNdx << " = OpLoad " << inputTypes[inputNdx] << " %src_ptr_0_" << inputNdx << "\n";
2592 
2593         if (spec.packFloat16Bit)
2594         {
2595             if (spec.inputs[inputNdx].varType.getScalarSize() > 1)
2596             {
2597                 // Extract the val<inputNdx> u32 input channels into individual f16 values.
2598                 for (int i = 0; i < spec.inputs[inputNdx].varType.getScalarSize(); ++i)
2599                 {
2600                     src << "%src_val_0_" << inputNdx << "_" << i << " = OpCompositeExtract %u32 %src_val_0_" << inputNdx
2601                         << " " << i
2602                         << "\n"
2603                            "%val_v2f16_0_"
2604                         << inputNdx << "_" << i << " = OpBitcast %v2f16 %src_val_0_" << inputNdx << "_" << i
2605                         << "\n"
2606                            "%val_f16_0_"
2607                         << inputNdx << "_" << i << " = OpCompositeExtract %f16 %val_v2f16_0_" << inputNdx << "_" << i
2608                         << " 0\n";
2609                 }
2610 
2611                 // Construct the input vector.
2612                 src << "%val_f16_0_" << inputNdx << "   = OpCompositeConstruct " << packType;
2613                 for (int i = 0; i < spec.inputs[inputNdx].varType.getScalarSize(); ++i)
2614                 {
2615                     src << " %val_f16_0_" << inputNdx << "_" << i;
2616                 }
2617 
2618                 src << "\n";
2619                 src << "OpStore %in" << inputNdx << " %val_f16_0_" << inputNdx << "\n";
2620             }
2621             else
2622             {
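                // Scalar case: the f16 value sits in the low 16 bits of the u32, so bitcast to v2f16
                // and keep component 0.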
2623                 src << "%val_v2f16_0_" << inputNdx << " = OpBitcast %v2f16 %src_val_0_" << inputNdx
2624                     << "\n"
2625                        "%val_f16_0_"
2626                     << inputNdx << " = OpCompositeExtract %f16 %val_v2f16_0_" << inputNdx << " 0\n";
2627 
2628                 src << "OpStore %in" << inputNdx << " %val_f16_0_" << inputNdx << "\n";
2629             }
2630         }
2631         else
2632             src << "OpStore %in" << inputNdx << " %src_val_0_" << inputNdx << "\n";
2633 
2634         src << "%in" << inputNdx << "_val = OpLoad " << (spec.packFloat16Bit ? packType : inputTypes[inputNdx])
2635             << " %in" << inputNdx << "\n";
2636     }
2637 
2638     src << "\n"
2639            "OpStore %operation %c_i32_1\n";
2640 
2641     // Fill output values with dummy data.
2642     for (size_t i = 0; i < outputTypes.size(); ++i)
2643         src << "OpStore %out" << i << " %c_" << (packFloatRes[i] ? &packType[1] : &outputTypes[i][1]) << "_0\n";
2644 
2645     src << "\n";
2646 
2647     // Run operation.
2648     switch (spec.spirvCase)
2649     {
2650     case SPIRV_CASETYPE_COMPARE:
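        // Each comparison result is scaled by the current value of %operation and added to out0;
        // moveBitOperation then updates %operation so every comparison contributes a distinct bit.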
2651         for (int operationNdx = 0; operationNdx < DE_LENGTH_OF_ARRAY(COMPARE_OPERATIONS); ++operationNdx)
2652         {
2653             src << scalarComparison(COMPARE_OPERATIONS[operationNdx], operationNdx,
2654                                     spec.inputs[0].varType.getBasicType(), outputTypes[0],
2655                                     spec.outputs[0].varType.getScalarSize());
2656             src << moveBitOperation("%operation", moveBitNdx);
2657             ++moveBitNdx;
2658         }
2659         break;
2660     case SPIRV_CASETYPE_FREM:
2661         src << "%frem_result = OpFRem " << (packFloatRes[0] ? packType : outputTypes[0]) << " %in0_val %in1_val\n"
2662             << "OpStore %out0 %frem_result\n";
2663         break;
2664     case SPIRV_CASETYPE_MODFSTRUCT:
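        // GLSL.std.450 ModfStruct returns a struct of {fractional part, whole-number part}.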
2665         src << "%modfstruct_result = OpExtInst %modfstruct_ret_t %glslstd450 ModfStruct %in0_val\n"
2666             << "%modfstruct_result_0 = OpCompositeExtract " << modfStructMemberType << " %modfstruct_result 0\n"
2667             << "%modfstruct_result_1 = OpCompositeExtract " << modfStructMemberType << " %modfstruct_result 1\n"
2668             << "OpStore %out0 %modfstruct_result_0\n"
2669             << "OpStore %out1 %modfstruct_result_1\n";
2670         break;
2671     case SPIRV_CASETYPE_FREXPSTRUCT:
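        // GLSL.std.450 FrexpStruct returns {significand, integer exponent}; only the significand
        // member may use the packed f16 type.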
2672         src << "%frexpstruct_result = OpExtInst %frexpstruct_ret_t %glslstd450 FrexpStruct %in0_val\n"
2673             << "%frexpstruct_result_0 = OpCompositeExtract " << frexpStructFirstMemberType << " %frexpstruct_result 0\n"
2674             << "%frexpstruct_result_1 = OpCompositeExtract " << outputTypes[1] << " %frexpstruct_result 1\n"
2675             << "OpStore %out0 %frexpstruct_result_0\n"
2676             << "OpStore %out1 %frexpstruct_result_1\n";
2677         break;
2678     default:
2679         DE_ASSERT(false);
2680         break;
2681     }
2682 
2683     for (size_t outputNdx = 0; outputNdx < outputTypes.size(); ++outputNdx)
2684     {
2685         src << "\n"
2686                "%out_val_final_"
2687             << outputNdx << " = OpLoad " << (packFloatRes[outputNdx] ? packType : outputTypes[outputNdx]) << " %out"
2688             << outputNdx
2689             << "\n"
2690                "%ssbo_dst_ptr_"
2691             << outputNdx << " = OpAccessChain %up_out_" << outputNdx << " %ssbo_dst %c_i32_0 %invocationNdx_val %c_i32_"
2692             << outputNdx << "\n";
2693 
2694         if (packFloatRes[outputNdx])
2695         {
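            // Re-pack each f16 result: pair it with -1.0h (%c_f16_minus1) in a v2f16 and bitcast to u32,
            // leaving the result in the low 16 bits of the stored integer.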
2696             if (spec.outputs[outputNdx].varType.getScalarSize() > 1)
2697             {
2698                 for (int i = 0; i < spec.outputs[outputNdx].varType.getScalarSize(); ++i)
2699                 {
2700                     src << "%out_val_final_" << outputNdx << "_" << i << " = OpCompositeExtract %f16 %out_val_final_"
2701                         << outputNdx << " " << i << "\n";
2702                     src << "%out_composite_" << outputNdx << "_" << i
2703                         << " = OpCompositeConstruct %v2f16 %out_val_final_" << outputNdx << "_" << i
2704                         << " %c_f16_minus1\n";
2705                     src << "%u32_val_" << outputNdx << "_" << i << " = OpBitcast %u32 %out_composite_" << outputNdx
2706                         << "_" << i << "\n";
2707                 }
2708 
2709                 src << "%u32_final_val_" << outputNdx << " = OpCompositeConstruct " << outputTypes[outputNdx];
2710                 for (int i = 0; i < spec.outputs[outputNdx].varType.getScalarSize(); ++i)
2711                     src << " %u32_val_" << outputNdx << "_" << i;
2712                 src << "\n";
2713                 src << "OpStore %ssbo_dst_ptr_" << outputNdx << " %u32_final_val_" << outputNdx << "\n";
2714             }
2715             else
2716             {
2717                 src << "%out_composite_" << outputNdx << " = OpCompositeConstruct %v2f16 %out_val_final_" << outputNdx
2718                     << " %c_f16_minus1\n"
2719                        "%out_result_"
2720                     << outputNdx << " = OpBitcast " << outputTypes[outputNdx] << " %out_composite_" << outputNdx
2721                     << "\n"
2722                        "OpStore %ssbo_dst_ptr_"
2723                     << outputNdx << " %out_result_" << outputNdx << "\n";
2724             }
2725         }
2726         else
2727         {
2728             src << "OpStore %ssbo_dst_ptr_" << outputNdx << " %out_val_final_" << outputNdx << "\n";
2729         }
2730     }
2731 
2732     src << "\n"
2733            "OpReturn\n"
2734            "OpFunctionEnd\n";
2735 
2736     return src.str();
2737 }
2738 
2739 std::string ComputeShaderExecutor::generateComputeShader(const ShaderSpec &spec)
2740 {
2741     if (spec.spirvCase != SPIRV_CASETYPE_NONE)
2742     {
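        // Inspect the inputs to decide which SPIR-V capabilities and precision decorations the
        // hand-written assembly needs.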
2743         bool are16Bit  = false;
2744         bool are64Bit  = false;
2745         bool isMediump = false;
2746         for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
2747         {
2748             if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
2749                 are16Bit = true;
2750 
2751             if (glu::isDataTypeDoubleType(symIter->varType.getBasicType()))
2752                 are64Bit = true;
2753 
2754             if (symIter->varType.getPrecision() == glu::PRECISION_MEDIUMP)
2755                 isMediump = true;
2756 
2757             if (isMediump && are16Bit)
2758                 break;
2759         }
2760 
2761         return generateSpirv(spec, are16Bit, are64Bit, isMediump);
2762     }
2763     else
2764     {
2765         std::ostringstream src;
2766         src << glu::getGLSLVersionDeclaration(spec.glslVersion) << "\n";
2767 
2768         if (!spec.globalDeclarations.empty())
2769             src << spec.globalDeclarations << "\n";
2770 
2771         src << "layout(local_size_x = " << spec.localSizeX << ") in;\n"
2772             << "\n";
2773 
2774         declareBufferBlocks(src, spec);
2775 
2776         src << "void main (void)\n"
2777             << "{\n"
2778             << "    uint invocationNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z\n"
2779             << "                       + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n";
2780 
2781         generateExecBufferIo(src, spec, "invocationNdx");
2782 
2783         src << "}\n";
2784 
2785         return src.str();
2786     }
2787 }
2788 
2789 void ComputeShaderExecutor::generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection)
2790 {
2791     if (shaderSpec.spirvCase != SPIRV_CASETYPE_NONE)
2792         programCollection.spirvAsmSources.add("compute")
2793             << SpirVAsmBuildOptions(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3)
2794             << generateComputeShader(shaderSpec);
2795     else
2796         programCollection.glslSources.add("compute")
2797             << glu::ComputeSource(generateComputeShader(shaderSpec)) << shaderSpec.buildOptions;
2798 }
2799 
2800 void ComputeShaderExecutor::execute(int numValues, const void *const *inputs, void *const *outputs,
2801                                     VkDescriptorSet extraResources)
2802 {
2803     const VkDevice vkDevice         = m_context.getDevice();
2804     const DeviceInterface &vk       = m_context.getDeviceInterface();
2805     const VkQueue queue             = m_context.getUniversalQueue();
2806     const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
2807 
2808     DescriptorPoolBuilder descriptorPoolBuilder;
2809     DescriptorSetLayoutBuilder descriptorSetLayoutBuilder;
2810 
2811     Move<VkShaderModule> computeShaderModule;
2812     Move<VkPipeline> computePipeline;
2813     Move<VkPipelineLayout> pipelineLayout;
2814     Move<VkCommandPool> cmdPool;
2815     Move<VkDescriptorPool> descriptorPool;
2816     Move<VkDescriptorSetLayout> descriptorSetLayout;
2817     Move<VkDescriptorSet> descriptorSet;
2818     const uint32_t numDescriptorSets = (m_extraResourcesLayout != 0) ? 2u : 1u;
2819 
2820     DE_ASSERT((m_extraResourcesLayout != 0) == (extraResources != 0));
2821 
2822     initBuffers(numValues);
2823 
2824     // Set up the input buffer and copy data.
2825     // For SPIR-V shaders using packed 16-bit float inputs, each float is converted to 16 bits and stored in
2826     // the lower 16 bits of a 32-bit integer in the input buffer; the shader then bitcasts it back to a
2827     // 16-bit float.
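    // Example: 1.5f becomes the f16 bit pattern 0x3E00 and is uploaded as the u32 0x00003E00; the shader
    // bitcasts that u32 to a v2f16 and keeps component 0.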
2828     uploadInputBuffer(inputs, numValues,
2829                       m_shaderSpec.packFloat16Bit && (m_shaderSpec.spirvCase != SPIRV_CASETYPE_NONE));
2830 
2831     // Create command pool
2832     cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
2833 
2834     // Create descriptor set layout and descriptor pool
2835 
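    // Two storage-buffer bindings in set 0: one for the input buffer, one for the output buffer.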
2836     descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
2837     descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
2838     descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
2839     descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
2840 
2841     descriptorSetLayout = descriptorSetLayoutBuilder.build(vk, vkDevice);
2842     descriptorPool = descriptorPoolBuilder.build(vk, vkDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2843 
2844     const VkDescriptorSetAllocateInfo allocInfo = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, DE_NULL,
2845                                                    *descriptorPool, 1u, &*descriptorSetLayout};
2846 
2847     descriptorSet = allocateDescriptorSet(vk, vkDevice, &allocInfo);
2848 
2849     // Create pipeline layout
2850     {
2851         const VkDescriptorSetLayout descriptorSetLayouts[]    = {*descriptorSetLayout, m_extraResourcesLayout};
2852         const VkPipelineLayoutCreateInfo pipelineLayoutParams = {
2853             VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // VkStructureType sType;
2854             DE_NULL,                                       // const void* pNext;
2855             (VkPipelineLayoutCreateFlags)0,                // VkPipelineLayoutCreateFlags flags;
2856             numDescriptorSets,                             // uint32_t setLayoutCount;
2857             descriptorSetLayouts,                          // const VkDescriptorSetLayout* pSetLayouts;
2858             0u,                                            // uint32_t pushConstantRangeCount;
2859             DE_NULL                                        // const VkPushConstantRange* pPushConstantRanges;
2860         };
2861 
2862         pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
2863     }
2864 
2865     // Create shaders
2866     {
2867         computeShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("compute"), 0);
2868     }
2869 
2870     // create pipeline
2871     {
2872         const VkPipelineShaderStageCreateInfo shaderStageParams[1] = {{
2873             VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
2874             DE_NULL,                                             // const void* pNext;
2875             (VkPipelineShaderStageCreateFlags)0u,                // VkPipelineShaderStageCreateFlags flags;
2876             VK_SHADER_STAGE_COMPUTE_BIT,                         // VkShaderStageFlagBits stage;
2877             *computeShaderModule,                                // VkShaderModule module;
2878             "main",                                              // const char* pName;
2879             DE_NULL                                              // const VkSpecializationInfo* pSpecializationInfo;
2880         }};
2881 
2882         const VkComputePipelineCreateInfo computePipelineParams = {
2883             VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // VkStructureType sType;
2884             DE_NULL,                                        // const void* pNext;
2885             (VkPipelineCreateFlags)0,                       // VkPipelineCreateFlags flags;
2886             *shaderStageParams,                             // VkPipelineShaderStageCreateInfo stage;
2887             *pipelineLayout,                                // VkPipelineLayout layout;
2888             0u,                                             // VkPipeline basePipelineHandle;
2889             0u,                                             // int32_t basePipelineIndex;
2890         };
2891 
2892         computePipeline = createComputePipeline(vk, vkDevice, DE_NULL, &computePipelineParams);
2893     }
2894 
2895     const int maxValuesPerInvocation = m_context.getDeviceProperties().limits.maxComputeWorkGroupSize[0];
2896     int curOffset                    = 0;
2897     const uint32_t inputStride       = getInputStride();
2898     const uint32_t outputStride      = getOutputStride();
2899 
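    // Process the values in batches: each batch dispatches one workgroup per value and rebinds the
    // input/output buffers at the corresponding offsets.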
2900     while (curOffset < numValues)
2901     {
2902         Move<VkCommandBuffer> cmdBuffer;
2903         const int numToExec = de::min(maxValuesPerInvocation, numValues - curOffset);
2904 
2905         // Update descriptors
2906         {
2907             DescriptorSetUpdateBuilder descriptorSetUpdateBuilder;
2908 
2909             const VkDescriptorBufferInfo outputDescriptorBufferInfo = {
2910                 *m_outputBuffer,          // VkBuffer buffer;
2911                 curOffset * outputStride, // VkDeviceSize offset;
2912                 numToExec * outputStride  // VkDeviceSize range;
2913             };
2914 
2915             descriptorSetUpdateBuilder.writeSingle(
2916                 *descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((uint32_t)OUTPUT_BUFFER_BINDING),
2917                 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputDescriptorBufferInfo);
2918 
2919             if (inputStride)
2920             {
2921                 const VkDescriptorBufferInfo inputDescriptorBufferInfo = {
2922                     *m_inputBuffer,          // VkBuffer buffer;
2923                     curOffset * inputStride, // VkDeviceSize offset;
2924                     numToExec * inputStride  // VkDeviceSize range;
2925                 };
2926 
2927                 descriptorSetUpdateBuilder.writeSingle(
2928                     *descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((uint32_t)INPUT_BUFFER_BINDING),
2929                     VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputDescriptorBufferInfo);
2930             }
2931 
2932             descriptorSetUpdateBuilder.update(vk, vkDevice);
2933         }
2934 
2935         cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2936         beginCommandBuffer(vk, *cmdBuffer);
2937         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
2938 
2939         {
2940             const VkDescriptorSet descriptorSets[] = {*descriptorSet, extraResources};
2941             vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, numDescriptorSets,
2942                                      descriptorSets, 0u, DE_NULL);
2943         }
2944 
2945         vk.cmdDispatch(*cmdBuffer, numToExec, 1, 1);
2946 
2947         // Insert a barrier so data written by the shader is available to the host
2948         {
2949             const VkBufferMemoryBarrier bufferBarrier = {
2950                 VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // VkStructureType    sType;
2951                 DE_NULL,                                 // const void*        pNext;
2952                 VK_ACCESS_SHADER_WRITE_BIT,              // VkAccessFlags      srcAccessMask;
2953                 VK_ACCESS_HOST_READ_BIT,                 // VkAccessFlags      dstAccessMask;
2954                 VK_QUEUE_FAMILY_IGNORED,                 // uint32_t           srcQueueFamilyIndex;
2955                 VK_QUEUE_FAMILY_IGNORED,                 // uint32_t           dstQueueFamilyIndex;
2956                 *m_outputBuffer,                         // VkBuffer           buffer;
2957                 0,                                       // VkDeviceSize       offset;
2958                 VK_WHOLE_SIZE,                           // VkDeviceSize       size;
2959             };
2960 
2961             vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT,
2962                                   (VkDependencyFlags)0, 0, (const VkMemoryBarrier *)DE_NULL, 1, &bufferBarrier, 0,
2963                                   (const VkImageMemoryBarrier *)DE_NULL);
2964         }
2965 
2966         endCommandBuffer(vk, *cmdBuffer);
2967 
2968         curOffset += numToExec;
2969 
2970         // Execute
2971         submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());
2972     }
2973 
2974     // Read back data
2975     readOutputBuffer(outputs, numValues);
2976 }
2977 
2978 #ifndef CTS_USES_VULKANSC
2979 // MeshTaskShaderExecutor
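// Executes the shader spec in a mesh shader, or in a task shader paired with a no-op mesh shader,
// using the same buffer I/O scheme as the other BufferIoExecutor subclasses.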
2980 
2981 class MeshTaskShaderExecutor : public BufferIoExecutor
2982 {
2983 public:
2984     MeshTaskShaderExecutor(Context &context, const ShaderSpec &shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
2985     virtual ~MeshTaskShaderExecutor(void);
2986 
2987     static void generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection, bool useTask);
2988 
2989     virtual void execute(int numValues, const void *const *inputs, void *const *outputs,
2990                          VkDescriptorSet extraResources);
2991 
2992 protected:
2993     static std::string generateMeshShader(const ShaderSpec &spec, bool useTask);
2994     static std::string generateTaskShader(const ShaderSpec &spec);
2995 
2996 private:
2997     const VkDescriptorSetLayout m_extraResourcesLayout;
2998 };
2999 
3000 MeshTaskShaderExecutor::MeshTaskShaderExecutor(Context &context, const ShaderSpec &shaderSpec,
3001                                                VkDescriptorSetLayout extraResourcesLayout)
3002     : BufferIoExecutor(context, shaderSpec)
3003     , m_extraResourcesLayout(extraResourcesLayout)
3004 {
3005 }
3006 
3007 MeshTaskShaderExecutor::~MeshTaskShaderExecutor(void)
3008 {
3009 }
3010 
3011 std::string MeshTaskShaderExecutor::generateMeshShader(const ShaderSpec &spec, bool useTask)
3012 {
3013     DE_ASSERT(spec.spirvCase == SPIRV_CASETYPE_NONE);
3014 
3015     std::ostringstream src;
3016 
3017     if (useTask)
3018     {
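        // The task shader performs the buffer I/O, so emit a minimal mesh shader that outputs nothing.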
3019         src << glu::getGLSLVersionDeclaration(spec.glslVersion) << "\n"
3020             << "#extension GL_EXT_mesh_shader : enable\n"
3021             << "layout(local_size_x=1, local_size_y=1, local_size_z=1) in;\n"
3022             << "layout(points) out;\n"
3023             << "layout(max_vertices=1, max_primitives=1) out;\n"
3024             << "\n"
3025             << "void main (void)\n"
3026             << "{\n"
3027             << "    SetMeshOutputsEXT(0u, 0u);\n"
3028             << "}\n";
3029     }
3030     else
3031     {
3032         src << glu::getGLSLVersionDeclaration(spec.glslVersion) << "\n"
3033             << "#extension GL_EXT_mesh_shader : enable\n";
3034 
3035         if (!spec.globalDeclarations.empty())
3036             src << spec.globalDeclarations << "\n";
3037 
3038         src << "layout(local_size_x = " << spec.localSizeX << ") in;\n"
3039             << "layout(points) out;\n"
3040             << "layout(max_vertices=1, max_primitives=1) out;\n"
3041             << "\n";
3042 
3043         declareBufferBlocks(src, spec);
3044 
3045         src << "void main (void)\n"
3046             << "{\n"
3047             << "    uint invocationNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z\n"
3048             << "                       + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n";
3049 
3050         generateExecBufferIo(src, spec, "invocationNdx");
3051 
3052         src << "    SetMeshOutputsEXT(0u, 0u);\n"
3053             << "}\n";
3054     }
3055 
3056     return src.str();
3057 }
3058 
3059 std::string MeshTaskShaderExecutor::generateTaskShader(const ShaderSpec &spec)
3060 {
3061     std::ostringstream src;
3062 
3063     src << glu::getGLSLVersionDeclaration(spec.glslVersion) << "\n"
3064         << "#extension GL_EXT_mesh_shader : enable\n";
3065 
3066     if (!spec.globalDeclarations.empty())
3067         src << spec.globalDeclarations << "\n";
3068 
3069     src << "layout(local_size_x = " << spec.localSizeX << ") in;\n"
3070         << "\n";
3071 
3072     declareBufferBlocks(src, spec);
3073 
3074     src << "void main (void)\n"
3075         << "{\n"
3076         << "    uint invocationNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z\n"
3077         << "                       + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n";
3078 
3079     generateExecBufferIo(src, spec, "invocationNdx");
3080 
3081     src << "    EmitMeshTasksEXT(0u, 0u, 0u);\n"
3082         << "}\n";
3083 
3084     return src.str();
3085 }
3086 
3087 void MeshTaskShaderExecutor::generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection,
3088                                              bool useTask)
3089 {
3090     DE_ASSERT(shaderSpec.spirvCase == SPIRV_CASETYPE_NONE);
3091     programCollection.glslSources.add("mesh")
3092         << glu::MeshSource(generateMeshShader(shaderSpec, useTask)) << shaderSpec.buildOptions;
3093     if (useTask)
3094         programCollection.glslSources.add("task")
3095             << glu::TaskSource(generateTaskShader(shaderSpec)) << shaderSpec.buildOptions;
3096 }
3097 
3098 void MeshTaskShaderExecutor::execute(int numValues, const void *const *inputs, void *const *outputs,
3099                                      VkDescriptorSet extraResources)
3100 {
3101     const auto vkDevice         = m_context.getDevice();
3102     const auto &vk              = m_context.getDeviceInterface();
3103     const auto queue            = m_context.getUniversalQueue();
3104     const auto queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
3105     const auto bindPoint        = VK_PIPELINE_BIND_POINT_GRAPHICS;
3106     const auto &binaries        = m_context.getBinaryCollection();
3107     const bool useTask          = binaries.contains("task");
3108     const auto shaderStage      = (useTask ? VK_SHADER_STAGE_TASK_BIT_EXT : VK_SHADER_STAGE_MESH_BIT_EXT);
3109     const auto pipelineStage =
3110         (useTask ? VK_PIPELINE_STAGE_TASK_SHADER_BIT_EXT : VK_PIPELINE_STAGE_MESH_SHADER_BIT_EXT);
3111 
3112     DE_ASSERT((m_extraResourcesLayout != DE_NULL) == (extraResources != DE_NULL));
3113 
3114     // Create input and output buffers.
3115     initBuffers(numValues);
3116 
3117     // Set up the input buffer and copy data.
3118     // For SPIR-V shaders using packed 16-bit float inputs, each float is converted to 16 bits and stored in
3119     // the lower 16 bits of a 32-bit integer in the input buffer; the shader then bitcasts it back to a
3120     // 16-bit float.
3121     uploadInputBuffer(inputs, numValues,
3122                       m_shaderSpec.packFloat16Bit && (m_shaderSpec.spirvCase != SPIRV_CASETYPE_NONE));
3123 
3124     // Create command pool
3125     const auto cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
3126 
3127     // Descriptor pool, set layout and set.
3128     DescriptorPoolBuilder descriptorPoolBuilder;
3129     DescriptorSetLayoutBuilder descriptorSetLayoutBuilder;
3130 
3131     descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, shaderStage);
3132     descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
3133     descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, shaderStage);
3134     descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
3135 
3136     const auto descriptorSetLayout = descriptorSetLayoutBuilder.build(vk, vkDevice);
3137     const auto descriptorPool =
3138         descriptorPoolBuilder.build(vk, vkDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3139     const auto descriptorSet = makeDescriptorSet(vk, vkDevice, descriptorPool.get(), descriptorSetLayout.get());
3140 
3141     // Create pipeline layout
3142     std::vector<VkDescriptorSetLayout> setLayouts;
3143     setLayouts.push_back(descriptorSetLayout.get());
3144     if (m_extraResourcesLayout != DE_NULL)
3145         setLayouts.push_back(m_extraResourcesLayout);
3146 
3147     const auto pipelineLayout =
3148         makePipelineLayout(vk, vkDevice, static_cast<uint32_t>(setLayouts.size()), de::dataOrNull(setLayouts));
3149 
3150     // Create shaders
3151     const auto meshShaderModule = createShaderModule(vk, vkDevice, binaries.get("mesh"));
3152     const auto taskShaderModule =
3153         (useTask ? createShaderModule(vk, vkDevice, binaries.get("task")) : Move<VkShaderModule>());
3154 
3155     // Render pass and framebuffer.
3156     const auto fbExtent   = makeExtent2D(1u, 1u);
3157     const auto renderPass = makeRenderPass(vk, vkDevice);
3158     const auto framebuffer =
3159         makeFramebuffer(vk, vkDevice, renderPass.get(), 0u, nullptr, fbExtent.width, fbExtent.height);
3160 
3161     const std::vector<VkViewport> viewports(1u, makeViewport(fbExtent));
3162     const std::vector<VkRect2D> scissors(1u, makeRect2D(fbExtent));
3163 
3164     // Create pipeline.
3165     const auto meshPipeline =
3166         makeGraphicsPipeline(vk, vkDevice, pipelineLayout.get(), taskShaderModule.get(), meshShaderModule.get(),
3167                              DE_NULL, renderPass.get(), viewports, scissors);
3168 
3169     const int maxValuesPerInvocation = m_context.getMeshShaderPropertiesEXT().maxMeshWorkGroupSize[0];
3170     const uint32_t inputStride       = getInputStride();
3171     const uint32_t outputStride      = getOutputStride();
3172     const auto outputBufferBinding =
3173         DescriptorSetUpdateBuilder::Location::binding(static_cast<uint32_t>(OUTPUT_BUFFER_BINDING));
3174     const auto inputBufferBinding =
3175         DescriptorSetUpdateBuilder::Location::binding(static_cast<uint32_t>(INPUT_BUFFER_BINDING));
3176     int curOffset = 0;
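    // Process the values in batches, drawing one mesh task per value.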
3177 
3178     while (curOffset < numValues)
3179     {
3180         const auto remaining = numValues - curOffset;
3181         const auto numToExec = de::min(maxValuesPerInvocation, remaining);
3182 
3183         // Update descriptors
3184         {
3185             DescriptorSetUpdateBuilder descriptorSetUpdateBuilder;
3186 
3187             const auto outputDescriptorBufferInfo =
3188                 makeDescriptorBufferInfo(m_outputBuffer.get(), curOffset * outputStride, numToExec * outputStride);
3189             descriptorSetUpdateBuilder.writeSingle(descriptorSet.get(), outputBufferBinding,
3190                                                    VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputDescriptorBufferInfo);
3191 
3192             if (inputStride)
3193             {
3194                 const auto inputDescriptorBufferInfo =
3195                     makeDescriptorBufferInfo(m_inputBuffer.get(), curOffset * inputStride, numToExec * inputStride);
3196                 descriptorSetUpdateBuilder.writeSingle(descriptorSet.get(), inputBufferBinding,
3197                                                        VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputDescriptorBufferInfo);
3198             }
3199 
3200             descriptorSetUpdateBuilder.update(vk, vkDevice);
3201         }
3202 
3203         std::vector<VkDescriptorSet> descriptorSets;
3204         descriptorSets.push_back(descriptorSet.get());
3205         if (extraResources != DE_NULL)
3206             descriptorSets.push_back(extraResources);
3207 
3208         const auto bufferBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT,
3209                                                            m_outputBuffer.get(), 0ull, VK_WHOLE_SIZE);
3210         const auto cmdBufferPtr  = allocateCommandBuffer(vk, vkDevice, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
3211         const auto cmdBuffer     = cmdBufferPtr.get();
3212 
3213         // Record command buffer, including pipeline barrier from output buffer to the host.
3214         beginCommandBuffer(vk, cmdBuffer);
3215         beginRenderPass(vk, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u));
3216         vk.cmdBindPipeline(cmdBuffer, bindPoint, meshPipeline.get());
3217         vk.cmdBindDescriptorSets(cmdBuffer, bindPoint, pipelineLayout.get(), 0u,
3218                                  static_cast<uint32_t>(descriptorSets.size()), de::dataOrNull(descriptorSets), 0u,
3219                                  DE_NULL);
3220         vk.cmdDrawMeshTasksEXT(cmdBuffer, numToExec, 1u, 1u);
3221         endRenderPass(vk, cmdBuffer);
3222         cmdPipelineBufferMemoryBarrier(vk, cmdBuffer, pipelineStage, VK_PIPELINE_STAGE_HOST_BIT, &bufferBarrier);
3223         endCommandBuffer(vk, cmdBuffer);
3224 
3225         // Execute
3226         submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer);
3227 
3228         curOffset += numToExec;
3229     }
3230 
3231     // Read back data
3232     readOutputBuffer(outputs, numValues);
3233 }
3234 #endif // CTS_USES_VULKANSC
3235 
3236 // Tessellation utils
3237 
3238 static std::string generateVertexShaderForTess(void)
3239 {
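    // Trivial vertex shader for the tessellation executors; gl_VertexIndex/2 and gl_VertexIndex%2 use
    // integer arithmetic, so indices 0..3 map to positions (0,0), (0,1), (1,0) and (1,1).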
3240     std::ostringstream src;
3241     src << "#version 450\n"
3242         << "void main (void)\n{\n"
3243         << "    gl_Position = vec4(gl_VertexIndex/2, gl_VertexIndex%2, 0.0, 1.0);\n"
3244         << "}\n";
3245 
3246     return src.str();
3247 }
3248 
3249 class TessellationExecutor : public BufferIoExecutor
3250 {
3251 public:
3252     TessellationExecutor(Context &context, const ShaderSpec &shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
3253     virtual ~TessellationExecutor(void);
3254 
3255     void renderTess(uint32_t numValues, uint32_t vertexCount, uint32_t patchControlPoints,
3256                     VkDescriptorSet extraResources);
3257 
3258 private:
3259     const VkDescriptorSetLayout m_extraResourcesLayout;
3260 };
3261 
3262 TessellationExecutor::TessellationExecutor(Context &context, const ShaderSpec &shaderSpec,
3263                                            VkDescriptorSetLayout extraResourcesLayout)
3264     : BufferIoExecutor(context, shaderSpec)
3265     , m_extraResourcesLayout(extraResourcesLayout)
3266 {
3267     const VkPhysicalDeviceFeatures &features = context.getDeviceFeatures();
3268 
3269     if (!features.tessellationShader)
3270         TCU_THROW(NotSupportedError, "Tessellation shader is not supported by device");
3271 }
3272 
3273 TessellationExecutor::~TessellationExecutor(void)
3274 {
3275 }
3276 
3277 void TessellationExecutor::renderTess(uint32_t numValues, uint32_t vertexCount, uint32_t patchControlPoints,
3278                                       VkDescriptorSet extraResources)
3279 {
3280     const size_t inputBufferSize    = numValues * getInputStride();
3281     const VkDevice vkDevice         = m_context.getDevice();
3282     const DeviceInterface &vk       = m_context.getDeviceInterface();
3283     const VkQueue queue             = m_context.getUniversalQueue();
3284     const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
3285     Allocator &memAlloc             = m_context.getDefaultAllocator();
3286 
3287     const tcu::UVec2 renderSize(DEFAULT_RENDER_WIDTH, DEFAULT_RENDER_HEIGHT);
3288 
3289     Move<VkImage> colorImage;
3290     de::MovePtr<Allocation> colorImageAlloc;
3291     VkFormat colorFormat = VK_FORMAT_R8G8B8A8_UNORM;
3292     Move<VkImageView> colorImageView;
3293 
3294     Move<VkRenderPass> renderPass;
3295     Move<VkFramebuffer> framebuffer;
3296     Move<VkPipelineLayout> pipelineLayout;
3297     Move<VkPipeline> graphicsPipeline;
3298 
3299     Move<VkShaderModule> vertexShaderModule;
3300     Move<VkShaderModule> tessControlShaderModule;
3301     Move<VkShaderModule> tessEvalShaderModule;
3302     Move<VkShaderModule> fragmentShaderModule;
3303 
3304     Move<VkCommandPool> cmdPool;
3305     Move<VkCommandBuffer> cmdBuffer;
3306 
3307     Move<VkDescriptorPool> descriptorPool;
3308     Move<VkDescriptorSetLayout> descriptorSetLayout;
3309     Move<VkDescriptorSet> descriptorSet;
3310     const uint32_t numDescriptorSets = (m_extraResourcesLayout != 0) ? 2u : 1u;
3311 
3312     DE_ASSERT((m_extraResourcesLayout != 0) == (extraResources != 0));
3313 
3314     // Create color image
3315     {
3316         const VkImageCreateInfo colorImageParams = {
3317             VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,                                   // VkStructureType sType;
3318             DE_NULL,                                                               // const void* pNext;
3319             0u,                                                                    // VkImageCreateFlags flags;
3320             VK_IMAGE_TYPE_2D,                                                      // VkImageType imageType;
3321             colorFormat,                                                           // VkFormat format;
3322             {renderSize.x(), renderSize.y(), 1u},                                  // VkExtent3D extent;
3323             1u,                                                                    // uint32_t mipLevels;
3324             1u,                                                                    // uint32_t arrayLayers;
3325             VK_SAMPLE_COUNT_1_BIT,                                                 // VkSampleCountFlagBits samples;
3326             VK_IMAGE_TILING_OPTIMAL,                                               // VkImageTiling tiling;
3327             VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT, // VkImageUsageFlags usage;
3328             VK_SHARING_MODE_EXCLUSIVE,                                             // VkSharingMode sharingMode;
3329             1u,                                                                    // uint32_t queueFamilyIndexCount;
3330             &queueFamilyIndex,        // const uint32_t* pQueueFamilyIndices;
3331             VK_IMAGE_LAYOUT_UNDEFINED // VkImageLayout initialLayout;
3332         };
3333 
3334         colorImage = createImage(vk, vkDevice, &colorImageParams);
3335 
3336         // Allocate and bind color image memory
3337         colorImageAlloc =
3338             memAlloc.allocate(getImageMemoryRequirements(vk, vkDevice, *colorImage), MemoryRequirement::Any);
3339         VK_CHECK(vk.bindImageMemory(vkDevice, *colorImage, colorImageAlloc->getMemory(), colorImageAlloc->getOffset()));
3340     }
3341 
3342     // Create color attachment view
3343     {
3344         const VkImageViewCreateInfo colorImageViewParams = {
3345             VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, // VkStructureType sType;
3346             DE_NULL,                                  // const void* pNext;
3347             0u,                                       // VkImageViewCreateFlags flags;
3348             *colorImage,                              // VkImage image;
3349             VK_IMAGE_VIEW_TYPE_2D,                    // VkImageViewType viewType;
3350             colorFormat,                              // VkFormat format;
3351             {
3352                 VK_COMPONENT_SWIZZLE_R, // VkComponentSwizzle r;
3353                 VK_COMPONENT_SWIZZLE_G, // VkComponentSwizzle g;
3354                 VK_COMPONENT_SWIZZLE_B, // VkComponentSwizzle b;
3355                 VK_COMPONENT_SWIZZLE_A  // VkComponentSwizzle a;
3356             },                          // VkComponentMapping components;
3357             {
3358                 VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags aspectMask;
3359                 0u,                        // uint32_t baseMipLevel;
3360                 1u,                        // uint32_t levelCount;
3361                 0u,                        // uint32_t baseArrayLayer;
3362                 1u                         // uint32_t layerCount;
3363             }                              // VkImageSubresourceRange subresourceRange;
3364         };
3365 
3366         colorImageView = createImageView(vk, vkDevice, &colorImageViewParams);
3367     }
3368 
3369     // Create render pass
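    // Single subpass rendering to one color attachment; the attachment is cleared at the start of the pass and its contents stored at the end.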
3370     {
3371         const VkAttachmentDescription colorAttachmentDescription = {
3372             0u,                                      // VkAttachmentDescriptionFlags flags;
3373             colorFormat,                             // VkFormat format;
3374             VK_SAMPLE_COUNT_1_BIT,                   // VkSampleCountFlagBits samples;
3375             VK_ATTACHMENT_LOAD_OP_CLEAR,             // VkAttachmentLoadOp loadOp;
3376             VK_ATTACHMENT_STORE_OP_STORE,            // VkAttachmentStoreOp storeOp;
3377             VK_ATTACHMENT_LOAD_OP_DONT_CARE,         // VkAttachmentLoadOp stencilLoadOp;
3378             VK_ATTACHMENT_STORE_OP_DONT_CARE,        // VkAttachmentStoreOp stencilStoreOp;
3379             VK_IMAGE_LAYOUT_UNDEFINED,               // VkImageLayout initialLayout;
3380             VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL // VkImageLayout                finalLayout
3381         };
3382 
3383         const VkAttachmentDescription attachments[1] = {colorAttachmentDescription};
3384 
3385         const VkAttachmentReference colorAttachmentReference = {
3386             0u,                                      // uint32_t attachment;
3387             VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL // VkImageLayout layout;
3388         };
3389 
3390         const VkSubpassDescription subpassDescription = {
3391             0u,                              // VkSubpassDescriptionFlags flags;
3392             VK_PIPELINE_BIND_POINT_GRAPHICS, // VkPipelineBindPoint pipelineBindPoint;
3393             0u,                              // uint32_t inputAttachmentCount;
3394             DE_NULL,                         // const VkAttachmentReference* pInputAttachments;
3395             1u,                              // uint32_t colorAttachmentCount;
3396             &colorAttachmentReference,       // const VkAttachmentReference* pColorAttachments;
3397             DE_NULL,                         // const VkAttachmentReference* pResolveAttachments;
3398             DE_NULL,                         // const VkAttachmentReference* pDepthStencilAttachment;
3399             0u,                              // uint32_t preserveAttachmentCount;
3400             DE_NULL                          // const uint32_t* pPreserveAttachments;
3401         };
3402 
3403         const VkRenderPassCreateInfo renderPassParams = {
3404             VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // VkStructureType sType;
3405             DE_NULL,                                   // const void* pNext;
3406             0u,                                        // VkRenderPassCreateFlags flags;
3407             1u,                                        // uint32_t attachmentCount;
3408             attachments,                               // const VkAttachmentDescription* pAttachments;
3409             1u,                                        // uint32_t subpassCount;
3410             &subpassDescription,                       // const VkSubpassDescription* pSubpasses;
3411             0u,                                        // uint32_t dependencyCount;
3412             DE_NULL                                    // const VkSubpassDependency* pDependencies;
3413         };
3414 
3415         renderPass = createRenderPass(vk, vkDevice, &renderPassParams);
3416     }
3417 
3418     // Create framebuffer
3419     {
3420         const VkFramebufferCreateInfo framebufferParams = {
3421             VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, // VkStructureType sType;
3422             DE_NULL,                                   // const void* pNext;
3423             0u,                                        // VkFramebufferCreateFlags flags;
3424             *renderPass,                               // VkRenderPass renderPass;
3425             1u,                                        // uint32_t attachmentCount;
3426             &*colorImageView,                          // const VkImageView* pAttachments;
3427             (uint32_t)renderSize.x(),                  // uint32_t width;
3428             (uint32_t)renderSize.y(),                  // uint32_t height;
3429             1u                                         // uint32_t layers;
3430         };
3431 
3432         framebuffer = createFramebuffer(vk, vkDevice, &framebufferParams);
3433     }
3434 
3435     // Create descriptors
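    // Set 0 holds the executor's output (and optional input) storage buffers; the caller may supply an extra-resources set as set 1.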
3436     {
3437         DescriptorPoolBuilder descriptorPoolBuilder;
3438         DescriptorSetLayoutBuilder descriptorSetLayoutBuilder;
3439 
3440         descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_ALL);
3441         descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
3442         descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_ALL);
3443         descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
3444 
3445         descriptorSetLayout = descriptorSetLayoutBuilder.build(vk, vkDevice);
3446         descriptorPool =
3447             descriptorPoolBuilder.build(vk, vkDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3448 
3449         const VkDescriptorSetAllocateInfo allocInfo = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, DE_NULL,
3450                                                        *descriptorPool, 1u, &*descriptorSetLayout};
3451 
3452         descriptorSet = allocateDescriptorSet(vk, vkDevice, &allocInfo);
3453         // Update descriptors
3454         {
3455             DescriptorSetUpdateBuilder descriptorSetUpdateBuilder;
3456             const VkDescriptorBufferInfo outputDescriptorBufferInfo = {
3457                 *m_outputBuffer, // VkBuffer buffer;
3458                 0u,              // VkDeviceSize offset;
3459                 VK_WHOLE_SIZE    // VkDeviceSize range;
3460             };
3461 
3462             descriptorSetUpdateBuilder.writeSingle(
3463                 *descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((uint32_t)OUTPUT_BUFFER_BINDING),
3464                 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputDescriptorBufferInfo);
3465 
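            // The input buffer binding is written only when the shader declares inputs (inputBufferSize > 0).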
3466             VkDescriptorBufferInfo inputDescriptorBufferInfo = {
3467                 0,            // VkBuffer buffer;
3468                 0u,           // VkDeviceSize offset;
3469                 VK_WHOLE_SIZE // VkDeviceSize range;
3470             };
3471 
3472             if (inputBufferSize > 0)
3473             {
3474                 inputDescriptorBufferInfo.buffer = *m_inputBuffer;
3475 
3476                 descriptorSetUpdateBuilder.writeSingle(
3477                     *descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((uint32_t)INPUT_BUFFER_BINDING),
3478                     VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputDescriptorBufferInfo);
3479             }
3480 
3481             descriptorSetUpdateBuilder.update(vk, vkDevice);
3482         }
3483     }
3484 
3485     // Create pipeline layout
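    // With no extra-resources layout, numDescriptorSets is 1 and the second entry of descriptorSetLayouts is ignored.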
3486     {
3487         const VkDescriptorSetLayout descriptorSetLayouts[]    = {*descriptorSetLayout, m_extraResourcesLayout};
3488         const VkPipelineLayoutCreateInfo pipelineLayoutParams = {
3489             VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // VkStructureType sType;
3490             DE_NULL,                                       // const void* pNext;
3491             (VkPipelineLayoutCreateFlags)0,                // VkPipelineLayoutCreateFlags flags;
3492             numDescriptorSets,                             // uint32_t setLayoutCount;
3493             descriptorSetLayouts,                          // const VkDescriptorSetLayout* pSetLayouts;
3494             0u,                                            // uint32_t pushConstantRangeCount;
3495             DE_NULL                                        // const VkPushConstantRange* pPushConstantRanges;
3496         };
3497 
3498         pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
3499     }
3500 
3501     // Create shader modules
3502     {
3503         vertexShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("vert"), 0);
3504         tessControlShaderModule =
3505             createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("tess_control"), 0);
3506         tessEvalShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("tess_eval"), 0);
3507         fragmentShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("frag"), 0);
3508     }
3509 
3510     // Create pipeline
3511     {
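        // The draw binds no vertex buffers, so the vertex input state declares no bindings or attributes.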
3512         const VkPipelineVertexInputStateCreateInfo vertexInputStateParams = {
3513             VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
3514             DE_NULL,                                                   // const void* pNext;
3515             (VkPipelineVertexInputStateCreateFlags)0,                  // VkPipelineVertexInputStateCreateFlags flags;
3516             0u,                                                        // uint32_t vertexBindingDescriptionCount;
3517             DE_NULL, // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
3518             0u,      // uint32_t vertexAttributeDescriptionCount;
3519             DE_NULL, // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions;
3520         };
3521 
3522         const std::vector<VkViewport> viewports(1, makeViewport(renderSize));
3523         const std::vector<VkRect2D> scissors(1, makeRect2D(renderSize));
3524 
3525         graphicsPipeline = makeGraphicsPipeline(
3526             vk,                       // const DeviceInterface&                        vk
3527             vkDevice,                 // const VkDevice                                device
3528             *pipelineLayout,          // const VkPipelineLayout                        pipelineLayout
3529             *vertexShaderModule,      // const VkShaderModule                          vertexShaderModule
3530             *tessControlShaderModule, // const VkShaderModule                          tessellationControlShaderModule
3531             *tessEvalShaderModule,    // const VkShaderModule                          tessellationEvalShaderModule
3532             DE_NULL,                  // const VkShaderModule                          geometryShaderModule
3533             *fragmentShaderModule,    // const VkShaderModule                          fragmentShaderModule
3534             *renderPass,              // const VkRenderPass                            renderPass
3535             viewports,                // const std::vector<VkViewport>&                viewports
3536             scissors,                 // const std::vector<VkRect2D>&                  scissors
3537             VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, // const VkPrimitiveTopology                     topology
3538             0u,                               // const uint32_t                                subpass
3539             patchControlPoints,               // const uint32_t                                patchControlPoints
3540             &vertexInputStateParams); // const VkPipelineVertexInputStateCreateInfo*   vertexInputStateCreateInfo
3541     }
3542 
3543     // Create command pool
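    // The pool is transient: the command buffer below is recorded once and submitted a single time.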
3544     cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
3545 
3546     // Create command buffer
3547     {
3548         const VkClearValue clearValue = getDefaultClearColor();
3549 
3550         cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
3551 
3552         beginCommandBuffer(vk, *cmdBuffer);
3553 
3554         beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, renderSize.x(), renderSize.y()),
3555                         clearValue);
3556 
3557         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *graphicsPipeline);
3558 
3559         {
3560             const VkDescriptorSet descriptorSets[] = {*descriptorSet, extraResources};
3561             vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u,
3562                                      numDescriptorSets, descriptorSets, 0u, DE_NULL);
3563         }
3564 
3565         vk.cmdDraw(*cmdBuffer, vertexCount, 1, 0, 0);
3566 
3567         endRenderPass(vk, *cmdBuffer);
3568 
3569         // Insert a barrier so data written by the shader is available to the host
3570         {
3571             const VkBufferMemoryBarrier bufferBarrier = {
3572                 VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // VkStructureType    sType;
3573                 DE_NULL,                                 // const void*        pNext;
3574                 VK_ACCESS_SHADER_WRITE_BIT,              // VkAccessFlags      srcAccessMask;
3575                 VK_ACCESS_HOST_READ_BIT,                 // VkAccessFlags      dstAccessMask;
3576                 VK_QUEUE_FAMILY_IGNORED,                 // uint32_t           srcQueueFamilyIndex;
3577                 VK_QUEUE_FAMILY_IGNORED,                 // uint32_t           dstQueueFamilyIndex;
3578                 *m_outputBuffer,                         // VkBuffer           buffer;
3579                 0,                                       // VkDeviceSize       offset;
3580                 VK_WHOLE_SIZE,                           // VkDeviceSize       size;
3581             };
3582 
3583             vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT,
3584                                   vk::VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0,
3585                                   (const VkMemoryBarrier *)DE_NULL, 1, &bufferBarrier, 0,
3586                                   (const VkImageMemoryBarrier *)DE_NULL);
3587         }
3588 
3589         endCommandBuffer(vk, *cmdBuffer);
3590     }
3591 
3592     // Submit the draw commands and wait for completion
3593     submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());
3594 }
3595 
3596 // TessControlExecutor
3597 
3598 class TessControlExecutor : public TessellationExecutor
3599 {
3600 public:
3601     TessControlExecutor(Context &context, const ShaderSpec &shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
3602     virtual ~TessControlExecutor(void);
3603 
3604     static void generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection);
3605 
3606     virtual void execute(int numValues, const void *const *inputs, void *const *outputs,
3607                          VkDescriptorSet extraResources);
3608 
3609 protected:
3610     static std::string generateTessControlShader(const ShaderSpec &shaderSpec);
3611 };
3612 
3613 TessControlExecutor::TessControlExecutor(Context &context, const ShaderSpec &shaderSpec,
3614                                          VkDescriptorSetLayout extraResourcesLayout)
3615     : TessellationExecutor(context, shaderSpec, extraResourcesLayout)
3616 {
3617 }
3618 
3619 TessControlExecutor::~TessControlExecutor(void)
3620 {
3621 }
3622 
3623 std::string TessControlExecutor::generateTessControlShader(const ShaderSpec &shaderSpec)
3624 {
3625     std::ostringstream src;
3626     src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
3627 
3628     if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
3629         src << "#extension GL_EXT_tessellation_shader : require\n\n";
3630 
3631     if (!shaderSpec.globalDeclarations.empty())
3632         src << shaderSpec.globalDeclarations << "\n";
3633 
3634     src << "\nlayout(vertices = 1) out;\n\n";
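    // One output vertex per patch; gl_PrimitiveID (the patch index) is used below as the per-value invocation id.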
3635 
3636     declareBufferBlocks(src, shaderSpec);
3637 
3638     src << "void main (void)\n{\n";
3639 
3640     for (int ndx = 0; ndx < 2; ndx++)
3641         src << "\tgl_TessLevelInner[" << ndx << "] = 1.0;\n";
3642 
3643     for (int ndx = 0; ndx < 4; ndx++)
3644         src << "\tgl_TessLevelOuter[" << ndx << "] = 1.0;\n";
3645 
3646     src << "\n"
3647         << "\thighp uint invocationId = uint(gl_PrimitiveID);\n";
3648 
3649     generateExecBufferIo(src, shaderSpec, "invocationId");
3650 
3651     src << "}\n";
3652 
3653     return src.str();
3654 }
3655 
3656 static std::string generateEmptyTessEvalShader()
3657 {
3658     std::ostringstream src;
3659 
3660     src << "#version 450\n"
3661            "#extension GL_EXT_tessellation_shader : require\n\n";
3662 
3663     src << "layout(triangles, ccw) in;\n";
3664 
3665     src << "\nvoid main (void)\n{\n"
3666         << "\tgl_Position = vec4(gl_TessCoord.xy, 0.0, 1.0);\n"
3667         << "}\n";
3668 
3669     return src.str();
3670 }
3671 
3672 void TessControlExecutor::generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection)
3673 {
3674     programCollection.glslSources.add("vert")
3675         << glu::VertexSource(generateVertexShaderForTess()) << shaderSpec.buildOptions;
3676     programCollection.glslSources.add("tess_control")
3677         << glu::TessellationControlSource(generateTessControlShader(shaderSpec)) << shaderSpec.buildOptions;
3678     programCollection.glslSources.add("tess_eval")
3679         << glu::TessellationEvaluationSource(generateEmptyTessEvalShader()) << shaderSpec.buildOptions;
3680     programCollection.glslSources.add("frag")
3681         << glu::FragmentSource(generateEmptyFragmentSource()) << shaderSpec.buildOptions;
3682 }
3683 
3684 void TessControlExecutor::execute(int numValues, const void *const *inputs, void *const *outputs,
3685                                   VkDescriptorSet extraResources)
3686 {
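    // Each test value gets its own 3-vertex patch, so the total vertex count is patchSize * numValues.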
3687     const uint32_t patchSize = 3;
3688 
3689     initBuffers(numValues);
3690 
3691     // Setup input buffer & copy data
3692     uploadInputBuffer(inputs, numValues, false);
3693 
3694     renderTess(numValues, patchSize * numValues, patchSize, extraResources);
3695 
3696     // Read back data
3697     readOutputBuffer(outputs, numValues);
3698 }
3699 
3700 // TessEvaluationExecutor
3701 
3702 class TessEvaluationExecutor : public TessellationExecutor
3703 {
3704 public:
3705     TessEvaluationExecutor(Context &context, const ShaderSpec &shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
3706     virtual ~TessEvaluationExecutor(void);
3707 
3708     static void generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection);
3709 
3710     virtual void execute(int numValues, const void *const *inputs, void *const *outputs,
3711                          VkDescriptorSet extraResources);
3712 
3713 protected:
3714     static std::string generateTessEvalShader(const ShaderSpec &shaderSpec);
3715 };
3716 
3717 TessEvaluationExecutor::TessEvaluationExecutor(Context &context, const ShaderSpec &shaderSpec,
3718                                                VkDescriptorSetLayout extraResourcesLayout)
3719     : TessellationExecutor(context, shaderSpec, extraResourcesLayout)
3720 {
3721 }
3722 
3723 TessEvaluationExecutor::~TessEvaluationExecutor(void)
3724 {
3725 }
3726 
3727 static std::string generatePassthroughTessControlShader(void)
3728 {
3729     std::ostringstream src;
3730 
3731     src << "#version 450\n"
3732            "#extension GL_EXT_tessellation_shader : require\n\n";
3733 
3734     src << "layout(vertices = 1) out;\n\n";
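    // Pass-through control shader: it only sets all tessellation levels to 1.0 and emits no per-vertex data.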
3735 
3736     src << "void main (void)\n{\n";
3737 
3738     for (int ndx = 0; ndx < 2; ndx++)
3739         src << "\tgl_TessLevelInner[" << ndx << "] = 1.0;\n";
3740 
3741     for (int ndx = 0; ndx < 4; ndx++)
3742         src << "\tgl_TessLevelOuter[" << ndx << "] = 1.0;\n";
3743 
3744     src << "}\n";
3745 
3746     return src.str();
3747 }
3748 
3749 std::string TessEvaluationExecutor::generateTessEvalShader(const ShaderSpec &shaderSpec)
3750 {
3751     std::ostringstream src;
3752 
3753     src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
3754 
3755     if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
3756         src << "#extension GL_EXT_tessellation_shader : require\n\n";
3757 
3758     if (!shaderSpec.globalDeclarations.empty())
3759         src << shaderSpec.globalDeclarations << "\n";
3760 
3761     src << "\n";
3762 
3763     src << "layout(isolines, equal_spacing) in;\n\n";
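    // With all tessellation levels at 1.0, each isoline patch yields two vertices; the invocation id below maps them to values 2*gl_PrimitiveID and 2*gl_PrimitiveID + 1.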
3764 
3765     declareBufferBlocks(src, shaderSpec);
3766 
3767     src << "void main (void)\n{\n"
3768         << "\tgl_Position = vec4(gl_TessCoord.x, 0.0, 0.0, 1.0);\n"
3769         << "\thighp uint invocationId = uint(gl_PrimitiveID)*2u + (gl_TessCoord.x > 0.5 ? 1u : 0u);\n";
3770 
3771     generateExecBufferIo(src, shaderSpec, "invocationId");
3772 
3773     src << "}\n";
3774 
3775     return src.str();
3776 }
3777 
3778 void TessEvaluationExecutor::generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection)
3779 {
3780     programCollection.glslSources.add("vert")
3781         << glu::VertexSource(generateVertexShaderForTess()) << shaderSpec.buildOptions;
3782     programCollection.glslSources.add("tess_control")
3783         << glu::TessellationControlSource(generatePassthroughTessControlShader()) << shaderSpec.buildOptions;
3784     programCollection.glslSources.add("tess_eval")
3785         << glu::TessellationEvaluationSource(generateTessEvalShader(shaderSpec)) << shaderSpec.buildOptions;
3786     programCollection.glslSources.add("frag")
3787         << glu::FragmentSource(generateEmptyFragmentSource()) << shaderSpec.buildOptions;
3788 }
3789 
3790 void TessEvaluationExecutor::execute(int numValues, const void *const *inputs, void *const *outputs,
3791                                      VkDescriptorSet extraResources)
3792 {
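    // Each isoline patch covers two consecutive values, so the value count is padded up to an even number.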
3793     const int patchSize     = 2;
3794     const int alignedValues = deAlign32(numValues, patchSize);
3795 
3796     // Initialize buffers with aligned value count to make room for padding
3797     initBuffers(alignedValues);
3798 
3799     // Setup input buffer & copy data
3800     uploadInputBuffer(inputs, numValues, false);
3801 
3802     renderTess((uint32_t)alignedValues, (uint32_t)alignedValues, (uint32_t)patchSize, extraResources);
3803 
3804     // Read back data
3805     readOutputBuffer(outputs, numValues);
3806 }
3807 
3808 } // namespace
3809 
3810 // ShaderExecutor
3811 
3812 ShaderExecutor::~ShaderExecutor(void)
3813 {
3814 }
3815 
3816 bool ShaderExecutor::areInputs16Bit(void) const
3817 {
3818     for (vector<Symbol>::const_iterator symIter = m_shaderSpec.inputs.begin(); symIter != m_shaderSpec.inputs.end();
3819          ++symIter)
3820     {
3821         if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
3822             return true;
3823     }
3824     return false;
3825 }
3826 
3827 bool ShaderExecutor::areOutputs16Bit(void) const
3828 {
3829     for (vector<Symbol>::const_iterator symIter = m_shaderSpec.outputs.begin(); symIter != m_shaderSpec.outputs.end();
3830          ++symIter)
3831     {
3832         if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
3833             return true;
3834     }
3835     return false;
3836 }
3837 
3838 bool ShaderExecutor::isOutput16Bit(const size_t ndx) const
3839 {
3840     if (glu::isDataTypeFloat16OrVec(m_shaderSpec.outputs[ndx].varType.getBasicType()))
3841         return true;
3842     return false;
3843 }
3844 
3845 bool ShaderExecutor::areInputs64Bit(void) const
3846 {
3847     for (vector<Symbol>::const_iterator symIter = m_shaderSpec.inputs.begin(); symIter != m_shaderSpec.inputs.end();
3848          ++symIter)
3849     {
3850         if (glu::isDataTypeDoubleType(symIter->varType.getBasicType()))
3851             return true;
3852     }
3853     return false;
3854 }
3855 
3856 bool ShaderExecutor::areOutputs64Bit(void) const
3857 {
3858     for (vector<Symbol>::const_iterator symIter = m_shaderSpec.outputs.begin(); symIter != m_shaderSpec.outputs.end();
3859          ++symIter)
3860     {
3861         if (glu::isDataTypeDoubleType(symIter->varType.getBasicType()))
3862             return true;
3863     }
3864     return false;
3865 }
3866 
3867 bool ShaderExecutor::isOutput64Bit(const size_t ndx) const
3868 {
3869     if (glu::isDataTypeDoubleType(m_shaderSpec.outputs[ndx].varType.getBasicType()))
3870         return true;
3871     return false;
3872 }
3873 
3874 // Utilities
3875 
3876 void generateSources(glu::ShaderType shaderType, const ShaderSpec &shaderSpec, vk::SourceCollections &dst)
3877 {
3878     switch (shaderType)
3879     {
3880     case glu::SHADERTYPE_VERTEX:
3881         VertexShaderExecutor::generateSources(shaderSpec, dst);
3882         break;
3883     case glu::SHADERTYPE_TESSELLATION_CONTROL:
3884         TessControlExecutor::generateSources(shaderSpec, dst);
3885         break;
3886     case glu::SHADERTYPE_TESSELLATION_EVALUATION:
3887         TessEvaluationExecutor::generateSources(shaderSpec, dst);
3888         break;
3889     case glu::SHADERTYPE_GEOMETRY:
3890         GeometryShaderExecutor::generateSources(shaderSpec, dst);
3891         break;
3892     case glu::SHADERTYPE_FRAGMENT:
3893         FragmentShaderExecutor::generateSources(shaderSpec, dst);
3894         break;
3895     case glu::SHADERTYPE_COMPUTE:
3896         ComputeShaderExecutor::generateSources(shaderSpec, dst);
3897         break;
3898 #ifndef CTS_USES_VULKANSC
3899     case glu::SHADERTYPE_MESH:
3900         MeshTaskShaderExecutor::generateSources(shaderSpec, dst, false /*useTask*/);
3901         break;
3902     case glu::SHADERTYPE_TASK:
3903         MeshTaskShaderExecutor::generateSources(shaderSpec, dst, true /*useTask*/);
3904         break;
3905 #endif // CTS_USES_VULKANSC
3906     default:
3907         TCU_THROW(InternalError, "Unsupported shader type");
3908     }
3909 }
3910 
3911 ShaderExecutor *createExecutor(Context &context, glu::ShaderType shaderType, const ShaderSpec &shaderSpec,
3912                                VkDescriptorSetLayout extraResourcesLayout)
3913 {
3914     switch (shaderType)
3915     {
3916     case glu::SHADERTYPE_VERTEX:
3917         return new VertexShaderExecutor(context, shaderSpec, extraResourcesLayout);
3918     case glu::SHADERTYPE_TESSELLATION_CONTROL:
3919         return new TessControlExecutor(context, shaderSpec, extraResourcesLayout);
3920     case glu::SHADERTYPE_TESSELLATION_EVALUATION:
3921         return new TessEvaluationExecutor(context, shaderSpec, extraResourcesLayout);
3922     case glu::SHADERTYPE_GEOMETRY:
3923         return new GeometryShaderExecutor(context, shaderSpec, extraResourcesLayout);
3924     case glu::SHADERTYPE_FRAGMENT:
3925         return new FragmentShaderExecutor(context, shaderSpec, extraResourcesLayout);
3926     case glu::SHADERTYPE_COMPUTE:
3927         return new ComputeShaderExecutor(context, shaderSpec, extraResourcesLayout);
3928 #ifndef CTS_USES_VULKANSC
3929     case glu::SHADERTYPE_MESH:
3930         return new MeshTaskShaderExecutor(context, shaderSpec, extraResourcesLayout);
3931     case glu::SHADERTYPE_TASK:
3932         return new MeshTaskShaderExecutor(context, shaderSpec, extraResourcesLayout);
3933 #endif // CTS_USES_VULKANSC
3934     default:
3935         TCU_THROW(InternalError, "Unsupported shader type");
3936     }
3937 }
3938 
3939 bool executorSupported(glu::ShaderType shaderType)
3940 {
3941     switch (shaderType)
3942     {
3943     case glu::SHADERTYPE_VERTEX:
3944     case glu::SHADERTYPE_TESSELLATION_CONTROL:
3945     case glu::SHADERTYPE_TESSELLATION_EVALUATION:
3946     case glu::SHADERTYPE_GEOMETRY:
3947     case glu::SHADERTYPE_FRAGMENT:
3948     case glu::SHADERTYPE_COMPUTE:
3949     case glu::SHADERTYPE_MESH:
3950     case glu::SHADERTYPE_TASK:
3951         return true;
3952     default:
3953         return false;
3954     }
3955 }
3956 
3957 void checkSupportShader(Context &context, const glu::ShaderType shaderType)
3958 {
3959     // Stage support.
3960     switch (shaderType)
3961     {
3962     case glu::SHADERTYPE_TESSELLATION_CONTROL:
3963     case glu::SHADERTYPE_TESSELLATION_EVALUATION:
3964         context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_TESSELLATION_SHADER);
3965         break;
3966 
3967     case glu::SHADERTYPE_GEOMETRY:
3968         context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_GEOMETRY_SHADER);
3969         break;
3970 
3971     case glu::SHADERTYPE_TASK:
3972     case glu::SHADERTYPE_MESH:
3973     {
3974         context.requireDeviceFunctionality("VK_EXT_mesh_shader");
3975 
3976         if (shaderType == glu::SHADERTYPE_TASK)
3977         {
3978 #ifndef CTS_USES_VULKANSC
3979             const auto &features = context.getMeshShaderFeaturesEXT();
3980             if (!features.taskShader)
3981                 TCU_THROW(NotSupportedError, "taskShader not supported");
3982 #else  // CTS_USES_VULKANSC
3983             TCU_THROW(NotSupportedError, "taskShader not supported");
3984 #endif // CTS_USES_VULKANSC
3985         }
3986     }
3987     break;
3988 
3989     default:
3990         break;
3991     }
3992 
3993     // Stores and atomic operation support.
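    // Results are written to storage buffers from the tested stage, so the matching stores-and-atomics feature is required outside compute.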
3994     switch (shaderType)
3995     {
3996     case glu::SHADERTYPE_VERTEX:
3997     case glu::SHADERTYPE_TESSELLATION_CONTROL:
3998     case glu::SHADERTYPE_TESSELLATION_EVALUATION:
3999     case glu::SHADERTYPE_GEOMETRY:
4000     case glu::SHADERTYPE_TASK:
4001     case glu::SHADERTYPE_MESH:
4002         context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_VERTEX_PIPELINE_STORES_AND_ATOMICS);
4003         break;
4004     case glu::SHADERTYPE_FRAGMENT:
4005         context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_FRAGMENT_STORES_AND_ATOMICS);
4006         break;
4007     case glu::SHADERTYPE_COMPUTE:
4008         break;
4009     default:
4010         DE_FATAL("Unsupported shader type");
4011         break;
4012     }
4013 
4014 #ifndef CTS_USES_VULKANSC
4015     if (shaderType == glu::SHADERTYPE_TESSELLATION_EVALUATION &&
4016         context.isDeviceFunctionalitySupported("VK_KHR_portability_subset") &&
4017         !context.getPortabilitySubsetFeatures().tessellationIsolines)
4018     {
4019         TCU_THROW(NotSupportedError,
4020                   "VK_KHR_portability_subset: Tessellation iso lines are not supported by this implementation");
4021     }
4022 #endif // CTS_USES_VULKANSC
4023 }
4024 
4025 } // namespace shaderexecutor
4026 } // namespace vkt
4027