1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2021 The Khronos Group Inc.
6  * Copyright (c) 2021 Valve Corporation.
7  * Copyright (c) 2023 LunarG, Inc.
8  * Copyright (c) 2023 Nintendo
9  *
10  * Licensed under the Apache License, Version 2.0 (the "License");
11  * you may not use this file except in compliance with the License.
12  * You may obtain a copy of the License at
13  *
14  *      http://www.apache.org/licenses/LICENSE-2.0
15  *
16  * Unless required by applicable law or agreed to in writing, software
17  * distributed under the License is distributed on an "AS IS" BASIS,
18  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19  * See the License for the specific language governing permissions and
20  * limitations under the License.
21  *
22  *//*!
23  * \file
24  * \brief Mesh Shader Smoke Tests for VK_EXT_mesh_shader
25  *//*--------------------------------------------------------------------*/
26 
27 #include "vktMeshShaderSmokeTestsEXT.hpp"
28 #include "vktMeshShaderUtil.hpp"
29 #include "vktTestCase.hpp"
30 #include "vktTestCaseUtil.hpp"
31 
32 #include "vkBuilderUtil.hpp"
33 #include "vkImageWithMemory.hpp"
34 #include "vkBufferWithMemory.hpp"
35 #include "vkObjUtil.hpp"
36 #include "vkTypeUtil.hpp"
37 #include "vkCmdUtil.hpp"
38 #include "vkImageUtil.hpp"
39 #include "vkBarrierUtil.hpp"
40 #include "vkPipelineConstructionUtil.hpp"
41 
42 #include "tcuImageCompare.hpp"
43 #include "tcuTestLog.hpp"
44 #include "tcuTextureUtil.hpp"
45 
46 #include "deRandom.hpp"
47 
48 #include <utility>
49 #include <vector>
50 #include <string>
51 #include <sstream>
52 #include <set>
53 #include <memory>
54 
55 namespace vkt
56 {
57 namespace MeshShader
58 {
59 
60 namespace
61 {
62 
63 using GroupPtr = de::MovePtr<tcu::TestCaseGroup>;
64 
65 using namespace vk;
66 
// Returns the GLSL fragment shader used by the triangle test cases in this file.
// It copies the per-primitive input "triangleColor" (location 0) to the color output.
std::string commonMeshFragShader()
{
    return std::string("#version 450\n"
                       "#extension GL_EXT_mesh_shader : enable\n"
                       "\n"
                       "layout (location=0) in perprimitiveEXT vec4 triangleColor;\n"
                       "layout (location=0) out vec4 outColor;\n"
                       "\n"
                       "void main ()\n"
                       "{\n"
                       "    outColor = triangleColor;\n"
                       "}\n");
}
81 
getClearColor()82 tcu::Vec4 getClearColor()
83 {
84     return tcu::Vec4(0.0f, 0.0f, 0.0f, 1.0f);
85 }
86 
makeMeshGraphicsPipeline(GraphicsPipelineWrapper & maker,const PipelineLayoutWrapper & pipelineLayout,const ShaderWrapper taskShader,const ShaderWrapper meshShader,const ShaderWrapper fragShader,const VkRenderPass renderPass,const std::vector<VkViewport> & viewports,const std::vector<VkRect2D> & scissors,const uint32_t subpass=0u,const VkPipelineDepthStencilStateCreateInfo * depthStencilStateCreateInfo=nullptr,VkPipelineFragmentShadingRateStateCreateInfoKHR * fragmentShadingRateStateCreateInfo=nullptr)87 void makeMeshGraphicsPipeline(
88     GraphicsPipelineWrapper &maker, const PipelineLayoutWrapper &pipelineLayout, const ShaderWrapper taskShader,
89     const ShaderWrapper meshShader, const ShaderWrapper fragShader, const VkRenderPass renderPass,
90     const std::vector<VkViewport> &viewports, const std::vector<VkRect2D> &scissors, const uint32_t subpass = 0u,
91     const VkPipelineDepthStencilStateCreateInfo *depthStencilStateCreateInfo            = nullptr,
92     VkPipelineFragmentShadingRateStateCreateInfoKHR *fragmentShadingRateStateCreateInfo = nullptr)
93 {
94 #ifndef CTS_USES_VULKANSC
95     maker.setDefaultMultisampleState()
96         .setDefaultColorBlendState()
97         .setDefaultRasterizationState()
98         .setDefaultDepthStencilState()
99         .setupPreRasterizationMeshShaderState(viewports, scissors, pipelineLayout, renderPass, subpass, taskShader,
100                                               meshShader, nullptr, nullptr, nullptr, fragmentShadingRateStateCreateInfo)
101         .setupFragmentShaderState(pipelineLayout, renderPass, subpass, fragShader, depthStencilStateCreateInfo)
102         .setupFragmentOutputState(renderPass, subpass)
103         .setMonolithicPipelineLayout(pipelineLayout)
104         .buildPipeline();
105 #else
106     DE_ASSERT(false);
107 #endif // CTS_USES_VULKANSC
108 }
109 
110 struct MeshTriangleRendererParams
111 {
112     PipelineConstructionType constructionType;
113     std::vector<tcu::Vec4> vertexCoords;
114     std::vector<uint32_t> vertexIndices;
115     uint32_t taskCount;
116     tcu::Vec4 expectedColor;
117     bool rasterizationDisabled;
118 
MeshTriangleRendererParamsvkt::MeshShader::__anon926f16030111::MeshTriangleRendererParams119     MeshTriangleRendererParams(PipelineConstructionType constructionType_, std::vector<tcu::Vec4> vertexCoords_,
120                                std::vector<uint32_t> vertexIndices_, uint32_t taskCount_,
121                                const tcu::Vec4 &expectedColor_, bool rasterizationDisabled_ = false)
122         : constructionType(constructionType_)
123         , vertexCoords(std::move(vertexCoords_))
124         , vertexIndices(std::move(vertexIndices_))
125         , taskCount(taskCount_)
126         , expectedColor(expectedColor_)
127         , rasterizationDisabled(rasterizationDisabled_)
128     {
129     }
130 
MeshTriangleRendererParamsvkt::MeshShader::__anon926f16030111::MeshTriangleRendererParams131     MeshTriangleRendererParams(MeshTriangleRendererParams &&other)
132         : MeshTriangleRendererParams(other.constructionType, std::move(other.vertexCoords),
133                                      std::move(other.vertexIndices), other.taskCount, other.expectedColor,
134                                      other.rasterizationDisabled)
135     {
136     }
137 };
138 
139 class MeshOnlyTriangleCase : public vkt::TestCase
140 {
141 public:
MeshOnlyTriangleCase(tcu::TestContext & testCtx,const std::string & name,PipelineConstructionType constructionType,bool rasterizationDisabled=false)142     MeshOnlyTriangleCase(tcu::TestContext &testCtx, const std::string &name, PipelineConstructionType constructionType,
143                          bool rasterizationDisabled = false)
144         : vkt::TestCase(testCtx, name)
145         , m_constructionType(constructionType)
146         , m_rasterizationDisabled(rasterizationDisabled)
147     {
148     }
~MeshOnlyTriangleCase(void)149     virtual ~MeshOnlyTriangleCase(void)
150     {
151     }
152 
153     void initPrograms(vk::SourceCollections &programCollection) const override;
154     TestInstance *createInstance(Context &context) const override;
155     void checkSupport(Context &context) const override;
156 
157 protected:
158     const PipelineConstructionType m_constructionType;
159     const bool m_rasterizationDisabled;
160 };
161 
162 class MeshTaskTriangleCase : public vkt::TestCase
163 {
164 public:
MeshTaskTriangleCase(tcu::TestContext & testCtx,const std::string & name,PipelineConstructionType constructionType)165     MeshTaskTriangleCase(tcu::TestContext &testCtx, const std::string &name, PipelineConstructionType constructionType)
166         : vkt::TestCase(testCtx, name)
167         , m_constructionType(constructionType)
168     {
169     }
~MeshTaskTriangleCase(void)170     virtual ~MeshTaskTriangleCase(void)
171     {
172     }
173 
174     void initPrograms(vk::SourceCollections &programCollection) const override;
175     TestInstance *createInstance(Context &context) const override;
176     void checkSupport(Context &context) const override;
177 
178 protected:
179     const PipelineConstructionType m_constructionType;
180 };
181 
182 // Note: not actually task-only. The task shader will not emit mesh shader work groups.
183 class TaskOnlyTriangleCase : public vkt::TestCase
184 {
185 public:
TaskOnlyTriangleCase(tcu::TestContext & testCtx,const std::string & name,PipelineConstructionType constructionType)186     TaskOnlyTriangleCase(tcu::TestContext &testCtx, const std::string &name, PipelineConstructionType constructionType)
187         : vkt::TestCase(testCtx, name)
188         , m_constructionType(constructionType)
189     {
190     }
~TaskOnlyTriangleCase(void)191     virtual ~TaskOnlyTriangleCase(void)
192     {
193     }
194 
195     void initPrograms(vk::SourceCollections &programCollection) const override;
196     TestInstance *createInstance(Context &context) const override;
197     void checkSupport(Context &context) const override;
198 
199 protected:
200     const PipelineConstructionType m_constructionType;
201 };
202 
203 class MeshTriangleRenderer : public vkt::TestInstance
204 {
205 public:
MeshTriangleRenderer(Context & context,MeshTriangleRendererParams params)206     MeshTriangleRenderer(Context &context, MeshTriangleRendererParams params)
207         : vkt::TestInstance(context)
208         , m_params(std::move(params))
209     {
210     }
~MeshTriangleRenderer(void)211     virtual ~MeshTriangleRenderer(void)
212     {
213     }
214 
215     tcu::TestStatus iterate(void) override;
216 
217 protected:
218     MeshTriangleRendererParams m_params;
219 };
220 
checkSupport(Context & context) const221 void MeshOnlyTriangleCase::checkSupport(Context &context) const
222 {
223     checkTaskMeshShaderSupportEXT(context, false, true);
224     checkPipelineConstructionRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
225                                           m_constructionType);
226 }
227 
checkSupport(Context & context) const228 void MeshTaskTriangleCase::checkSupport(Context &context) const
229 {
230     checkTaskMeshShaderSupportEXT(context, true, true);
231     checkPipelineConstructionRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
232                                           m_constructionType);
233 }
234 
checkSupport(Context & context) const235 void TaskOnlyTriangleCase::checkSupport(Context &context) const
236 {
237     checkTaskMeshShaderSupportEXT(context, true, true);
238     checkPipelineConstructionRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
239                                           m_constructionType);
240 }
241 
initPrograms(SourceCollections & dst) const242 void MeshOnlyTriangleCase::initPrograms(SourceCollections &dst) const
243 {
244     const auto buildOptions = getMinMeshEXTBuildOptions(dst.usedVulkanVersion);
245 
246     std::ostringstream mesh;
247     mesh << "#version 450\n"
248          << "#extension GL_EXT_mesh_shader : enable\n"
249          << "\n"
250          // We will actually output a single triangle and most invocations will do no work.
251          << "layout(local_size_x=8, local_size_y=4, local_size_z=4) in;\n"
252          << "layout(triangles) out;\n"
253          << "layout(max_vertices=256, max_primitives=256) out;\n"
254          << "\n"
255          // Unique vertex coordinates.
256          << "layout (set=0, binding=0) uniform CoordsBuffer {\n"
257          << "    vec4 coords[3];\n"
258          << "} cb;\n"
259          // Unique vertex indices.
260          << "layout (set=0, binding=1, std430) readonly buffer IndexBuffer {\n"
261          << "    uint indices[3];\n"
262          << "} ib;\n"
263          << "\n"
264          // Triangle color.
265          << "layout (location=0) out perprimitiveEXT vec4 triangleColor[];\n"
266          << "\n"
267          << "void main ()\n"
268          << "{\n"
269          << "    SetMeshOutputsEXT(3u, 1u);\n"
270          << "    triangleColor[0] = vec4(0.0, 0.0, 1.0, 1.0);\n"
271          << "\n"
272          << "    const uint vertexIndex = gl_LocalInvocationIndex;\n"
273          << "    if (vertexIndex < 3u)\n"
274          << "    {\n"
275          << "        const uint coordsIndex = ib.indices[vertexIndex];\n"
276          << "        gl_MeshVerticesEXT[vertexIndex].gl_Position = cb.coords[coordsIndex];\n"
277          << "    }\n"
278          << "    if (vertexIndex == 0u)\n"
279          << "    {\n"
280          << "        gl_PrimitiveTriangleIndicesEXT[0] = uvec3(0, 1, 2);\n"
281          << "    }\n"
282          << "}\n";
283     dst.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
284 
285     dst.glslSources.add("frag") << glu::FragmentSource(commonMeshFragShader()) << buildOptions;
286 }
287 
initPrograms(SourceCollections & dst) const288 void MeshTaskTriangleCase::initPrograms(SourceCollections &dst) const
289 {
290     const auto buildOptions = getMinMeshEXTBuildOptions(dst.usedVulkanVersion);
291 
292     std::string taskDataDecl = "struct TaskData {\n"
293                                "    uint triangleIndex;\n"
294                                "};\n"
295                                "taskPayloadSharedEXT TaskData td;\n";
296 
297     std::ostringstream task;
298     task
299         // Each work group spawns 1 task each (2 in total) and each task will draw 1 triangle.
300         << "#version 460\n"
301         << "#extension GL_EXT_mesh_shader : enable\n"
302         << "\n"
303         << "layout(local_size_x=8, local_size_y=4, local_size_z=4) in;\n"
304         << "\n"
305         << taskDataDecl << "\n"
306         << "void main ()\n"
307         << "{\n"
308         << "    if (gl_LocalInvocationIndex == 0u)\n"
309         << "    {\n"
310         << "        td.triangleIndex = gl_WorkGroupID.x;\n"
311         << "    }\n"
312         << "    EmitMeshTasksEXT(1u, 1u, 1u);\n"
313         << "}\n";
314     ;
315     dst.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
316 
317     std::ostringstream mesh;
318     mesh << "#version 460\n"
319          << "#extension GL_EXT_mesh_shader : enable\n"
320          << "\n"
321          // We will actually output a single triangle and most invocations will do no work.
322          << "layout(local_size_x=8, local_size_y=4, local_size_z=4) in;\n"
323          << "layout(triangles) out;\n"
324          << "layout(max_vertices=256, max_primitives=256) out;\n"
325          << "\n"
326          // Unique vertex coordinates.
327          << "layout (set=0, binding=0) uniform CoordsBuffer {\n"
328          << "    vec4 coords[4];\n"
329          << "} cb;\n"
330          // Unique vertex indices.
331          << "layout (set=0, binding=1, std430) readonly buffer IndexBuffer {\n"
332          << "    uint indices[6];\n"
333          << "} ib;\n"
334          << "\n"
335          // Triangle color.
336          << "layout (location=0) out perprimitiveEXT vec4 triangleColor[];\n"
337          << "\n"
338          << taskDataDecl << "\n"
339          << "void main ()\n"
340          << "{\n"
341          << "    SetMeshOutputsEXT(3u, 1u);\n"
342          << "\n"
343          // Each "active" invocation will copy one vertex.
344          << "    const uint triangleVertex = gl_LocalInvocationIndex;\n"
345          << "    const uint indexArrayPos  = td.triangleIndex * 3u + triangleVertex;\n"
346          << "\n"
347          << "    if (triangleVertex < 3u)\n"
348          << "    {\n"
349          << "        const uint coordsIndex = ib.indices[indexArrayPos];\n"
350          // Copy vertex coordinates.
351          << "        gl_MeshVerticesEXT[triangleVertex].gl_Position = cb.coords[coordsIndex];\n"
352          // Index renumbering: final indices will always be 0, 1, 2.
353          << "    }\n"
354          << "    if (triangleVertex == 0u)\n"
355          << "    {\n"
356          << "        gl_PrimitiveTriangleIndicesEXT[0] = uvec3(0, 1, 2);\n"
357          << "        triangleColor[0] = vec4(0.0, 0.0, 1.0, 1.0);\n"
358          << "    }\n"
359          << "}\n";
360     dst.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
361 
362     dst.glslSources.add("frag") << glu::FragmentSource(commonMeshFragShader()) << buildOptions;
363 }
364 
initPrograms(SourceCollections & dst) const365 void TaskOnlyTriangleCase::initPrograms(SourceCollections &dst) const
366 {
367     const auto buildOptions = getMinMeshEXTBuildOptions(dst.usedVulkanVersion);
368 
369     // The task shader does not spawn any mesh shader invocations.
370     std::ostringstream task;
371     task << "#version 450\n"
372          << "#extension GL_EXT_mesh_shader : enable\n"
373          << "\n"
374          << "layout(local_size_x=1) in;\n"
375          << "\n"
376          << "void main ()\n"
377          << "{\n"
378          << "    EmitMeshTasksEXT(0u, 0u, 0u);\n"
379          << "}\n";
380     dst.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
381 
382     // Same shader as the mesh only case, but it should not be launched.
383     std::ostringstream mesh;
384     mesh << "#version 450\n"
385          << "#extension GL_EXT_mesh_shader : enable\n"
386          << "\n"
387          // We will actually output a single triangle and most invocations will do no work.
388          << "layout(local_size_x=8, local_size_y=4, local_size_z=4) in;\n"
389          << "layout(triangles) out;\n"
390          << "layout(max_vertices=256, max_primitives=256) out;\n"
391          << "\n"
392          << "layout (set=0, binding=0) uniform CoordsBuffer {\n"
393          << "    vec4 coords[3];\n"
394          << "} cb;\n"
395          << "layout (set=0, binding=1, std430) readonly buffer IndexBuffer {\n"
396          << "    uint indices[3];\n"
397          << "} ib;\n"
398          << "\n"
399          << "layout (location=0) out perprimitiveEXT vec4 triangleColor[];\n"
400          << "\n"
401          << "void main ()\n"
402          << "{\n"
403          << "    SetMeshOutputsEXT(3u, 1u);\n"
404          << "    triangleColor[0] = vec4(0.0, 0.0, 1.0, 1.0);\n"
405          << "\n"
406          << "    const uint vertexIndex = gl_LocalInvocationIndex;\n"
407          << "    if (vertexIndex < 3u)\n"
408          << "    {\n"
409          << "        const uint coordsIndex = ib.indices[vertexIndex];\n"
410          << "        gl_MeshVerticesEXT[vertexIndex].gl_Position = cb.coords[coordsIndex];\n"
411          << "    }\n"
412          << "    if (vertexIndex == 0u)\n"
413          << "    {\n"
414          << "        gl_PrimitiveTriangleIndicesEXT[0] = uvec3(0, 1, 2);\n"
415          << "    }\n"
416          << "}\n";
417     dst.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
418 
419     dst.glslSources.add("frag") << glu::FragmentSource(commonMeshFragShader()) << buildOptions;
420 }
421 
createInstance(Context & context) const422 TestInstance *MeshOnlyTriangleCase::createInstance(Context &context) const
423 {
424     const std::vector<tcu::Vec4> vertexCoords = {
425         tcu::Vec4(-1.0f, -1.0f, 0.0f, 1.0f),
426         tcu::Vec4(-1.0f, 3.0f, 0.0f, 1.0f),
427         tcu::Vec4(3.0f, -1.0f, 0.0f, 1.0f),
428     };
429     const std::vector<uint32_t> vertexIndices = {0u, 1u, 2u};
430     const auto expectedColor = (m_rasterizationDisabled ? getClearColor() : tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f));
431     MeshTriangleRendererParams params(m_constructionType, std::move(vertexCoords), std::move(vertexIndices), 1u,
432                                       expectedColor, m_rasterizationDisabled);
433 
434     return new MeshTriangleRenderer(context, std::move(params));
435 }
436 
createInstance(Context & context) const437 TestInstance *MeshTaskTriangleCase::createInstance(Context &context) const
438 {
439     const std::vector<tcu::Vec4> vertexCoords = {
440         tcu::Vec4(-1.0f, -1.0f, 0.0f, 1.0f),
441         tcu::Vec4(-1.0f, 1.0f, 0.0f, 1.0f),
442         tcu::Vec4(1.0f, -1.0f, 0.0f, 1.0f),
443         tcu::Vec4(1.0f, 1.0f, 0.0f, 1.0f),
444     };
445     const std::vector<uint32_t> vertexIndices = {2u, 0u, 1u, 1u, 3u, 2u};
446     MeshTriangleRendererParams params(m_constructionType, std::move(vertexCoords), std::move(vertexIndices), 2u,
447                                       tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f));
448 
449     return new MeshTriangleRenderer(context, std::move(params));
450 }
451 
createInstance(Context & context) const452 TestInstance *TaskOnlyTriangleCase::createInstance(Context &context) const
453 {
454     const std::vector<tcu::Vec4> vertexCoords = {
455         tcu::Vec4(-1.0f, -1.0f, 0.0f, 1.0f),
456         tcu::Vec4(-1.0f, 3.0f, 0.0f, 1.0f),
457         tcu::Vec4(3.0f, -1.0f, 0.0f, 1.0f),
458     };
459     const std::vector<uint32_t> vertexIndices = {0u, 1u, 2u};
460     // Note we expect the clear color.
461     MeshTriangleRendererParams params(m_constructionType, std::move(vertexCoords), std::move(vertexIndices), 1u,
462                                       getClearColor());
463 
464     return new MeshTriangleRenderer(context, std::move(params));
465 }
466 
iterate()467 tcu::TestStatus MeshTriangleRenderer::iterate()
468 {
469     const auto &vki           = m_context.getInstanceInterface();
470     const auto &vkd           = m_context.getDeviceInterface();
471     const auto physicalDevice = m_context.getPhysicalDevice();
472     const auto device         = m_context.getDevice();
473     auto &alloc               = m_context.getDefaultAllocator();
474     const auto qIndex         = m_context.getUniversalQueueFamilyIndex();
475     const auto queue          = m_context.getUniversalQueue();
476 
477     const auto vertexBufferStages = VK_SHADER_STAGE_MESH_BIT_EXT;
478     const auto vertexBufferSize   = static_cast<VkDeviceSize>(de::dataSize(m_params.vertexCoords));
479     const auto vertexBufferUsage  = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
480     const auto vertexBufferLoc    = DescriptorSetUpdateBuilder::Location::binding(0u);
481     const auto vertexBufferType   = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
482 
483     const auto indexBufferStages = VK_SHADER_STAGE_MESH_BIT_EXT;
484     const auto indexBufferSize   = static_cast<VkDeviceSize>(de::dataSize(m_params.vertexIndices));
485     const auto indexBufferUsage  = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
486     const auto indexBufferLoc    = DescriptorSetUpdateBuilder::Location::binding(1u);
487     const auto indexBufferType   = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
488 
489     // Vertex buffer.
490     const auto vertexBufferInfo = makeBufferCreateInfo(vertexBufferSize, vertexBufferUsage);
491     BufferWithMemory vertexBuffer(vkd, device, alloc, vertexBufferInfo, MemoryRequirement::HostVisible);
492     auto &vertexBufferAlloc   = vertexBuffer.getAllocation();
493     void *vertexBufferDataPtr = vertexBufferAlloc.getHostPtr();
494 
495     deMemcpy(vertexBufferDataPtr, m_params.vertexCoords.data(), static_cast<size_t>(vertexBufferSize));
496     flushAlloc(vkd, device, vertexBufferAlloc);
497 
498     // Index buffer.
499     const auto indexBufferInfo = makeBufferCreateInfo(indexBufferSize, indexBufferUsage);
500     BufferWithMemory indexBuffer(vkd, device, alloc, indexBufferInfo, MemoryRequirement::HostVisible);
501     auto &indexBufferAlloc   = indexBuffer.getAllocation();
502     void *indexBufferDataPtr = indexBufferAlloc.getHostPtr();
503 
504     deMemcpy(indexBufferDataPtr, m_params.vertexIndices.data(), static_cast<size_t>(indexBufferSize));
505     flushAlloc(vkd, device, indexBufferAlloc);
506 
507     // Color buffer.
508     const auto colorBufferFormat = VK_FORMAT_R8G8B8A8_UNORM;
509     const auto colorBufferExtent = makeExtent3D(8u, 8u, 1u);
510     const auto colorBufferUsage  = (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
511 
512     const VkImageCreateInfo colorBufferInfo = {
513         VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
514         nullptr,                             // const void* pNext;
515         0u,                                  // VkImageCreateFlags flags;
516         VK_IMAGE_TYPE_2D,                    // VkImageType imageType;
517         colorBufferFormat,                   // VkFormat format;
518         colorBufferExtent,                   // VkExtent3D extent;
519         1u,                                  // uint32_t mipLevels;
520         1u,                                  // uint32_t arrayLayers;
521         VK_SAMPLE_COUNT_1_BIT,               // VkSampleCountFlagBits samples;
522         VK_IMAGE_TILING_OPTIMAL,             // VkImageTiling tiling;
523         colorBufferUsage,                    // VkImageUsageFlags usage;
524         VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
525         0u,                                  // uint32_t queueFamilyIndexCount;
526         nullptr,                             // const uint32_t* pQueueFamilyIndices;
527         VK_IMAGE_LAYOUT_UNDEFINED,           // VkImageLayout initialLayout;
528     };
529     ImageWithMemory colorBuffer(vkd, device, alloc, colorBufferInfo, MemoryRequirement::Any);
530 
531     const auto colorSRR = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
532     const auto colorBufferView =
533         makeImageView(vkd, device, colorBuffer.get(), VK_IMAGE_VIEW_TYPE_2D, colorBufferFormat, colorSRR);
534 
535     // Render pass.
536     const auto renderPass = makeRenderPass(vkd, device, colorBufferFormat);
537 
538     // Framebuffer.
539     const auto framebuffer = makeFramebuffer(vkd, device, renderPass.get(), colorBufferView.get(),
540                                              colorBufferExtent.width, colorBufferExtent.height);
541 
542     // Set layout.
543     DescriptorSetLayoutBuilder layoutBuilder;
544     layoutBuilder.addSingleBinding(vertexBufferType, vertexBufferStages);
545     layoutBuilder.addSingleBinding(indexBufferType, indexBufferStages);
546     const auto setLayout = layoutBuilder.build(vkd, device);
547 
548     // Descriptor pool.
549     DescriptorPoolBuilder poolBuilder;
550     poolBuilder.addType(vertexBufferType);
551     poolBuilder.addType(indexBufferType);
552     const auto descriptorPool = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
553 
554     // Descriptor set.
555     const auto descriptorSet = makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get());
556 
557     // Update descriptor set.
558     DescriptorSetUpdateBuilder updateBuilder;
559     const auto vertexBufferDescInfo = makeDescriptorBufferInfo(vertexBuffer.get(), 0ull, vertexBufferSize);
560     const auto indexBufferDescInfo  = makeDescriptorBufferInfo(indexBuffer.get(), 0ull, indexBufferSize);
561     updateBuilder.writeSingle(descriptorSet.get(), vertexBufferLoc, vertexBufferType, &vertexBufferDescInfo);
562     updateBuilder.writeSingle(descriptorSet.get(), indexBufferLoc, indexBufferType, &indexBufferDescInfo);
563     updateBuilder.update(vkd, device);
564 
565     // Pipeline layout.
566     const PipelineLayoutWrapper pipelineLayout(m_params.constructionType, vkd, device, setLayout.get());
567 
568     // Shader modules.
569     ShaderWrapper taskModule;
570     ShaderWrapper fragModule;
571     const auto &binaries = m_context.getBinaryCollection();
572 
573     if (binaries.contains("task"))
574         taskModule = ShaderWrapper(vkd, device, binaries.get("task"), 0u);
575     if (!m_params.rasterizationDisabled)
576         fragModule = ShaderWrapper(vkd, device, binaries.get("frag"), 0u);
577     const auto meshModule = ShaderWrapper(vkd, device, binaries.get("mesh"), 0u);
578 
579     // Graphics pipeline.
580     std::vector<VkViewport> viewports(1u, makeViewport(colorBufferExtent));
581     std::vector<VkRect2D> scissors(1u, makeRect2D(colorBufferExtent));
582     GraphicsPipelineWrapper pipelineMaker(vki, vkd, physicalDevice, device, m_context.getDeviceExtensions(),
583                                           m_params.constructionType);
584 
585     makeMeshGraphicsPipeline(pipelineMaker, pipelineLayout, taskModule, meshModule, fragModule, renderPass.get(),
586                              viewports, scissors);
587     const auto pipeline = pipelineMaker.getPipeline();
588 
589     // Command pool and buffer.
590     const auto cmdPool      = makeCommandPool(vkd, device, qIndex);
591     const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
592     const auto cmdBuffer    = cmdBufferPtr.get();
593 
594     // Output buffer.
595     const auto tcuFormat      = mapVkFormat(colorBufferFormat);
596     const auto outBufferSize  = static_cast<VkDeviceSize>(static_cast<uint32_t>(tcu::getPixelSize(tcuFormat)) *
597                                                          colorBufferExtent.width * colorBufferExtent.height);
598     const auto outBufferUsage = VK_BUFFER_USAGE_TRANSFER_DST_BIT;
599     const auto outBufferInfo  = makeBufferCreateInfo(outBufferSize, outBufferUsage);
600     BufferWithMemory outBuffer(vkd, device, alloc, outBufferInfo, MemoryRequirement::HostVisible);
601     auto &outBufferAlloc = outBuffer.getAllocation();
602     void *outBufferData  = outBufferAlloc.getHostPtr();
603 
604     // Draw triangle.
605     beginCommandBuffer(vkd, cmdBuffer);
606     beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0), getClearColor());
607     vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout.get(), 0u, 1u,
608                               &descriptorSet.get(), 0u, nullptr);
609     vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
610     vkd.cmdDrawMeshTasksEXT(cmdBuffer, m_params.taskCount, 1u, 1u);
611     endRenderPass(vkd, cmdBuffer);
612 
613     // Copy color buffer to output buffer.
614     const tcu::IVec3 imageDim(static_cast<int>(colorBufferExtent.width), static_cast<int>(colorBufferExtent.height),
615                               static_cast<int>(colorBufferExtent.depth));
616     const tcu::IVec2 imageSize(imageDim.x(), imageDim.y());
617 
618     copyImageToBuffer(vkd, cmdBuffer, colorBuffer.get(), outBuffer.get(), imageSize);
619     endCommandBuffer(vkd, cmdBuffer);
620     submitCommandsAndWait(vkd, device, queue, cmdBuffer);
621 
622     // Invalidate alloc.
623     invalidateAlloc(vkd, device, outBufferAlloc);
624     tcu::ConstPixelBufferAccess outPixels(tcuFormat, imageDim, outBufferData);
625 
626     auto &log = m_context.getTestContext().getLog();
627     const tcu::Vec4 threshold(0.0f); // The color can be represented exactly.
628 
629     if (!tcu::floatThresholdCompare(log, "Result", "", m_params.expectedColor, outPixels, threshold,
630                                     tcu::COMPARE_LOG_EVERYTHING))
631         return tcu::TestStatus::fail("Failed; check log for details");
632 
633     return tcu::TestStatus::pass("Pass");
634 }
635 
gradientImageExtent()636 VkExtent3D gradientImageExtent()
637 {
638     return makeExtent3D(256u, 256u, 1u);
639 }
640 
641 struct GradientParams
642 {
643     tcu::Maybe<FragmentSize> fragmentSize;
644     PipelineConstructionType constructionType;
645 
GradientParamsvkt::MeshShader::__anon926f16030111::GradientParams646     GradientParams(const tcu::Maybe<FragmentSize> &fragmentSize_, PipelineConstructionType constructionType_)
647         : fragmentSize(fragmentSize_)
648         , constructionType(constructionType_)
649     {
650     }
651 };
652 
checkMeshSupport(Context & context,GradientParams params)653 void checkMeshSupport(Context &context, GradientParams params)
654 {
655     checkTaskMeshShaderSupportEXT(context, false, true);
656 
657     if (static_cast<bool>(params.fragmentSize))
658     {
659         const auto &features = context.getMeshShaderFeaturesEXT();
660         if (!features.primitiveFragmentShadingRateMeshShader)
661             TCU_THROW(NotSupportedError, "Primitive fragment shading rate not supported in mesh shaders");
662     }
663 
664     checkPipelineConstructionRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
665                                           params.constructionType);
666 }
667 
initGradientPrograms(vk::SourceCollections & programCollection,GradientParams params)668 void initGradientPrograms(vk::SourceCollections &programCollection, GradientParams params)
669 {
670     const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
671     const auto extent       = gradientImageExtent();
672 
673     std::ostringstream frag;
674     frag << "#version 450\n"
675          << "\n"
676          << "layout (location=0) in  vec4 inColor;\n"
677          << "layout (location=0) out vec4 outColor;\n"
678          << "\n"
679          << "void main ()\n"
680          << "{\n"
681          << "    outColor = inColor;\n"
682          << "}\n";
683     programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str());
684 
685     std::string fragmentSizeStr;
686     const auto useFragmentSize = static_cast<bool>(params.fragmentSize);
687 
688     if (useFragmentSize)
689     {
690         const auto &fragSize = params.fragmentSize.get();
691         fragmentSizeStr      = getGLSLShadingRateMask(fragSize);
692 
693         const auto val = getSPVShadingRateValue(fragSize);
694         DE_ASSERT(val != 0);
695         DE_UNREF(val); // For release builds.
696     }
697 
698     std::ostringstream mesh;
699     mesh << "#version 450\n"
700          << "#extension GL_EXT_mesh_shader : enable\n";
701 
702     if (useFragmentSize)
703         mesh << "#extension GL_EXT_fragment_shading_rate : enable\n";
704 
705     mesh << "\n"
706          << "layout(local_size_x=4) in;\n"
707          << "layout(triangles) out;\n"
708          << "layout(max_vertices=256, max_primitives=256) out;\n"
709          << "\n"
710          << "layout (location=0) out vec4 outColor[];\n"
711          << "\n";
712 
713     if (useFragmentSize)
714     {
715         mesh << "perprimitiveEXT out gl_MeshPerPrimitiveEXT {\n"
716              << "   int gl_PrimitiveShadingRateEXT;\n"
717              << "} gl_MeshPrimitivesEXT[];\n"
718              << "\n";
719     }
720 
721     mesh << "void main ()\n"
722          << "{\n"
723          << "    SetMeshOutputsEXT(4u, 2u);\n"
724          << "\n"
725          << "    const uint vertex    = gl_LocalInvocationIndex;\n"
726          << "    const uint primitive = gl_LocalInvocationIndex;\n"
727          << "\n"
728          << "    const vec4 topLeft      = vec4(-1.0, -1.0, 0.0, 1.0);\n"
729          << "    const vec4 botLeft      = vec4(-1.0,  1.0, 0.0, 1.0);\n"
730          << "    const vec4 topRight     = vec4( 1.0, -1.0, 0.0, 1.0);\n"
731          << "    const vec4 botRight     = vec4( 1.0,  1.0, 0.0, 1.0);\n"
732          << "    const vec4 positions[4] = vec4[](topLeft, botLeft, topRight, botRight);\n"
733          << "\n"
734          // Green changes according to the width.
735          // Blue changes according to the height.
736          // Value 0 at the center of the first pixel and value 1 at the center of the last pixel.
737          << "    const float width      = " << extent.width << ";\n"
738          << "    const float height     = " << extent.height << ";\n"
739          << "    const float halfWidth  = (1.0 / (width - 1.0)) / 2.0;\n"
740          << "    const float halfHeight = (1.0 / (height - 1.0)) / 2.0;\n"
741          << "    const float minGreen   = -halfWidth;\n"
742          << "    const float maxGreen   = 1.0+halfWidth;\n"
743          << "    const float minBlue    = -halfHeight;\n"
744          << "    const float maxBlue    = 1.0+halfHeight;\n"
745          << "    const vec4  colors[4]  = vec4[](\n"
746          << "        vec4(0, minGreen, minBlue, 1.0),\n"
747          << "        vec4(0, minGreen, maxBlue, 1.0),\n"
748          << "        vec4(0, maxGreen, minBlue, 1.0),\n"
749          << "        vec4(0, maxGreen, maxBlue, 1.0)\n"
750          << "    );\n"
751          << "\n"
752          << "    const uvec3 indices[2] = uvec3[](\n"
753          << "        uvec3(0, 1, 2),\n"
754          << "        uvec3(1, 3, 2)\n"
755          << "    );\n"
756          << "    if (vertex < 4u)\n"
757          << "    {\n"
758          << "        gl_MeshVerticesEXT[vertex].gl_Position = positions[vertex];\n"
759          << "        outColor[vertex] = colors[vertex];\n"
760          << "    }\n"
761          << "    if (primitive < 2u)\n"
762          << "    {\n";
763 
764     if (useFragmentSize)
765     {
766         mesh << "        gl_MeshPrimitivesEXT[primitive].gl_PrimitiveShadingRateEXT = " << fragmentSizeStr << ";\n";
767     }
768 
769     mesh << "        gl_PrimitiveTriangleIndicesEXT[primitive] = indices[primitive];\n"
770          << "    }\n"
771          << "}\n";
772     ;
773     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
774 }
775 
coordColorFormat(int x,int y,const tcu::Vec4 & color)776 std::string coordColorFormat(int x, int y, const tcu::Vec4 &color)
777 {
778     std::ostringstream msg;
779     msg << "[" << x << ", " << y << "]=(" << color.x() << ", " << color.y() << ", " << color.z() << ", " << color.w()
780         << ")";
781     return msg.str();
782 }
783 
testFullscreenGradient(Context & context,GradientParams params)784 tcu::TestStatus testFullscreenGradient(Context &context, GradientParams params)
785 {
786     const auto &vki                = context.getInstanceInterface();
787     const auto &vkd                = context.getDeviceInterface();
788     const auto physicalDevice      = context.getPhysicalDevice();
789     const auto device              = context.getDevice();
790     auto &alloc                    = context.getDefaultAllocator();
791     const auto qIndex              = context.getUniversalQueueFamilyIndex();
792     const auto queue               = context.getUniversalQueue();
793     const auto useFragmentSize     = static_cast<bool>(params.fragmentSize);
794     const auto defaultFragmentSize = FragmentSize::SIZE_1X1;
795     const auto rateSize = getShadingRateSize(useFragmentSize ? params.fragmentSize.get() : defaultFragmentSize);
796 
797     // Color buffer.
798     const auto colorBufferFormat = VK_FORMAT_R8G8B8A8_UNORM;
799     const auto colorBufferExtent =
800         makeExtent3D(256u, 256u, 1u); // Big enough for a detailed gradient, small enough to get unique colors.
801     const auto colorBufferUsage = (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
802 
803     const VkImageCreateInfo colorBufferInfo = {
804         VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
805         nullptr,                             // const void* pNext;
806         0u,                                  // VkImageCreateFlags flags;
807         VK_IMAGE_TYPE_2D,                    // VkImageType imageType;
808         colorBufferFormat,                   // VkFormat format;
809         colorBufferExtent,                   // VkExtent3D extent;
810         1u,                                  // uint32_t mipLevels;
811         1u,                                  // uint32_t arrayLayers;
812         VK_SAMPLE_COUNT_1_BIT,               // VkSampleCountFlagBits samples;
813         VK_IMAGE_TILING_OPTIMAL,             // VkImageTiling tiling;
814         colorBufferUsage,                    // VkImageUsageFlags usage;
815         VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
816         0u,                                  // uint32_t queueFamilyIndexCount;
817         nullptr,                             // const uint32_t* pQueueFamilyIndices;
818         VK_IMAGE_LAYOUT_UNDEFINED,           // VkImageLayout initialLayout;
819     };
820     ImageWithMemory colorBuffer(vkd, device, alloc, colorBufferInfo, MemoryRequirement::Any);
821 
822     const auto colorSRR = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
823     const auto colorBufferView =
824         makeImageView(vkd, device, colorBuffer.get(), VK_IMAGE_VIEW_TYPE_2D, colorBufferFormat, colorSRR);
825 
826     // Render pass.
827     const auto renderPass = makeRenderPass(vkd, device, colorBufferFormat);
828 
829     // Framebuffer.
830     const auto framebuffer = makeFramebuffer(vkd, device, renderPass.get(), colorBufferView.get(),
831                                              colorBufferExtent.width, colorBufferExtent.height);
832 
833     // Set layout.
834     DescriptorSetLayoutBuilder layoutBuilder;
835     const auto setLayout = layoutBuilder.build(vkd, device);
836 
837     // Pipeline layout.
838     const PipelineLayoutWrapper pipelineLayout(params.constructionType, vkd, device, setLayout.get());
839 
840     // Shader modules.
841     ShaderWrapper taskModule;
842     const auto &binaries = context.getBinaryCollection();
843 
844     const auto meshModule = ShaderWrapper(vkd, device, binaries.get("mesh"), 0u);
845     const auto fragModule = ShaderWrapper(vkd, device, binaries.get("frag"), 0u);
846 
847     using ShadingRateInfoPtr = de::MovePtr<VkPipelineFragmentShadingRateStateCreateInfoKHR>;
848     ShadingRateInfoPtr pNext;
849     if (useFragmentSize)
850     {
851         pNext  = ShadingRateInfoPtr(new VkPipelineFragmentShadingRateStateCreateInfoKHR);
852         *pNext = initVulkanStructure();
853 
854         pNext->fragmentSize = getShadingRateSize(
855             FragmentSize::SIZE_1X1); // 1x1 will not be used as the primitive rate in tests with fragment size.
856         pNext->combinerOps[0] = VK_FRAGMENT_SHADING_RATE_COMBINER_OP_REPLACE_KHR;
857         pNext->combinerOps[1] = VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR;
858     }
859 
860     // Graphics pipeline.
861     std::vector<VkViewport> viewports(1u, makeViewport(colorBufferExtent));
862     std::vector<VkRect2D> scissors(1u, makeRect2D(colorBufferExtent));
863     GraphicsPipelineWrapper pipelineMaker(vki, vkd, physicalDevice, device, context.getDeviceExtensions(),
864                                           params.constructionType);
865 
866     makeMeshGraphicsPipeline(pipelineMaker, pipelineLayout, taskModule, meshModule, fragModule, renderPass.get(),
867                              viewports, scissors, 0u, nullptr, pNext.get());
868     const auto pipeline = pipelineMaker.getPipeline();
869 
870     // Command pool and buffer.
871     const auto cmdPool      = makeCommandPool(vkd, device, qIndex);
872     const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
873     const auto cmdBuffer    = cmdBufferPtr.get();
874 
875     // Output buffer.
876     const auto tcuFormat      = mapVkFormat(colorBufferFormat);
877     const auto outBufferSize  = static_cast<VkDeviceSize>(static_cast<uint32_t>(tcu::getPixelSize(tcuFormat)) *
878                                                          colorBufferExtent.width * colorBufferExtent.height);
879     const auto outBufferUsage = VK_BUFFER_USAGE_TRANSFER_DST_BIT;
880     const auto outBufferInfo  = makeBufferCreateInfo(outBufferSize, outBufferUsage);
881     BufferWithMemory outBuffer(vkd, device, alloc, outBufferInfo, MemoryRequirement::HostVisible);
882     auto &outBufferAlloc = outBuffer.getAllocation();
883     void *outBufferData  = outBufferAlloc.getHostPtr();
884 
885     // Draw triangles.
886     beginCommandBuffer(vkd, cmdBuffer);
887     beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0), getClearColor());
888     vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
889     vkd.cmdDrawMeshTasksEXT(cmdBuffer, 1u, 1u, 1u);
890     endRenderPass(vkd, cmdBuffer);
891 
892     // Copy color buffer to output buffer.
893     const tcu::IVec3 imageDim(static_cast<int>(colorBufferExtent.width), static_cast<int>(colorBufferExtent.height),
894                               static_cast<int>(colorBufferExtent.depth));
895     const tcu::IVec2 imageSize(imageDim.x(), imageDim.y());
896 
897     copyImageToBuffer(vkd, cmdBuffer, colorBuffer.get(), outBuffer.get(), imageSize);
898     endCommandBuffer(vkd, cmdBuffer);
899     submitCommandsAndWait(vkd, device, queue, cmdBuffer);
900 
901     // Invalidate alloc.
902     invalidateAlloc(vkd, device, outBufferAlloc);
903     tcu::ConstPixelBufferAccess outPixels(tcuFormat, imageDim, outBufferData);
904 
905     // Create reference image.
906     tcu::TextureLevel refLevel(tcuFormat, imageDim.x(), imageDim.y(), imageDim.z());
907     tcu::PixelBufferAccess refAccess(refLevel);
908     for (int y = 0; y < imageDim.y(); ++y)
909         for (int x = 0; x < imageDim.x(); ++x)
910         {
911             const tcu::IVec4 color(0, x, y, 255);
912             refAccess.setPixel(color, x, y);
913         }
914 
915     const tcu::TextureFormat maskFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNORM_INT8);
916     tcu::TextureLevel errorMask(maskFormat, imageDim.x(), imageDim.y(), imageDim.z());
917     tcu::PixelBufferAccess errorAccess(errorMask);
918     const tcu::Vec4 green(0.0f, 1.0f, 0.0f, 1.0f);
919     const tcu::Vec4 red(1.0f, 0.0f, 0.0f, 1.0f);
920     auto &log = context.getTestContext().getLog();
921 
922     // Each block needs to have the same color and be equal to one of the pixel colors of that block in the reference image.
923     const auto blockWidth  = static_cast<int>(rateSize.width);
924     const auto blockHeight = static_cast<int>(rateSize.height);
925 
926     tcu::clear(errorAccess, green);
927     bool globalFail = false;
928 
929     for (int y = 0; y < imageDim.y() / blockHeight; ++y)
930         for (int x = 0; x < imageDim.x() / blockWidth; ++x)
931         {
932             bool blockFail = false;
933             std::vector<tcu::Vec4> candidates;
934 
935             candidates.reserve(rateSize.width * rateSize.height);
936 
937             const auto cornerY     = y * blockHeight;
938             const auto cornerX     = x * blockWidth;
939             const auto cornerColor = outPixels.getPixel(cornerX, cornerY);
940 
941             for (int blockY = 0; blockY < blockHeight; ++blockY)
942                 for (int blockX = 0; blockX < blockWidth; ++blockX)
943                 {
944                     const auto absY     = cornerY + blockY;
945                     const auto absX     = cornerX + blockX;
946                     const auto resColor = outPixels.getPixel(absX, absY);
947 
948                     candidates.push_back(refAccess.getPixel(absX, absY));
949 
950                     if (cornerColor != resColor)
951                     {
952                         std::ostringstream msg;
953                         msg << "Block not uniform: " << coordColorFormat(cornerX, cornerY, cornerColor) << " vs "
954                             << coordColorFormat(absX, absY, resColor);
955                         log << tcu::TestLog::Message << msg.str() << tcu::TestLog::EndMessage;
956 
957                         blockFail = true;
958                     }
959                 }
960 
961             if (!de::contains(begin(candidates), end(candidates), cornerColor))
962             {
963                 std::ostringstream msg;
964                 msg << "Block color does not match any reference color at [" << cornerX << ", " << cornerY << "]";
965                 log << tcu::TestLog::Message << msg.str() << tcu::TestLog::EndMessage;
966                 blockFail = true;
967             }
968 
969             if (blockFail)
970             {
971                 const auto blockAccess = tcu::getSubregion(errorAccess, cornerX, cornerY, blockWidth, blockHeight);
972                 tcu::clear(blockAccess, red);
973                 globalFail = true;
974             }
975         }
976 
977     if (globalFail)
978     {
979         log << tcu::TestLog::Image("Result", "", outPixels);
980         log << tcu::TestLog::Image("Reference", "", refAccess);
981         log << tcu::TestLog::Image("ErrorMask", "", errorAccess);
982 
983         TCU_FAIL("Color mismatch; check log for more details");
984     }
985 
986     return tcu::TestStatus::pass("Pass");
987 }
988 
// Smoke test that emits one triangle per pixel plus one more global background triangle, but doesn't use every triangle. It only
// draws half the front triangles. It gets information from a mix of vertex buffers, per primitive buffers and push constants.
//
// Parameters for the test case.
struct PartialUsageParams
{
    // Pipeline construction variant; verified in checkSupport() and forwarded to the test instance.
    PipelineConstructionType constructionType;
    // When false, the generated mesh shader additionally emits unused vertices at the start of the vertex array.
    bool compactVertices;
};
996 
997 class PartialUsageCase : public vkt::TestCase
998 {
999 public:
1000     static constexpr uint32_t kWidth            = 16u;
1001     static constexpr uint32_t kHeight           = 16u;
1002     static constexpr uint32_t kLocalInvocations = 64u;
1003     static constexpr uint32_t kMaxPrimitives    = kLocalInvocations;
1004     static constexpr uint32_t kMaxVertices      = kMaxPrimitives * 3u;
1005     static constexpr uint32_t kNumWorkGroups    = 2u;
1006     static constexpr uint32_t kTotalPrimitives  = kNumWorkGroups * kMaxPrimitives;
1007 
PartialUsageCase(tcu::TestContext & testCtx,const std::string & name,const PartialUsageParams & params)1008     PartialUsageCase(tcu::TestContext &testCtx, const std::string &name, const PartialUsageParams &params)
1009         : vkt::TestCase(testCtx, name)
1010         , m_params(params)
1011     {
1012     }
~PartialUsageCase(void)1013     virtual ~PartialUsageCase(void)
1014     {
1015     }
1016 
1017     void checkSupport(Context &context) const override;
1018     void initPrograms(vk::SourceCollections &programCollection) const override;
1019     TestInstance *createInstance(Context &context) const override;
1020 
1021     struct IndexAndColor
1022     {
1023         uint32_t index;
1024         float color;
1025     };
1026 
1027     struct PushConstants
1028     {
1029         uint32_t totalTriangles;
1030         float depth;
1031         float red;
1032     };
1033 
1034 protected:
1035     PartialUsageParams m_params;
1036 };
1037 
1038 class PartialUsageInstance : public vkt::TestInstance
1039 {
1040 public:
PartialUsageInstance(Context & context,PipelineConstructionType constructionType)1041     PartialUsageInstance(Context &context, PipelineConstructionType constructionType)
1042         : vkt::TestInstance(context)
1043         , m_constructionType(constructionType)
1044     {
1045     }
~PartialUsageInstance(void)1046     virtual ~PartialUsageInstance(void)
1047     {
1048     }
1049 
1050     tcu::TestStatus iterate(void) override;
1051 
1052 protected:
1053     const PipelineConstructionType m_constructionType;
1054 };
1055 
checkSupport(Context & context) const1056 void PartialUsageCase::checkSupport(Context &context) const
1057 {
1058     checkTaskMeshShaderSupportEXT(context, true, true);
1059     checkPipelineConstructionRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
1060                                           m_params.constructionType);
1061 }
1062 
createInstance(Context & context) const1063 TestInstance *PartialUsageCase::createInstance(Context &context) const
1064 {
1065     return new PartialUsageInstance(context, m_params.constructionType);
1066 }
1067 
initPrograms(vk::SourceCollections & programCollection) const1068 void PartialUsageCase::initPrograms(vk::SourceCollections &programCollection) const
1069 {
1070     const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
1071 
1072     // The task shader will always emit two mesh shader work groups, which may do some work.
1073     std::ostringstream task;
1074     task << "#version 450\n"
1075          << "#extension GL_EXT_mesh_shader : enable\n"
1076          << "\n"
1077          << "layout (local_size_x=1) in;\n"
1078          << "\n"
1079          << "void main ()\n"
1080          << "{\n"
1081          << "    EmitMeshTasksEXT(" << kNumWorkGroups << ", 1u, 1u);\n"
1082          << "}\n";
1083     programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
1084 
1085     // The frag shader will color the output with the indicated color;
1086     std::ostringstream frag;
1087     frag << "#version 450\n"
1088          << "#extension GL_EXT_mesh_shader : enable\n"
1089          << "\n"
1090          << "layout (location=0) perprimitiveEXT in vec4 primitiveColor;\n"
1091          << "layout (location=0) out vec4 outColor;\n"
1092          << "\n"
1093          << "void main ()\n"
1094          << "{\n"
1095          << "    outColor = primitiveColor;\n"
1096          << "}\n";
1097     programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str()) << buildOptions;
1098 
1099     // The mesh shader reads primitive indices and vertices data from buffers and push constants. The primitive data block contains
1100     // primitive indices and primitive colors that must be read by the current invocation using an index that depends on its global
1101     // invocation index. The primitive index allows access into the triangle vertices buffer. Depending on the current work group
1102     // index and total number of triangles (set by push constants), the current invocation may have to emit a primitive or not.
1103     //
1104     // In addition, the non-compacted variant emits some extra unused vertices at the start of the array.
1105     const auto kExtraVertices       = (m_params.compactVertices ? 0u : kLocalInvocations);
1106     const auto kLocationMaxVertices = kMaxVertices + kExtraVertices;
1107 
1108     if (!m_params.compactVertices)
1109         DE_ASSERT(kLocationMaxVertices <= 256u);
1110 
1111     std::ostringstream mesh;
1112     mesh << "#version 450\n"
1113          << "#extension GL_EXT_mesh_shader : enable\n"
1114          << "\n"
1115          << "layout (local_size_x=" << kLocalInvocations << ", local_size_y=1, local_size_z=1) in;\n"
1116          << "layout (triangles) out;\n"
1117          << "layout (max_vertices=" << kLocationMaxVertices << ", max_primitives=" << kMaxPrimitives << ") out;\n"
1118          << "\n"
1119          << "layout (location=0) perprimitiveEXT out vec4 primitiveColor[];\n"
1120          << "\n"
1121          << "layout (set=0, binding=0, std430) readonly buffer VerticesBlock {\n"
1122          << "    vec2 coords[];\n" // 3 vertices per triangle.
1123          << "} vertex;\n"
1124          << "\n"
1125          << "struct IndexAndColor {\n"
1126          << "    uint  index;\n" // Triangle index (for accessing the coordinates buffer above).
1127          << "    float color;\n" // Triangle blue color component.
1128          << "};\n"
1129          << "\n"
1130          << "layout (set=0, binding=1, std430) readonly buffer PrimitiveDataBlock {\n"
1131          << "    IndexAndColor data[];\n"
1132          << "} primitive;\n"
1133          << "\n"
1134          << "layout (push_constant, std430) uniform PushConstantBlock {\n"
1135          << "    uint  totalTriangles;\n" // How many triangles in total we have to emit.
1136          << "    float depth;\n"          // Triangle depth (allows painting the background with a different color).
1137          << "    float red;\n"            // Triangle red color component.
1138          << "} pc;\n"
1139          << "\n"
1140          << "void main ()\n"
1141          << "{\n"
1142          // First primitive for this work group, plus the work group primitive and vertex count.
1143          << "    const uint firstPrimitive   = gl_WorkGroupID.x * gl_WorkGroupSize.x;\n"
1144          << "    const uint wgTriangleCount  = ((pc.totalTriangles >= firstPrimitive) ? min(pc.totalTriangles - "
1145             "firstPrimitive, "
1146          << kLocalInvocations << ") : 0u);\n"
1147          << "    const uint wgVertexCount    = wgTriangleCount * 3u + " << kExtraVertices << "u;\n"
1148          << "\n";
1149 
1150     if (!m_params.compactVertices)
1151     {
1152         // Produce extra unused vertices.
1153         mesh << "    {\n"
1154              << "        const float proportion = float(gl_LocalInvocationIndex) / float(gl_WorkGroupSize.x);\n"
1155              << "        gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(proportion, 1.0 - proportion, "
1156                 "pc.depth, 1.0);\n"
1157              << "    }\n"
1158              << "\n";
1159     }
1160 
1161     mesh
1162         << "    SetMeshOutputsEXT(wgVertexCount, wgTriangleCount);\n"
1163         << "\n"
1164         // Calculate global invocation primitive id, and use it to access the per-primitive buffer. From there, get the primitive index in the
1165         // vertex buffer and the blue color component.
1166         << "    if (gl_LocalInvocationIndex < wgTriangleCount) {\n"
1167         << "        const uint  primitiveID         = firstPrimitive + gl_LocalInvocationIndex;\n"
1168         << "        const uint  primitiveIndex      = primitive.data[primitiveID].index;\n"
1169         << "        const float blue                = primitive.data[primitiveID].color;\n"
1170         << "        const uint  firstVertexIndex    = primitiveIndex * 3u;\n"
1171         << "        const uvec3 globalVertexIndices = uvec3(firstVertexIndex, firstVertexIndex+1u, "
1172            "firstVertexIndex+2u);\n"
1173         << "        const uint  localPrimitiveID    = gl_LocalInvocationIndex;\n"
1174         << "        const uint  firstLocalVertex    = localPrimitiveID * 3u + " << kExtraVertices << "u;\n"
1175         << "        const uvec3 localVertexIndices  = uvec3(firstLocalVertex, firstLocalVertex+1u, "
1176            "firstLocalVertex+2u);\n"
1177         << "\n"
1178         << "        gl_MeshVerticesEXT[localVertexIndices.x].gl_Position = vec4(vertex.coords[globalVertexIndices.x], "
1179            "pc.depth, 1.0);\n"
1180         << "        gl_MeshVerticesEXT[localVertexIndices.y].gl_Position = vec4(vertex.coords[globalVertexIndices.y], "
1181            "pc.depth, 1.0);\n"
1182         << "        gl_MeshVerticesEXT[localVertexIndices.z].gl_Position = vec4(vertex.coords[globalVertexIndices.z], "
1183            "pc.depth, 1.0);\n"
1184         << "\n"
1185         << "        gl_PrimitiveTriangleIndicesEXT[localPrimitiveID] = localVertexIndices;\n"
1186         << "        primitiveColor[localPrimitiveID]                 = vec4(pc.red, 0.0, blue, 1.0f);\n"
1187         << "    }\n"
1188         << "}\n";
1189     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
1190 }
1191 
// Maps a pixel index along one axis to the framebuffer coordinate, in the [-1, 1] range, of that pixel's center.
inline float pixelToFBCoords(uint32_t pixelId, uint32_t totalPixels)
{
    const float pixelCenter = static_cast<float>(pixelId) + 0.5f;
    const float normalized  = pixelCenter / static_cast<float>(totalPixels);
    return normalized * 2.0f - 1.0f;
}
1196 
iterate()1197 tcu::TestStatus PartialUsageInstance::iterate()
1198 {
1199     const auto &vki             = m_context.getInstanceInterface();
1200     const auto &vkd             = m_context.getDeviceInterface();
1201     const auto physicalDevice   = m_context.getPhysicalDevice();
1202     const auto device           = m_context.getDevice();
1203     const auto queueIndex       = m_context.getUniversalQueueFamilyIndex();
1204     const auto queue            = m_context.getUniversalQueue();
1205     auto &alloc                 = m_context.getDefaultAllocator();
1206     const auto bufferUsage      = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
1207     const auto bufferDescType   = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
1208     const auto bufferDescStages = VK_SHADER_STAGE_MESH_BIT_EXT;
1209     const auto pcSize           = static_cast<VkDeviceSize>(sizeof(PartialUsageCase::PushConstants));
1210     const auto pcStages         = bufferDescStages;
1211     const auto pcRange          = makePushConstantRange(pcStages, 0u, static_cast<uint32_t>(pcSize));
1212     const auto fbExtent         = makeExtent3D(PartialUsageCase::kWidth, PartialUsageCase::kHeight, 1u);
1213     const tcu::IVec3 iExtent(static_cast<int>(fbExtent.width), static_cast<int>(fbExtent.height),
1214                              static_cast<int>(fbExtent.depth));
1215     const auto colorFormat         = VK_FORMAT_R8G8B8A8_UNORM;
1216     const auto colorTcuFormat      = mapVkFormat(colorFormat);
1217     const auto dsFormat            = VK_FORMAT_D16_UNORM;
1218     const auto vertexSize          = sizeof(tcu::Vec2);
1219     const auto verticesPerTriangle = 3u;
1220     const auto pixelCount          = fbExtent.width * fbExtent.height * fbExtent.depth;
1221     const auto vertexCount         = pixelCount * verticesPerTriangle;
1222     const auto triangleSize        = vertexSize * verticesPerTriangle;
1223     const auto colorThreshold      = 0.005f; // 1/255 < 0.005 < 2/255
1224     const float fgRed              = 0.0f;
1225     const float bgRed              = 1.0f;
1226     const float bgBlue             = 1.0f;
1227 
1228     // Quarter of the pixel width and height in framebuffer coordinates.
1229     const float pixelWidth4  = 2.0f / (static_cast<float>(fbExtent.width) * 4.0f);
1230     const float pixelHeight4 = 2.0f / (static_cast<float>(fbExtent.height) * 4.0f);
1231 
1232     // Offsets for each triangle vertex from the pixel center.
1233     //    +-------------------+
1234     //    |         2         |
1235     //    |         x         |
1236     //    |        x x        |
1237     //    |       x   x       |
1238     //    |      x  x  x      |
1239     //    |     x       x     |
1240     //    |    xxxxxxxxxxx    |
1241     //    |   0           1   |
1242     //    +-------------------+
1243     const std::vector<tcu::Vec2> offsets{
1244         tcu::Vec2(-pixelWidth4, +pixelHeight4),
1245         tcu::Vec2(+pixelWidth4, +pixelHeight4),
1246         tcu::Vec2(0.0f, -pixelHeight4),
1247     };
1248 
1249     // We'll use two draw calls: triangles on the front and triangle that sets the background color, so we need two vertex buffers
1250     // and two primitive data buffers.
1251     const auto vertexBufferFrontSize = static_cast<VkDeviceSize>(triangleSize * pixelCount);
1252     const auto vertexBufferFrontInfo = makeBufferCreateInfo(vertexBufferFrontSize, bufferUsage);
1253     BufferWithMemory vertexBufferFront(vkd, device, alloc, vertexBufferFrontInfo, MemoryRequirement::HostVisible);
1254     auto &vertexBufferFrontAlloc = vertexBufferFront.getAllocation();
1255     void *vertexBufferFrontData  = vertexBufferFrontAlloc.getHostPtr();
1256 
1257     std::vector<tcu::Vec2> trianglePerPixel;
1258     trianglePerPixel.reserve(vertexCount);
1259 
1260     // Fill front vertex buffer.
1261     for (uint32_t y = 0u; y < PartialUsageCase::kHeight; ++y)
1262         for (uint32_t x = 0u; x < PartialUsageCase::kWidth; ++x)
1263             for (uint32_t v = 0u; v < verticesPerTriangle; ++v)
1264             {
1265                 const auto &offset = offsets.at(v);
1266                 const auto xCoord  = pixelToFBCoords(x, PartialUsageCase::kWidth) + offset.x();
1267                 const auto yCoord  = pixelToFBCoords(y, PartialUsageCase::kHeight) + offset.y();
1268                 trianglePerPixel.emplace_back(xCoord, yCoord);
1269             }
1270     deMemcpy(vertexBufferFrontData, trianglePerPixel.data(), de::dataSize(trianglePerPixel));
1271 
1272     // For the front triangles we will select some pixels randomly.
1273     using IndexAndColor = PartialUsageCase::IndexAndColor;
1274 
1275     std::set<uint32_t> selectedPixels;
1276     std::vector<IndexAndColor> indicesAndColors;
1277     de::Random rnd(1646058327u);
1278     const auto maxId           = static_cast<int>(pixelCount) - 1;
1279     const auto fTotalTriangles = static_cast<float>(PartialUsageCase::kTotalPrimitives);
1280 
1281     while (selectedPixels.size() < PartialUsageCase::kTotalPrimitives)
1282     {
1283         const auto pixelId = static_cast<uint32_t>(rnd.getInt(0, maxId));
1284         if (!selectedPixels.count(pixelId))
1285         {
1286             selectedPixels.insert(pixelId);
1287 
1288             const float colorVal = static_cast<float>(selectedPixels.size()) / fTotalTriangles;
1289             const IndexAndColor indexAndColor{pixelId, colorVal};
1290 
1291             indicesAndColors.push_back(indexAndColor);
1292         }
1293     }
1294 
1295     const auto primDataBufferFrontSize = static_cast<VkDeviceSize>(de::dataSize(indicesAndColors));
1296     const auto primDataBufferFrontInfo = makeBufferCreateInfo(primDataBufferFrontSize, bufferUsage);
1297     BufferWithMemory primDataBufferFront(vkd, device, alloc, primDataBufferFrontInfo, MemoryRequirement::HostVisible);
1298     auto &primDataBufferFrontAlloc = primDataBufferFront.getAllocation();
1299     void *primDataBufferFrontData  = primDataBufferFrontAlloc.getHostPtr();
1300     deMemcpy(primDataBufferFrontData, indicesAndColors.data(), de::dataSize(indicesAndColors));
1301 
1302     // Generate reference image based on the previous data.
1303     tcu::TextureLevel referenceLevel(colorTcuFormat, iExtent.x(), iExtent.y(), iExtent.z());
1304     tcu::PixelBufferAccess referenceAccess = referenceLevel.getAccess();
1305     const tcu::Vec4 bgColor(bgRed, 0.0f, bgBlue, 1.0f);
1306 
1307     tcu::clear(referenceAccess, bgColor);
1308     for (const auto &indexAndColor : indicesAndColors)
1309     {
1310         const int xCoord = static_cast<int>(indexAndColor.index % fbExtent.width);
1311         const int yCoord = static_cast<int>(indexAndColor.index / fbExtent.width);
1312         const tcu::Vec4 color(fgRed, 0.0f, indexAndColor.color, 1.0f);
1313 
1314         referenceAccess.setPixel(color, xCoord, yCoord);
1315     }
1316 
1317     // Background buffers. These will only contain one triangle.
1318     const std::vector<tcu::Vec2> backgroundTriangle{
1319         tcu::Vec2(-1.0f, -1.0f),
1320         tcu::Vec2(-1.0f, 3.0f),
1321         tcu::Vec2(3.0f, -1.0f),
1322     };
1323 
1324     const PartialUsageCase::IndexAndColor backgroundTriangleData{0u, bgBlue};
1325 
1326     const auto vertexBufferBackSize = static_cast<VkDeviceSize>(de::dataSize(backgroundTriangle));
1327     const auto vertexBufferBackInfo = makeBufferCreateInfo(vertexBufferBackSize, bufferUsage);
1328     BufferWithMemory vertexBufferBack(vkd, device, alloc, vertexBufferBackInfo, MemoryRequirement::HostVisible);
1329     auto &vertexBufferBackAlloc = vertexBufferBack.getAllocation();
1330     void *vertexBufferBackData  = vertexBufferBackAlloc.getHostPtr();
1331     deMemcpy(vertexBufferBackData, backgroundTriangle.data(), de::dataSize(backgroundTriangle));
1332 
1333     const auto primDataBufferBackSize = static_cast<VkDeviceSize>(sizeof(backgroundTriangleData));
1334     const auto primDataBufferBackInfo = makeBufferCreateInfo(primDataBufferBackSize, bufferUsage);
1335     BufferWithMemory primDataBufferBack(vkd, device, alloc, primDataBufferBackInfo, MemoryRequirement::HostVisible);
1336     auto &primDataBufferBackAlloc = primDataBufferBack.getAllocation();
1337     void *primDataBufferBackData  = primDataBufferBackAlloc.getHostPtr();
1338     deMemcpy(primDataBufferBackData, &backgroundTriangleData, sizeof(backgroundTriangleData));
1339 
1340     // Descriptor pool and descriptor sets.
1341     DescriptorPoolBuilder poolBuilder;
1342     poolBuilder.addType(bufferDescType, 4u);
1343     const auto descriptorPool = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 2u);
1344 
1345     DescriptorSetLayoutBuilder setLayoutBuilder;
1346     setLayoutBuilder.addSingleBinding(bufferDescType, bufferDescStages);
1347     setLayoutBuilder.addSingleBinding(bufferDescType, bufferDescStages);
1348     const auto setLayout = setLayoutBuilder.build(vkd, device);
1349 
1350     const auto setFront = makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get());
1351     const auto setBack  = makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get());
1352 
1353     // Update descriptor sets.
1354     DescriptorSetUpdateBuilder updateBuilder;
1355     {
1356         const auto bufferInfo = makeDescriptorBufferInfo(vertexBufferFront.get(), 0ull, vertexBufferFrontSize);
1357         updateBuilder.writeSingle(setFront.get(), DescriptorSetUpdateBuilder::Location::binding(0u), bufferDescType,
1358                                   &bufferInfo);
1359     }
1360     {
1361         const auto bufferInfo = makeDescriptorBufferInfo(primDataBufferFront.get(), 0ull, primDataBufferFrontSize);
1362         updateBuilder.writeSingle(setFront.get(), DescriptorSetUpdateBuilder::Location::binding(1u), bufferDescType,
1363                                   &bufferInfo);
1364     }
1365     {
1366         const auto bufferInfo = makeDescriptorBufferInfo(vertexBufferBack.get(), 0ull, vertexBufferBackSize);
1367         updateBuilder.writeSingle(setBack.get(), DescriptorSetUpdateBuilder::Location::binding(0u), bufferDescType,
1368                                   &bufferInfo);
1369     }
1370     {
1371         const auto bufferInfo = makeDescriptorBufferInfo(primDataBufferBack.get(), 0ull, primDataBufferBackSize);
1372         updateBuilder.writeSingle(setBack.get(), DescriptorSetUpdateBuilder::Location::binding(1u), bufferDescType,
1373                                   &bufferInfo);
1374     }
1375     updateBuilder.update(vkd, device);
1376 
1377     // Pipeline layout.
1378     const PipelineLayoutWrapper pipelineLayout(m_constructionType, vkd, device, setLayout.get(), &pcRange);
1379 
1380     // Shader modules.
1381     const auto &binaries  = m_context.getBinaryCollection();
1382     const auto taskShader = ShaderWrapper(vkd, device, binaries.get("task"));
1383     const auto meshShader = ShaderWrapper(vkd, device, binaries.get("mesh"));
1384     const auto fragShader = ShaderWrapper(vkd, device, binaries.get("frag"));
1385 
1386     // Render pass.
1387     const auto renderPass = makeRenderPass(vkd, device, colorFormat, dsFormat);
1388 
1389     // Color and depth/stencil buffers.
1390     const VkImageCreateInfo imageCreateInfo = {
1391         VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
1392         nullptr,                             // const void* pNext;
1393         0u,                                  // VkImageCreateFlags flags;
1394         VK_IMAGE_TYPE_2D,                    // VkImageType imageType;
1395         VK_FORMAT_UNDEFINED,                 // VkFormat format;
1396         fbExtent,                            // VkExtent3D extent;
1397         1u,                                  // uint32_t mipLevels;
1398         1u,                                  // uint32_t arrayLayers;
1399         VK_SAMPLE_COUNT_1_BIT,               // VkSampleCountFlagBits samples;
1400         VK_IMAGE_TILING_OPTIMAL,             // VkImageTiling tiling;
1401         0u,                                  // VkImageUsageFlags usage;
1402         VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
1403         0u,                                  // uint32_t queueFamilyIndexCount;
1404         nullptr,                             // const uint32_t* pQueueFamilyIndices;
1405         VK_IMAGE_LAYOUT_UNDEFINED,           // VkImageLayout initialLayout;
1406     };
1407 
1408     std::unique_ptr<ImageWithMemory> colorAttachment;
1409     {
1410         auto colorAttCreateInfo   = imageCreateInfo;
1411         colorAttCreateInfo.format = colorFormat;
1412         colorAttCreateInfo.usage  = (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
1413 
1414         colorAttachment.reset(new ImageWithMemory(vkd, device, alloc, colorAttCreateInfo, MemoryRequirement::Any));
1415     }
1416 
1417     std::unique_ptr<ImageWithMemory> dsAttachment;
1418     {
1419         auto dsAttCreateInfo   = imageCreateInfo;
1420         dsAttCreateInfo.format = dsFormat;
1421         dsAttCreateInfo.usage  = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
1422 
1423         dsAttachment.reset(new ImageWithMemory(vkd, device, alloc, dsAttCreateInfo, MemoryRequirement::Any));
1424     }
1425 
1426     const auto colorSRR = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
1427     const auto colorSRL = makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
1428     const auto dsSRR    = makeImageSubresourceRange(VK_IMAGE_ASPECT_DEPTH_BIT, 0u, 1u, 0u, 1u);
1429 
1430     const auto colorView =
1431         makeImageView(vkd, device, colorAttachment->get(), VK_IMAGE_VIEW_TYPE_2D, colorFormat, colorSRR);
1432     const auto dsView = makeImageView(vkd, device, dsAttachment->get(), VK_IMAGE_VIEW_TYPE_2D, dsFormat, dsSRR);
1433 
1434     // Create verification buffer.
1435     const auto verificationBufferSize =
1436         static_cast<VkDeviceSize>(tcu::getPixelSize(colorTcuFormat) * iExtent.x() * iExtent.y() * iExtent.z());
1437     const auto verificationBufferInfo = makeBufferCreateInfo(verificationBufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
1438     BufferWithMemory verificationBuffer(vkd, device, alloc, verificationBufferInfo, MemoryRequirement::HostVisible);
1439     auto &verificationBufferAlloc = verificationBuffer.getAllocation();
1440     void *verificationBufferData  = verificationBufferAlloc.getHostPtr();
1441 
1442     // Framebuffer.
1443     const std::vector<VkImageView> fbViews{colorView.get(), dsView.get()};
1444     const auto framebuffer = makeFramebuffer(vkd, device, renderPass.get(), static_cast<uint32_t>(fbViews.size()),
1445                                              de::dataOrNull(fbViews), fbExtent.width, fbExtent.height);
1446 
1447     // Viewports and scissors.
1448     const std::vector<VkViewport> viewports(1u, makeViewport(fbExtent));
1449     const std::vector<VkRect2D> scissors(1u, makeRect2D(fbExtent));
1450 
1451     // Pipeline.
1452     const VkStencilOpState stencilOpState              = {};
1453     const VkPipelineDepthStencilStateCreateInfo dsInfo = {
1454         VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, // VkStructureType sType;
1455         nullptr,                                                    // const void* pNext;
1456         0u,                                                         // VkPipelineDepthStencilStateCreateFlags flags;
1457         VK_TRUE,                                                    // VkBool32 depthTestEnable;
1458         VK_TRUE,                                                    // VkBool32 depthWriteEnable;
1459         VK_COMPARE_OP_LESS,                                         // VkCompareOp depthCompareOp;
1460         VK_FALSE,                                                   // VkBool32 depthBoundsTestEnable;
1461         VK_FALSE,                                                   // VkBool32 stencilTestEnable;
1462         stencilOpState,                                             // VkStencilOpState front;
1463         stencilOpState,                                             // VkStencilOpState back;
1464         0.0f,                                                       // float minDepthBounds;
1465         1.0f,                                                       // float maxDepthBounds;
1466     };
1467 
1468     GraphicsPipelineWrapper pipelineMaker(vki, vkd, physicalDevice, device, m_context.getDeviceExtensions(),
1469                                           m_constructionType);
1470     makeMeshGraphicsPipeline(pipelineMaker, pipelineLayout, taskShader, meshShader, fragShader, renderPass.get(),
1471                              viewports, scissors, 0u, &dsInfo);
1472     const auto pipeline = pipelineMaker.getPipeline();
1473 
1474     // Command pool and buffer.
1475     const auto cmdPool      = makeCommandPool(vkd, device, queueIndex);
1476     const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1477     const auto cmdBuffer    = cmdBufferPtr.get();
1478 
1479     // Draw the triangles in the front, then the triangle in the back.
1480     const tcu::Vec4 clearColor(0.0f, 0.0f, 0.0f, 1.0f);
1481     const float clearDepth      = 1.0f;
1482     const uint32_t clearStencil = 0u;
1483 
1484     const PartialUsageCase::PushConstants pcFront = {PartialUsageCase::kTotalPrimitives, 0.0f, fgRed};
1485     const PartialUsageCase::PushConstants pcBack  = {1u, 0.5f, bgRed};
1486 
1487     beginCommandBuffer(vkd, cmdBuffer);
1488     beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u), clearColor, clearDepth,
1489                     clearStencil);
1490     vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
1491 
1492     // Front triangles.
1493     vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout.get(), 0u, 1u, &setFront.get(),
1494                               0u, nullptr);
1495     vkd.cmdPushConstants(cmdBuffer, pipelineLayout.get(), pcStages, 0u, static_cast<uint32_t>(pcSize), &pcFront);
1496     vkd.cmdDrawMeshTasksEXT(cmdBuffer, 1u, 1u, 1u);
1497 
1498     // Back triangles.
1499     vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout.get(), 0u, 1u, &setBack.get(),
1500                               0u, nullptr);
1501     vkd.cmdPushConstants(cmdBuffer, pipelineLayout.get(), pcStages, 0u, static_cast<uint32_t>(pcSize), &pcBack);
1502     vkd.cmdDrawMeshTasksEXT(cmdBuffer, 1u, 1u, 1u);
1503 
1504     endRenderPass(vkd, cmdBuffer);
1505 
1506     // Copy color attachment to verification buffer.
1507     const auto colorToTransferBarrier = makeImageMemoryBarrier(
1508         VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
1509         VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorAttachment->get(), colorSRR);
1510     const auto transferToHostBarrier = makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
1511     const auto copyRegion            = makeBufferImageCopy(fbExtent, colorSRL);
1512 
1513     cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
1514                                   VK_PIPELINE_STAGE_TRANSFER_BIT, &colorToTransferBarrier);
1515     vkd.cmdCopyImageToBuffer(cmdBuffer, colorAttachment->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1516                              verificationBuffer.get(), 1u, &copyRegion);
1517     cmdPipelineMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
1518                              &transferToHostBarrier);
1519 
1520     endCommandBuffer(vkd, cmdBuffer);
1521     submitCommandsAndWait(vkd, device, queue, cmdBuffer);
1522 
1523     // Verify color attachment.
1524     invalidateAlloc(vkd, device, verificationBufferAlloc);
1525 
1526     tcu::ConstPixelBufferAccess resultAccess(colorTcuFormat, iExtent, verificationBufferData);
1527     auto &log = m_context.getTestContext().getLog();
1528     const tcu::Vec4 errorThreshold(colorThreshold, 0.0f, colorThreshold, 0.0f);
1529 
1530     if (!tcu::floatThresholdCompare(log, "Result", "", referenceAccess, resultAccess, errorThreshold,
1531                                     tcu::COMPARE_LOG_ON_ERROR))
1532         TCU_FAIL("Result does not match reference -- check log for details");
1533 
1534     return tcu::TestStatus::pass("Pass");
1535 }
1536 
1537 // Create a classic and a mesh shading pipeline using graphics pipeline libraries. Both pipelines will use the same fragment shader
1538 // pipeline library, and the fragment shader will use the gl_Layer built-in, which is per-primitive in mesh shaders and per-vertex
1539 // in vertex shaders.
1540 class SharedFragLibraryCase : public vkt::TestCase
1541 {
1542 public:
SharedFragLibraryCase(tcu::TestContext & testCtx,const std::string & name,PipelineConstructionType constructionType)1543     SharedFragLibraryCase(tcu::TestContext &testCtx, const std::string &name, PipelineConstructionType constructionType)
1544         : vkt::TestCase(testCtx, name)
1545         , m_constructionType(constructionType)
1546     {
1547     }
~SharedFragLibraryCase(void)1548     virtual ~SharedFragLibraryCase(void)
1549     {
1550     }
1551 
1552     void checkSupport(Context &context) const override;
1553     void initPrograms(vk::SourceCollections &programCollection) const override;
1554     TestInstance *createInstance(Context &context) const override;
1555 
1556     static std::vector<tcu::Vec4> getLayerColors(void);
1557 
1558 protected:
1559     PipelineConstructionType m_constructionType;
1560 };
1561 
1562 class SharedFragLibraryInstance : public vkt::TestInstance
1563 {
1564 public:
SharedFragLibraryInstance(Context & context,PipelineConstructionType constructionType)1565     SharedFragLibraryInstance(Context &context, PipelineConstructionType constructionType)
1566         : vkt::TestInstance(context)
1567         , m_constructionType(constructionType)
1568     {
1569     }
~SharedFragLibraryInstance(void)1570     virtual ~SharedFragLibraryInstance(void)
1571     {
1572     }
1573 
1574     tcu::TestStatus iterate(void) override;
1575 
1576 protected:
1577     PipelineConstructionType m_constructionType;
1578 };
1579 
getLayerColors(void)1580 std::vector<tcu::Vec4> SharedFragLibraryCase::getLayerColors(void)
1581 {
1582     std::vector<tcu::Vec4> layerColors{
1583         tcu::Vec4(0.0f, 0.0f, 0.0f, 1.0f),
1584         tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f),
1585         tcu::Vec4(1.0f, 1.0f, 0.0f, 1.0f),
1586     };
1587 
1588     return layerColors;
1589 }
1590 
checkSupport(Context & context) const1591 void SharedFragLibraryCase::checkSupport(Context &context) const
1592 {
1593     checkTaskMeshShaderSupportEXT(context, false /*requireTask*/, true /*requireMesh*/);
1594 
1595     if (context.getUsedApiVersion() < VK_API_VERSION_1_2)
1596         context.requireDeviceFunctionality("VK_EXT_shader_viewport_index_layer");
1597     else
1598     {
1599         // More fine-grained: we do not need shaderViewportIndex.
1600         const auto &vk12Features = context.getDeviceVulkan12Features();
1601         if (!vk12Features.shaderOutputLayer)
1602             TCU_THROW(NotSupportedError, "shaderOutputLayer not supported");
1603     }
1604 
1605     checkPipelineConstructionRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
1606                                           m_constructionType);
1607 }
1608 
initPrograms(vk::SourceCollections & programCollection) const1609 void SharedFragLibraryCase::initPrograms(vk::SourceCollections &programCollection) const
1610 {
1611     const auto meshBuildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
1612 
1613     const std::string vtxPositions = "vec2 positions[3] = vec2[](\n"
1614                                      "    vec2(-1.0, -1.0),\n"
1615                                      "    vec2(-1.0, 3.0),\n"
1616                                      "    vec2(3.0, -1.0)\n"
1617                                      ");\n";
1618 
1619     // The vertex shader emits geometry to layer 1.
1620     std::ostringstream vert;
1621     vert << "#version 450\n"
1622          << "#extension GL_ARB_shader_viewport_layer_array : enable\n"
1623          << "\n"
1624          << vtxPositions << "void main ()\n"
1625          << "{\n"
1626          << "    gl_Position = vec4(positions[gl_VertexIndex], 0.0, 1.0);\n"
1627          << "    gl_Layer = 1;\n"
1628          << "}\n";
1629     programCollection.glslSources.add("vert") << glu::VertexSource(vert.str());
1630     programCollection.glslSources.add("vert_1_2")
1631         << glu::VertexSource(vert.str())
1632         << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_5, 0u, true);
1633 
1634     // The mesh shader emits geometry to layer 2.
1635     std::ostringstream mesh;
1636     mesh << "#version 450\n"
1637          << "#extension GL_EXT_mesh_shader : enable\n"
1638          << "\n"
1639          << "layout (local_size_x=1, local_size_y=1, local_size_z=1) in;\n"
1640          << "layout (triangles) out;\n"
1641          << "layout (max_vertices=3, max_primitives=1) out;\n"
1642          << "\n"
1643          << "perprimitiveEXT out gl_MeshPerPrimitiveEXT {\n"
1644          << "   int gl_Layer;\n"
1645          << "} gl_MeshPrimitivesEXT[];\n"
1646          << "\n"
1647          << vtxPositions << "void main ()\n"
1648          << "{\n"
1649          << "    SetMeshOutputsEXT(3u, 1u);\n"
1650          << "    for (uint i = 0; i < 3; ++i)\n"
1651          << "        gl_MeshVerticesEXT[i].gl_Position = vec4(positions[i], 0.0, 1.0);\n"
1652          << "    gl_PrimitiveTriangleIndicesEXT[0] = uvec3(0, 1, 2);\n"
1653          << "    gl_MeshPrimitivesEXT[0].gl_Layer = 2;\n"
1654          << "}\n";
1655     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << meshBuildOptions;
1656 
1657     // The frag shader uses the gl_Layer built-in to choose an output color.
1658     const auto outColors = getLayerColors();
1659     DE_ASSERT(outColors.size() == 3);
1660 
1661     std::ostringstream frag;
1662     frag << "#version 450\n"
1663          << "\n"
1664          << "layout (location=0) out vec4 outColor;\n"
1665          << "\n"
1666          << "vec4 outColors[3] = vec4[](\n"
1667          << "    vec4" << outColors.at(0) << ",\n"
1668          << "    vec4" << outColors.at(1) << ",\n"
1669          << "    vec4" << outColors.at(2) << "\n"
1670          << ");\n"
1671          << "\n"
1672          << "void main ()\n"
1673          << "{\n"
1674          << "    outColor = outColors[gl_Layer];\n"
1675          << "}\n";
1676     programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str());
1677 }
1678 
createInstance(Context & context) const1679 TestInstance *SharedFragLibraryCase::createInstance(Context &context) const
1680 {
1681     return new SharedFragLibraryInstance(context, m_constructionType);
1682 }
1683 
makeLibCreateInfo(VkGraphicsPipelineLibraryFlagsEXT flags,void * pNext=nullptr)1684 VkGraphicsPipelineLibraryCreateInfoEXT makeLibCreateInfo(VkGraphicsPipelineLibraryFlagsEXT flags, void *pNext = nullptr)
1685 {
1686     const VkGraphicsPipelineLibraryCreateInfoEXT createInfo = {
1687         VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_LIBRARY_CREATE_INFO_EXT, // VkStructureType sType;
1688         pNext,                                                       // void* pNext;
1689         flags,                                                       // VkGraphicsPipelineLibraryFlagsEXT flags;
1690     };
1691 
1692     return createInfo;
1693 }
1694 
iterate(void)1695 tcu::TestStatus SharedFragLibraryInstance::iterate(void)
1696 {
1697     const auto &vkd        = m_context.getDeviceInterface();
1698     const auto &device     = m_context.getDevice();
1699     const auto queueIndex  = m_context.getUniversalQueueFamilyIndex();
1700     const auto queue       = m_context.getUniversalQueue();
1701     auto &alloc            = m_context.getDefaultAllocator();
1702     const auto layerColors = SharedFragLibraryCase::getLayerColors();
1703     const auto &clearColor = layerColors.front();
1704     const auto layerCount  = static_cast<uint32_t>(layerColors.size());
1705     const auto fbExtent    = makeExtent3D(1u, 1u, 1u);
1706     const tcu::IVec3 iExtent(static_cast<int>(fbExtent.width), static_cast<int>(fbExtent.height),
1707                              static_cast<int>(layerCount));
1708     const auto fbFormat          = VK_FORMAT_R8G8B8A8_UNORM;
1709     const auto tcuFormat         = mapVkFormat(fbFormat);
1710     const auto pixelSize         = tcu::getPixelSize(tcuFormat);
1711     const auto pixelCount        = fbExtent.width * fbExtent.height * layerCount;
1712     const auto fbUsage           = (VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
1713     const bool optimized         = (m_constructionType == PIPELINE_CONSTRUCTION_TYPE_LINK_TIME_OPTIMIZED_LIBRARY);
1714     const auto libExtraFlags     = (optimized ? VK_PIPELINE_CREATE_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT : 0);
1715     const auto libCompileFlags   = (VK_PIPELINE_CREATE_LIBRARY_BIT_KHR | libExtraFlags);
1716     const auto pipelineLinkFlags = (optimized ? VK_PIPELINE_CREATE_LINK_TIME_OPTIMIZATION_BIT_EXT : 0);
1717 
1718     // Color buffer.
1719     const VkImageCreateInfo colorBufferCreateInfo = {
1720         VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
1721         nullptr,                             // const void* pNext;
1722         0u,                                  // VkImageCreateFlags flags;
1723         VK_IMAGE_TYPE_2D,                    // VkImageType imageType;
1724         fbFormat,                            // VkFormat format;
1725         fbExtent,                            // VkExtent3D extent;
1726         1u,                                  // uint32_t mipLevels;
1727         layerCount,                          // uint32_t arrayLayers;
1728         VK_SAMPLE_COUNT_1_BIT,               // VkSampleCountFlagBits samples;
1729         VK_IMAGE_TILING_OPTIMAL,             // VkImageTiling tiling;
1730         fbUsage,                             // VkImageUsageFlags usage;
1731         VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
1732         0u,                                  // uint32_t queueFamilyIndexCount;
1733         nullptr,                             // const uint32_t* pQueueFamilyIndices;
1734         VK_IMAGE_LAYOUT_UNDEFINED,           // VkImageLayout initialLayout;
1735     };
1736 
1737     ImageWithMemory colorBuffer(vkd, device, alloc, colorBufferCreateInfo, MemoryRequirement::Any);
1738     const auto colorBufferSRR = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, layerCount);
1739     const auto colorBufferSRL = makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, layerCount);
1740     const auto colorBufferView =
1741         makeImageView(vkd, device, colorBuffer.get(), VK_IMAGE_VIEW_TYPE_2D_ARRAY, fbFormat, colorBufferSRR);
1742 
1743     // Render pass.
1744     const auto renderPass = makeRenderPass(vkd, device, fbFormat);
1745 
1746     // Framebuffer.
1747     const auto framebuffer = makeFramebuffer(vkd, device, renderPass.get(), colorBufferView.get(), fbExtent.width,
1748                                              fbExtent.height, layerCount);
1749 
1750     // Verification buffer.
1751     const auto verificationBufferSize = static_cast<VkDeviceSize>(static_cast<int>(pixelCount) * pixelSize);
1752     const auto verificationBufferInfo = makeBufferCreateInfo(verificationBufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
1753     BufferWithMemory verificationBuffer(vkd, device, alloc, verificationBufferInfo, MemoryRequirement::HostVisible);
1754     auto &verificationBufferAlloc = verificationBuffer.getAllocation();
1755     void *verificationBufferData  = verificationBufferAlloc.getHostPtr();
1756 
1757     // Pipeline layout (common).
1758     const auto pipelineLayout = makePipelineLayout(vkd, device);
1759 
1760     // Shader modules.
1761     const auto &binaries  = m_context.getBinaryCollection();
1762     const auto vertModule = createShaderModule(
1763         vkd, device, (m_context.contextSupports(VK_API_VERSION_1_2)) ? binaries.get("vert_1_2") : binaries.get("vert"));
1764     const auto meshModule = createShaderModule(vkd, device, binaries.get("mesh"));
1765     const auto fragModule = createShaderModule(vkd, device, binaries.get("frag"));
1766 
1767     // Fragment output state library (common).
1768     const VkColorComponentFlags colorComponentFlags =
1769         (VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT);
1770     const VkPipelineColorBlendAttachmentState colorBlendAttachmentState = {
1771         VK_FALSE,             // VkBool32                    blendEnable
1772         VK_BLEND_FACTOR_ZERO, // VkBlendFactor            srcColorBlendFactor
1773         VK_BLEND_FACTOR_ZERO, // VkBlendFactor            dstColorBlendFactor
1774         VK_BLEND_OP_ADD,      // VkBlendOp                colorBlendOp
1775         VK_BLEND_FACTOR_ZERO, // VkBlendFactor            srcAlphaBlendFactor
1776         VK_BLEND_FACTOR_ZERO, // VkBlendFactor            dstAlphaBlendFactor
1777         VK_BLEND_OP_ADD,      // VkBlendOp                alphaBlendOp
1778         colorComponentFlags,  // VkColorComponentFlags    colorWriteMask
1779     };
1780 
1781     const VkPipelineColorBlendStateCreateInfo colorBlendState = {
1782         VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, // VkStructureType sType;
1783         nullptr,                                                  // const void* pNext;
1784         0u,                                                       // VkPipelineColorBlendStateCreateFlags flags;
1785         VK_FALSE,                                                 // VkBool32 logicOpEnable;
1786         VK_LOGIC_OP_CLEAR,                                        // VkLogicOp logicOp;
1787         1u,                                                       // uint32_t attachmentCount;
1788         &colorBlendAttachmentState, // const VkPipelineColorBlendAttachmentState* pAttachments;
1789         {0.0f, 0.0f, 0.0f, 0.0f},   // float blendConstants[4];
1790     };
1791 
1792     const VkPipelineMultisampleStateCreateInfo multisampleState = {
1793         VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, // VkStructureType                                sType
1794         nullptr,               // const void*                                    pNext
1795         0u,                    // VkPipelineMultisampleStateCreateFlags        flags
1796         VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits                        rasterizationSamples
1797         VK_FALSE,              // VkBool32                                        sampleShadingEnable
1798         1.0f,                  // float                                        minSampleShading
1799         nullptr,               // const VkSampleMask*                            pSampleMask
1800         VK_FALSE,              // VkBool32                                        alphaToCoverageEnable
1801         VK_FALSE               // VkBool32                                        alphaToOneEnable
1802     };
1803 
1804     const auto fragOutputLibInfo = makeLibCreateInfo(VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT);
1805 
1806     VkGraphicsPipelineCreateInfo fragOutputInfo = initVulkanStructure();
1807     fragOutputInfo.layout                       = pipelineLayout.get();
1808     fragOutputInfo.renderPass                   = renderPass.get();
1809     fragOutputInfo.pColorBlendState             = &colorBlendState;
1810     fragOutputInfo.pMultisampleState            = &multisampleState;
1811     fragOutputInfo.flags                        = libCompileFlags;
1812     fragOutputInfo.pNext                        = &fragOutputLibInfo;
1813 
1814     const auto fragOutputLib = createGraphicsPipeline(vkd, device, DE_NULL, &fragOutputInfo);
1815 
1816     // Fragment shader lib (shared among the classic and mesh pipelines).
1817     const VkPipelineDepthStencilStateCreateInfo depthStencilStateCreateInfo = initVulkanStructure();
1818 
1819     const VkPipelineShaderStageCreateInfo fragShaderStageCreateInfo = {
1820         VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
1821         nullptr,                                             // const void* pNext;
1822         0u,                                                  // VkPipelineShaderStageCreateFlags flags;
1823         VK_SHADER_STAGE_FRAGMENT_BIT,                        // VkShaderStageFlagBits stage;
1824         fragModule.get(),                                    // VkShaderModule module;
1825         "main",                                              // const char* pName;
1826         nullptr,                                             // const VkSpecializationInfo* pSpecializationInfo;
1827     };
1828 
1829     const auto fragShaderLibInfo = makeLibCreateInfo(VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT);
1830 
1831     VkGraphicsPipelineCreateInfo fragShaderInfo = initVulkanStructure();
1832     fragShaderInfo.layout                       = pipelineLayout.get();
1833     fragShaderInfo.renderPass                   = renderPass.get();
1834     fragShaderInfo.pMultisampleState            = &multisampleState;
1835     fragShaderInfo.pDepthStencilState           = &depthStencilStateCreateInfo;
1836     fragShaderInfo.stageCount                   = 1u;
1837     fragShaderInfo.pStages                      = &fragShaderStageCreateInfo;
1838     fragShaderInfo.flags                        = libCompileFlags;
1839     fragShaderInfo.pNext                        = &fragShaderLibInfo;
1840 
1841     const auto fragShaderLib = createGraphicsPipeline(vkd, device, DE_NULL, &fragShaderInfo);
1842 
1843     // Vertex input state (common, but should be unused by the mesh shading pipeline).
1844     const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfo = initVulkanStructure();
1845     VkPipelineInputAssemblyStateCreateInfo inputAssemblyStateCreateInfo   = initVulkanStructure();
1846     inputAssemblyStateCreateInfo.topology                                 = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
1847     const auto vertexInputLibInfo = makeLibCreateInfo(VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT);
1848 
1849     VkGraphicsPipelineCreateInfo vertexInputInfo = initVulkanStructure();
1850     vertexInputInfo.layout                       = pipelineLayout.get();
1851     vertexInputInfo.pVertexInputState            = &vertexInputStateCreateInfo;
1852     vertexInputInfo.pInputAssemblyState          = &inputAssemblyStateCreateInfo;
1853     vertexInputInfo.flags                        = libCompileFlags;
1854     vertexInputInfo.pNext                        = &vertexInputLibInfo;
1855 
1856     const auto vertexInputLib = createGraphicsPipeline(vkd, device, DE_NULL, &vertexInputInfo);
1857 
1858     // Pre-rasterization shader state: common pieces.
1859     const std::vector<VkViewport> viewports(1u, makeViewport(fbExtent));
1860     const std::vector<VkRect2D> scissors(1u, makeRect2D(fbExtent));
1861 
1862     const VkPipelineViewportStateCreateInfo viewportStateCreateInfo = {
1863         VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, // VkStructureType sType;
1864         nullptr,                                               // const void* pNext;
1865         0u,                                                    // VkPipelineViewportStateCreateFlags flags;
1866         static_cast<uint32_t>(viewports.size()),               // uint32_t viewportCount;
1867         de::dataOrNull(viewports),                             // const VkViewport* pViewports;
1868         static_cast<uint32_t>(scissors.size()),                // uint32_t scissorCount;
1869         de::dataOrNull(scissors),                              // const VkRect2D* pScissors;
1870     };
1871 
1872     const VkPipelineRasterizationStateCreateInfo rasterizationStateCreateInfo = {
1873         VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, // VkStructureType sType;
1874         nullptr,                                                    // const void* pNext;
1875         0u,                                                         // VkPipelineRasterizationStateCreateFlags flags;
1876         VK_FALSE,                                                   // VkBool32 depthClampEnable;
1877         VK_FALSE,                                                   // VkBool32 rasterizerDiscardEnable;
1878         VK_POLYGON_MODE_FILL,                                       // VkPolygonMode polygonMode;
1879         VK_CULL_MODE_NONE,                                          // VkCullModeFlags cullMode;
1880         VK_FRONT_FACE_COUNTER_CLOCKWISE,                            // VkFrontFace frontFace;
1881         VK_FALSE,                                                   // VkBool32 depthBiasEnable;
1882         0.0f,                                                       // float depthBiasConstantFactor;
1883         0.0f,                                                       // float depthBiasClamp;
1884         0.0f,                                                       // float depthBiasSlopeFactor;
1885         1.0f,                                                       // float lineWidth;
1886     };
1887 
1888     const auto preRastLibInfo = makeLibCreateInfo(VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT);
1889 
1890     VkGraphicsPipelineCreateInfo preRastShaderInfo = initVulkanStructure();
1891     preRastShaderInfo.layout                       = pipelineLayout.get();
1892     preRastShaderInfo.pViewportState               = &viewportStateCreateInfo;
1893     preRastShaderInfo.pRasterizationState          = &rasterizationStateCreateInfo;
1894     preRastShaderInfo.renderPass                   = renderPass.get();
1895     preRastShaderInfo.flags                        = libCompileFlags;
1896     preRastShaderInfo.pNext                        = &preRastLibInfo;
1897     preRastShaderInfo.stageCount                   = 1u;
1898 
1899     // Vertex stage info.
1900     const VkPipelineShaderStageCreateInfo vertShaderStageCreateInfo = {
1901         VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
1902         nullptr,                                             // const void* pNext;
1903         0u,                                                  // VkPipelineShaderStageCreateFlags flags;
1904         VK_SHADER_STAGE_VERTEX_BIT,                          // VkShaderStageFlagBits stage;
1905         vertModule.get(),                                    // VkShaderModule module;
1906         "main",                                              // const char* pName;
1907         nullptr,                                             // const VkSpecializationInfo* pSpecializationInfo;
1908     };
1909 
1910     // Mesh stage info.
1911     const VkPipelineShaderStageCreateInfo meshShaderStageCreateInfo = {
1912         VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
1913         nullptr,                                             // const void* pNext;
1914         0u,                                                  // VkPipelineShaderStageCreateFlags flags;
1915         VK_SHADER_STAGE_MESH_BIT_EXT,                        // VkShaderStageFlagBits stage;
1916         meshModule.get(),                                    // VkShaderModule module;
1917         "main",                                              // const char* pName;
1918         nullptr,                                             // const VkSpecializationInfo* pSpecializationInfo;
1919     };
1920 
1921     // Pre-rasterization shader libs.
1922     preRastShaderInfo.pStages    = &vertShaderStageCreateInfo;
1923     const auto preRastClassicLib = createGraphicsPipeline(vkd, device, DE_NULL, &preRastShaderInfo);
1924 
1925     preRastShaderInfo.pStages = &meshShaderStageCreateInfo;
1926     const auto preRastMeshLib = createGraphicsPipeline(vkd, device, DE_NULL, &preRastShaderInfo);
1927 
1928     // Pipelines.
1929     const std::vector<VkPipeline> classicLibs{vertexInputLib.get(), preRastClassicLib.get(), fragShaderLib.get(),
1930                                               fragOutputLib.get()};
1931     const std::vector<VkPipeline> meshLibs{vertexInputLib.get(), preRastMeshLib.get(), fragShaderLib.get(),
1932                                            fragOutputLib.get()};
1933 
1934     const VkPipelineLibraryCreateInfoKHR classicLinkInfo = {
1935         VK_STRUCTURE_TYPE_PIPELINE_LIBRARY_CREATE_INFO_KHR, // VkStructureType sType;
1936         nullptr,                                            // const void* pNext;
1937         static_cast<uint32_t>(classicLibs.size()),          // uint32_t libraryCount;
1938         de::dataOrNull(classicLibs),                        // const VkPipeline* pLibraries;
1939     };
1940 
1941     const VkPipelineLibraryCreateInfoKHR meshLinkInfo = {
1942         VK_STRUCTURE_TYPE_PIPELINE_LIBRARY_CREATE_INFO_KHR, // VkStructureType sType;
1943         nullptr,                                            // const void* pNext;
1944         static_cast<uint32_t>(meshLibs.size()),             // uint32_t libraryCount;
1945         de::dataOrNull(meshLibs),                           // const VkPipeline* pLibraries;
1946     };
1947 
1948     VkGraphicsPipelineCreateInfo classicPipelineCreateInfo = initVulkanStructure();
1949     classicPipelineCreateInfo.flags                        = pipelineLinkFlags;
1950     classicPipelineCreateInfo.layout                       = pipelineLayout.get();
1951     classicPipelineCreateInfo.pNext                        = &classicLinkInfo;
1952 
1953     VkGraphicsPipelineCreateInfo meshPipelineCreateInfo = initVulkanStructure();
1954     meshPipelineCreateInfo.flags                        = pipelineLinkFlags;
1955     meshPipelineCreateInfo.layout                       = pipelineLayout.get();
1956     meshPipelineCreateInfo.pNext                        = &meshLinkInfo;
1957 
1958     const auto classicPipeline = createGraphicsPipeline(vkd, device, DE_NULL, &classicPipelineCreateInfo);
1959     const auto meshPipeline    = createGraphicsPipeline(vkd, device, DE_NULL, &meshPipelineCreateInfo);
1960 
1961     // Record commands with both pipelines.
1962     const auto cmdPool      = makeCommandPool(vkd, device, queueIndex);
1963     const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1964     const auto cmdBuffer    = cmdBufferPtr.get();
1965 
1966     beginCommandBuffer(vkd, cmdBuffer);
1967 
1968     // Draw using both pipelines.
1969     beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0), clearColor);
1970     vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, classicPipeline.get());
1971     vkd.cmdDraw(cmdBuffer, 3u, 1u, 0u, 0u);
1972     vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, meshPipeline.get());
1973     vkd.cmdDrawMeshTasksEXT(cmdBuffer, 1u, 1u, 1u);
1974     endRenderPass(vkd, cmdBuffer);
1975 
1976     // Copy color buffer to verification buffer.
1977     const auto preTransferBarrier = makeImageMemoryBarrier(
1978         VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
1979         VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorBuffer.get(), colorBufferSRR);
1980 
1981     const auto postTransferBarrier = makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
1982 
1983     const auto copyRegion = makeBufferImageCopy(fbExtent, colorBufferSRL);
1984 
1985     cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
1986                                   VK_PIPELINE_STAGE_TRANSFER_BIT, &preTransferBarrier);
1987     vkd.cmdCopyImageToBuffer(cmdBuffer, colorBuffer.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1988                              verificationBuffer.get(), 1u, &copyRegion);
1989     cmdPipelineMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
1990                              &postTransferBarrier);
1991 
1992     endCommandBuffer(vkd, cmdBuffer);
1993     submitCommandsAndWait(vkd, device, queue, cmdBuffer);
1994 
1995     // Validate color buffer.
1996     invalidateAlloc(vkd, device, verificationBufferAlloc);
1997 
1998     tcu::ConstPixelBufferAccess resultAccess(tcuFormat, iExtent, verificationBufferData);
1999     auto &log = m_context.getTestContext().getLog();
2000     bool fail = false;
2001 
2002     for (int z = 0; z < iExtent.z(); ++z)
2003     {
2004         const auto &expectedColor = layerColors.at(z);
2005         for (int y = 0; y < iExtent.y(); ++y)
2006             for (int x = 0; x < iExtent.x(); ++x)
2007             {
2008                 const auto resultColor = resultAccess.getPixel(x, y, z);
2009                 if (resultColor != expectedColor)
2010                 {
2011                     std::ostringstream msg;
2012                     msg << "Unexpected color at coordinates (x=" << x << ", y=" << y << ", layer=" << z
2013                         << "): expected " << expectedColor << " but found " << resultColor;
2014                     log << tcu::TestLog::Message << msg.str() << tcu::TestLog::EndMessage;
2015                     fail = true;
2016                 }
2017             }
2018     }
2019 
2020     if (fail)
2021         return tcu::TestStatus::fail("Failed; check log for details");
2022     return tcu::TestStatus::pass("Pass");
2023 }
2024 
2025 } // anonymous namespace
2026 
createMeshShaderSmokeTestsEXT(tcu::TestContext & testCtx)2027 tcu::TestCaseGroup *createMeshShaderSmokeTestsEXT(tcu::TestContext &testCtx)
2028 {
2029     struct
2030     {
2031         PipelineConstructionType constructionType;
2032         const char *name;
2033     } constructionTypes[] = {
2034         {PIPELINE_CONSTRUCTION_TYPE_MONOLITHIC, "monolithic"},
2035         {PIPELINE_CONSTRUCTION_TYPE_LINK_TIME_OPTIMIZED_LIBRARY, "optimized_lib"},
2036         {PIPELINE_CONSTRUCTION_TYPE_FAST_LINKED_LIBRARY, "fast_lib"},
2037     };
2038 
2039     GroupPtr smokeTests(new tcu::TestCaseGroup(testCtx, "smoke"));
2040 
2041     for (const auto &constructionCase : constructionTypes)
2042     {
2043         GroupPtr constructionGroup(new tcu::TestCaseGroup(testCtx, constructionCase.name));
2044 
2045         const auto &cType = constructionCase.constructionType;
2046 
2047         constructionGroup->addChild(new MeshOnlyTriangleCase(testCtx, "mesh_shader_triangle", cType));
2048         constructionGroup->addChild(new MeshOnlyTriangleCase(testCtx, "mesh_shader_triangle_rasterization_disabled",
2049                                                              cType, true /*rasterizationDisabled*/));
2050         constructionGroup->addChild(new MeshTaskTriangleCase(testCtx, "mesh_task_shader_triangle", cType));
2051         constructionGroup->addChild(new TaskOnlyTriangleCase(testCtx, "task_only_shader_triangle", cType));
2052 
2053         for (int i = 0; i < 2; ++i)
2054         {
2055             const bool compaction        = (i == 0);
2056             const std::string nameSuffix = (compaction ? "" : "_without_compaction");
2057             const PartialUsageParams params{cType, compaction};
2058 
2059             constructionGroup->addChild(new PartialUsageCase(testCtx, "partial_usage" + nameSuffix, params));
2060         }
2061 
2062         addFunctionCaseWithPrograms(constructionGroup.get(), "fullscreen_gradient", checkMeshSupport,
2063                                     initGradientPrograms, testFullscreenGradient,
2064                                     GradientParams(tcu::nothing<FragmentSize>(), cType));
2065         addFunctionCaseWithPrograms(constructionGroup.get(), "fullscreen_gradient_fs2x2", checkMeshSupport,
2066                                     initGradientPrograms, testFullscreenGradient,
2067                                     GradientParams(tcu::just(FragmentSize::SIZE_2X2), cType));
2068         addFunctionCaseWithPrograms(constructionGroup.get(), "fullscreen_gradient_fs2x1", checkMeshSupport,
2069                                     initGradientPrograms, testFullscreenGradient,
2070                                     GradientParams(tcu::just(FragmentSize::SIZE_2X1), cType));
2071 
2072         if (cType != PIPELINE_CONSTRUCTION_TYPE_MONOLITHIC)
2073         {
2074             constructionGroup->addChild(new SharedFragLibraryCase(testCtx, "shared_frag_library", cType));
2075         }
2076 
2077         smokeTests->addChild(constructionGroup.release());
2078     }
2079 
2080     return smokeTests.release();
2081 }
2082 
2083 } // namespace MeshShader
2084 } // namespace vkt
2085