xref: /aosp_15_r20/external/deqp/external/vulkancts/modules/vulkan/mesh_shader/vktMeshShaderMiscTestsEXT.cpp (revision 35238bce31c2a825756842865a792f8cf7f89930)
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2021 The Khronos Group Inc.
6  * Copyright (c) 2021 Valve Corporation.
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief Mesh Shader Misc Tests for VK_EXT_mesh_shader
23  *//*--------------------------------------------------------------------*/
24 
25 #include "vktMeshShaderMiscTests.hpp"
26 #include "vktMeshShaderUtil.hpp"
27 #include "vktTestCase.hpp"
28 
29 #include "vkBuilderUtil.hpp"
30 #include "vkImageWithMemory.hpp"
31 #include "vkBufferWithMemory.hpp"
32 #include "vkObjUtil.hpp"
33 #include "vkTypeUtil.hpp"
34 #include "vkCmdUtil.hpp"
35 #include "vkImageUtil.hpp"
36 #include "vkBarrierUtil.hpp"
37 
38 #include "tcuDefs.hpp"
39 #include "tcuVectorType.hpp"
40 #include "tcuImageCompare.hpp"
41 #include "tcuTexture.hpp"
42 #include "tcuTextureUtil.hpp"
43 #include "tcuMaybe.hpp"
44 #include "tcuStringTemplate.hpp"
45 #include "tcuTestLog.hpp"
46 
47 #include "deRandom.hpp"
48 
49 #include <cstdint>
50 #include <memory>
51 #include <utility>
52 #include <vector>
53 #include <string>
54 #include <sstream>
55 #include <map>
56 #include <type_traits>
57 #include <limits>
58 
59 namespace vkt
60 {
61 namespace MeshShader
62 {
63 
64 namespace
65 {
66 
67 using GroupPtr = de::MovePtr<tcu::TestCaseGroup>;
68 
69 using namespace vk;
70 
71 // Output images will use this format.
getOutputFormat()72 VkFormat getOutputFormat()
73 {
74     return VK_FORMAT_R8G8B8A8_UNORM;
75 }
76 
// Per-channel tolerance used when comparing images in the output format. The value lies
// strictly between one 8-bit step (1/256) and two 8-bit steps (2/256).
float getCompareThreshold()
{
    constexpr float kThreshold = 0.005f;
    return kThreshold;
}
82 
83 // Check mesh shader support.
genericCheckSupport(Context & context,bool requireTaskShader,bool requireVertexStores)84 void genericCheckSupport(Context &context, bool requireTaskShader, bool requireVertexStores)
85 {
86     checkTaskMeshShaderSupportEXT(context, requireTaskShader, true);
87 
88     if (requireVertexStores)
89     {
90         context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_VERTEX_PIPELINE_STORES_AND_ATOMICS);
91     }
92 }
93 
94 struct MiscTestParams
95 {
96     tcu::Maybe<tcu::UVec3> taskCount;
97     tcu::UVec3 meshCount;
98 
99     uint32_t width;
100     uint32_t height;
101 
MiscTestParamsvkt::MeshShader::__anonb7c155300111::MiscTestParams102     MiscTestParams(const tcu::Maybe<tcu::UVec3> &taskCount_, const tcu::UVec3 &meshCount_, uint32_t width_,
103                    uint32_t height_)
104         : taskCount(taskCount_)
105         , meshCount(meshCount_)
106         , width(width_)
107         , height(height_)
108     {
109     }
110 
111     // Makes the class polymorphic and allows the right destructor to be used for subclasses.
~MiscTestParamsvkt::MeshShader::__anonb7c155300111::MiscTestParams112     virtual ~MiscTestParams()
113     {
114     }
115 
needsTaskShadervkt::MeshShader::__anonb7c155300111::MiscTestParams116     bool needsTaskShader() const
117     {
118         return static_cast<bool>(taskCount);
119     }
120 
drawCountvkt::MeshShader::__anonb7c155300111::MiscTestParams121     tcu::UVec3 drawCount() const
122     {
123         if (needsTaskShader())
124             return taskCount.get();
125         return meshCount;
126     }
127 };
128 
129 using ParamsPtr = std::unique_ptr<MiscTestParams>;
130 
131 class MeshShaderMiscCase : public vkt::TestCase
132 {
133 public:
134     MeshShaderMiscCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params);
~MeshShaderMiscCase(void)135     virtual ~MeshShaderMiscCase(void)
136     {
137     }
138 
139     void checkSupport(Context &context) const override;
140     void initPrograms(vk::SourceCollections &programCollection) const override;
141 
142 protected:
143     std::unique_ptr<MiscTestParams> m_params;
144 };
145 
MeshShaderMiscCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)146 MeshShaderMiscCase::MeshShaderMiscCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
147     : vkt::TestCase(testCtx, name)
148     , m_params(params.release())
149 {
150 }
151 
checkSupport(Context & context) const152 void MeshShaderMiscCase::checkSupport(Context &context) const
153 {
154     genericCheckSupport(context, m_params->needsTaskShader(), /*requireVertexStores*/ false);
155 }
156 
157 // Adds the generic fragment shader. To be called by subclasses.
initPrograms(vk::SourceCollections & programCollection) const158 void MeshShaderMiscCase::initPrograms(vk::SourceCollections &programCollection) const
159 {
160     const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
161 
162     std::string frag = "#version 450\n"
163                        "#extension GL_EXT_mesh_shader : enable\n"
164                        "\n"
165                        "layout (location=0) in perprimitiveEXT vec4 primitiveColor;\n"
166                        "layout (location=0) out vec4 outColor;\n"
167                        "\n"
168                        "void main ()\n"
169                        "{\n"
170                        "    outColor = primitiveColor;\n"
171                        "}\n";
172     programCollection.glslSources.add("frag") << glu::FragmentSource(frag) << buildOptions;
173 }
174 
175 class MeshShaderMiscInstance : public vkt::TestInstance
176 {
177 public:
MeshShaderMiscInstance(Context & context,const MiscTestParams * params)178     MeshShaderMiscInstance(Context &context, const MiscTestParams *params)
179         : vkt::TestInstance(context)
180         , m_params(params)
181         , m_referenceLevel()
182     {
183     }
184 
185     void generateSolidRefLevel(const tcu::Vec4 &color, std::unique_ptr<tcu::TextureLevel> &output);
186     virtual void generateReferenceLevel() = 0;
187 
188     virtual bool verifyResult(const tcu::ConstPixelBufferAccess &resultAccess,
189                               const tcu::TextureLevel &referenceLevel) const;
190     virtual bool verifyResult(const tcu::ConstPixelBufferAccess &resultAccess) const;
191     tcu::TestStatus iterate() override;
192 
193 protected:
194     const MiscTestParams *m_params;
195     std::unique_ptr<tcu::TextureLevel> m_referenceLevel;
196 };
197 
generateSolidRefLevel(const tcu::Vec4 & color,std::unique_ptr<tcu::TextureLevel> & output)198 void MeshShaderMiscInstance::generateSolidRefLevel(const tcu::Vec4 &color, std::unique_ptr<tcu::TextureLevel> &output)
199 {
200     const auto format    = getOutputFormat();
201     const auto tcuFormat = mapVkFormat(format);
202 
203     const auto iWidth  = static_cast<int>(m_params->width);
204     const auto iHeight = static_cast<int>(m_params->height);
205 
206     output.reset(new tcu::TextureLevel(tcuFormat, iWidth, iHeight));
207 
208     const auto access = output->getAccess();
209 
210     // Fill with solid color.
211     tcu::clear(access, color);
212 }
213 
verifyResult(const tcu::ConstPixelBufferAccess & resultAccess) const214 bool MeshShaderMiscInstance::verifyResult(const tcu::ConstPixelBufferAccess &resultAccess) const
215 {
216     return verifyResult(resultAccess, *m_referenceLevel);
217 }
218 
verifyResult(const tcu::ConstPixelBufferAccess & resultAccess,const tcu::TextureLevel & referenceLevel) const219 bool MeshShaderMiscInstance::verifyResult(const tcu::ConstPixelBufferAccess &resultAccess,
220                                           const tcu::TextureLevel &referenceLevel) const
221 {
222     const auto referenceAccess = referenceLevel.getAccess();
223 
224     const auto refWidth  = referenceAccess.getWidth();
225     const auto refHeight = referenceAccess.getHeight();
226     const auto refDepth  = referenceAccess.getDepth();
227 
228     const auto resWidth  = resultAccess.getWidth();
229     const auto resHeight = resultAccess.getHeight();
230     const auto resDepth  = resultAccess.getDepth();
231 
232     DE_ASSERT(resWidth == refWidth || resHeight == refHeight || resDepth == refDepth);
233 
234     // For release builds.
235     DE_UNREF(refWidth);
236     DE_UNREF(refHeight);
237     DE_UNREF(refDepth);
238     DE_UNREF(resWidth);
239     DE_UNREF(resHeight);
240     DE_UNREF(resDepth);
241 
242     const auto outputFormat   = getOutputFormat();
243     const auto expectedFormat = mapVkFormat(outputFormat);
244     const auto resFormat      = resultAccess.getFormat();
245     const auto refFormat      = referenceAccess.getFormat();
246 
247     DE_ASSERT(resFormat == expectedFormat && refFormat == expectedFormat);
248 
249     // For release builds
250     DE_UNREF(expectedFormat);
251     DE_UNREF(resFormat);
252     DE_UNREF(refFormat);
253 
254     auto &log            = m_context.getTestContext().getLog();
255     const auto threshold = getCompareThreshold();
256     const tcu::Vec4 thresholdVec(threshold, threshold, threshold, threshold);
257 
258     return tcu::floatThresholdCompare(log, "Result", "", referenceAccess, resultAccess, thresholdVec,
259                                       tcu::COMPARE_LOG_ON_ERROR);
260 }
261 
iterate()262 tcu::TestStatus MeshShaderMiscInstance::iterate()
263 {
264     const auto &vkd       = m_context.getDeviceInterface();
265     const auto device     = m_context.getDevice();
266     auto &alloc           = m_context.getDefaultAllocator();
267     const auto queueIndex = m_context.getUniversalQueueFamilyIndex();
268     const auto queue      = m_context.getUniversalQueue();
269 
270     const auto imageFormat = getOutputFormat();
271     const auto tcuFormat   = mapVkFormat(imageFormat);
272     const auto imageExtent = makeExtent3D(m_params->width, m_params->height, 1u);
273     const auto imageUsage  = (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
274 
275     const VkImageCreateInfo colorBufferInfo = {
276         VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
277         nullptr,                             // const void* pNext;
278         0u,                                  // VkImageCreateFlags flags;
279         VK_IMAGE_TYPE_2D,                    // VkImageType imageType;
280         imageFormat,                         // VkFormat format;
281         imageExtent,                         // VkExtent3D extent;
282         1u,                                  // uint32_t mipLevels;
283         1u,                                  // uint32_t arrayLayers;
284         VK_SAMPLE_COUNT_1_BIT,               // VkSampleCountFlagBits samples;
285         VK_IMAGE_TILING_OPTIMAL,             // VkImageTiling tiling;
286         imageUsage,                          // VkImageUsageFlags usage;
287         VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
288         0u,                                  // uint32_t queueFamilyIndexCount;
289         nullptr,                             // const uint32_t* pQueueFamilyIndices;
290         VK_IMAGE_LAYOUT_UNDEFINED,           // VkImageLayout initialLayout;
291     };
292 
293     // Create color image and view.
294     ImageWithMemory colorImage(vkd, device, alloc, colorBufferInfo, MemoryRequirement::Any);
295     const auto colorSRR  = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
296     const auto colorSRL  = makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
297     const auto colorView = makeImageView(vkd, device, colorImage.get(), VK_IMAGE_VIEW_TYPE_2D, imageFormat, colorSRR);
298 
299     // Create a memory buffer for verification.
300     const auto verificationBufferSize =
301         static_cast<VkDeviceSize>(imageExtent.width * imageExtent.height * tcu::getPixelSize(tcuFormat));
302     const auto verificationBufferUsage = (VK_BUFFER_USAGE_TRANSFER_DST_BIT);
303     const auto verificationBufferInfo  = makeBufferCreateInfo(verificationBufferSize, verificationBufferUsage);
304 
305     BufferWithMemory verificationBuffer(vkd, device, alloc, verificationBufferInfo, MemoryRequirement::HostVisible);
306     auto &verificationBufferAlloc = verificationBuffer.getAllocation();
307     void *verificationBufferData  = verificationBufferAlloc.getHostPtr();
308 
309     // Pipeline layout.
310     const auto pipelineLayout = makePipelineLayout(vkd, device);
311 
312     // Shader modules.
313     const auto &binaries = m_context.getBinaryCollection();
314     const auto hasTask   = binaries.contains("task");
315 
316     const auto meshShader = createShaderModule(vkd, device, binaries.get("mesh"));
317     const auto fragShader = createShaderModule(vkd, device, binaries.get("frag"));
318 
319     Move<VkShaderModule> taskShader;
320     if (hasTask)
321         taskShader = createShaderModule(vkd, device, binaries.get("task"));
322 
323     // Render pass.
324     const auto renderPass = makeRenderPass(vkd, device, imageFormat);
325 
326     // Framebuffer.
327     const auto framebuffer =
328         makeFramebuffer(vkd, device, renderPass.get(), colorView.get(), imageExtent.width, imageExtent.height);
329 
330     // Viewport and scissor.
331     const std::vector<VkViewport> viewports(1u, makeViewport(imageExtent));
332     const std::vector<VkRect2D> scissors(1u, makeRect2D(imageExtent));
333 
334     // Color blending.
335     const auto colorWriteMask =
336         (VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT);
337     const VkPipelineColorBlendAttachmentState blendAttState = {
338         VK_TRUE,             // VkBool32 blendEnable;
339         VK_BLEND_FACTOR_ONE, // VkBlendFactor srcColorBlendFactor;
340         VK_BLEND_FACTOR_ONE, // VkBlendFactor dstColorBlendFactor;
341         VK_BLEND_OP_ADD,     // VkBlendOp colorBlendOp;
342         VK_BLEND_FACTOR_ONE, // VkBlendFactor srcAlphaBlendFactor;
343         VK_BLEND_FACTOR_ONE, // VkBlendFactor dstAlphaBlendFactor;
344         VK_BLEND_OP_ADD,     // VkBlendOp alphaBlendOp;
345         colorWriteMask,      // VkColorComponentFlags colorWriteMask;
346     };
347 
348     const VkPipelineColorBlendStateCreateInfo colorBlendInfo = {
349         VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, // VkStructureType sType;
350         nullptr,                                                  // const void* pNext;
351         0u,                                                       // VkPipelineColorBlendStateCreateFlags flags;
352         VK_FALSE,                                                 // VkBool32 logicOpEnable;
353         VK_LOGIC_OP_OR,                                           // VkLogicOp logicOp;
354         1u,                                                       // uint32_t attachmentCount;
355         &blendAttState,           // const VkPipelineColorBlendAttachmentState* pAttachments;
356         {0.0f, 0.0f, 0.0f, 0.0f}, // float blendConstants[4];
357     };
358 
359     const auto pipeline = makeGraphicsPipeline(vkd, device, pipelineLayout.get(), taskShader.get(), meshShader.get(),
360                                                fragShader.get(), renderPass.get(), viewports, scissors, 0u /*subpass*/,
361                                                nullptr, nullptr, nullptr, &colorBlendInfo);
362 
363     // Command pool and buffer.
364     const auto cmdPool      = makeCommandPool(vkd, device, queueIndex);
365     const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
366     const auto cmdBuffer    = cmdBufferPtr.get();
367 
368     beginCommandBuffer(vkd, cmdBuffer);
369 
370     // Run pipeline.
371     const tcu::Vec4 clearColor(0.0f, 0.0f, 0.0f, 0.0f);
372     const auto drawCount = m_params->drawCount();
373     beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u), clearColor);
374     vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
375     vkd.cmdDrawMeshTasksEXT(cmdBuffer, drawCount.x(), drawCount.y(), drawCount.z());
376     endRenderPass(vkd, cmdBuffer);
377 
378     // Copy color buffer to verification buffer.
379     const auto colorAccess   = (VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT);
380     const auto transferRead  = VK_ACCESS_TRANSFER_READ_BIT;
381     const auto transferWrite = VK_ACCESS_TRANSFER_WRITE_BIT;
382     const auto hostRead      = VK_ACCESS_HOST_READ_BIT;
383 
384     const auto preCopyBarrier =
385         makeImageMemoryBarrier(colorAccess, transferRead, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
386                                VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorImage.get(), colorSRR);
387     const auto postCopyBarrier = makeMemoryBarrier(transferWrite, hostRead);
388     const auto copyRegion      = makeBufferImageCopy(imageExtent, colorSRL);
389 
390     vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u,
391                            0u, nullptr, 0u, nullptr, 1u, &preCopyBarrier);
392     vkd.cmdCopyImageToBuffer(cmdBuffer, colorImage.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
393                              verificationBuffer.get(), 1u, &copyRegion);
394     vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u,
395                            &postCopyBarrier, 0u, nullptr, 0u, nullptr);
396 
397     endCommandBuffer(vkd, cmdBuffer);
398     submitCommandsAndWait(vkd, device, queue, cmdBuffer);
399 
400     // Generate reference image and compare results.
401     const tcu::IVec3 iExtent(static_cast<int>(imageExtent.width), static_cast<int>(imageExtent.height), 1);
402     const tcu::ConstPixelBufferAccess verificationAccess(tcuFormat, iExtent, verificationBufferData);
403 
404     generateReferenceLevel();
405     invalidateAlloc(vkd, device, verificationBufferAlloc);
406     if (!verifyResult(verificationAccess))
407         TCU_FAIL("Result does not match reference; check log for details");
408 
409     return tcu::TestStatus::pass("Pass");
410 }
411 
412 // Verify passing more complex data between the task and mesh shaders.
413 class ComplexTaskDataCase : public MeshShaderMiscCase
414 {
415 public:
ComplexTaskDataCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)416     ComplexTaskDataCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
417         : MeshShaderMiscCase(testCtx, name, std::move(params))
418     {
419     }
420 
421     void initPrograms(vk::SourceCollections &programCollection) const override;
422     TestInstance *createInstance(Context &context) const override;
423 };
424 
425 class ComplexTaskDataInstance : public MeshShaderMiscInstance
426 {
427 public:
ComplexTaskDataInstance(Context & context,const MiscTestParams * params)428     ComplexTaskDataInstance(Context &context, const MiscTestParams *params) : MeshShaderMiscInstance(context, params)
429     {
430     }
431 
432     void generateReferenceLevel() override;
433 };
434 
generateReferenceLevel()435 void ComplexTaskDataInstance::generateReferenceLevel()
436 {
437     const auto format    = getOutputFormat();
438     const auto tcuFormat = mapVkFormat(format);
439 
440     const auto iWidth  = static_cast<int>(m_params->width);
441     const auto iHeight = static_cast<int>(m_params->height);
442 
443     const auto halfWidth  = iWidth / 2;
444     const auto halfHeight = iHeight / 2;
445 
446     m_referenceLevel.reset(new tcu::TextureLevel(tcuFormat, iWidth, iHeight));
447 
448     const auto access = m_referenceLevel->getAccess();
449 
450     // Each image quadrant gets a different color.
451     for (int y = 0; y < iHeight; ++y)
452         for (int x = 0; x < iWidth; ++x)
453         {
454             const float red     = ((y < halfHeight) ? 0.0f : 1.0f);
455             const float green   = ((x < halfWidth) ? 0.0f : 1.0f);
456             const auto refColor = tcu::Vec4(red, green, 1.0f, 1.0f);
457             access.setPixel(refColor, x, y);
458         }
459 }
460 
initPrograms(vk::SourceCollections & programCollection) const461 void ComplexTaskDataCase::initPrograms(vk::SourceCollections &programCollection) const
462 {
463     const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
464 
465     // Add the generic fragment shader.
466     MeshShaderMiscCase::initPrograms(programCollection);
467 
468     const std::string taskDataDecl = "struct RowId {\n"
469                                      "    uint id;\n"
470                                      "};\n"
471                                      "\n"
472                                      "struct WorkGroupData {\n"
473                                      "    float WorkGroupIdPlusOnex1000Iota[10];\n"
474                                      "    RowId rowId;\n"
475                                      "    uvec3 WorkGroupIdPlusOnex2000Iota;\n"
476                                      "    vec2  WorkGroupIdPlusOnex3000Iota;\n"
477                                      "};\n"
478                                      "\n"
479                                      "struct ExternalData {\n"
480                                      "    float OneMillion;\n"
481                                      "    uint  TwoMillion;\n"
482                                      "    WorkGroupData workGroupData;\n"
483                                      "};\n"
484                                      "\n"
485                                      "struct TaskData {\n"
486                                      "    uint yes;\n"
487                                      "    ExternalData externalData;\n"
488                                      "};\n"
489                                      "taskPayloadSharedEXT TaskData td;\n";
490 
491     {
492         std::ostringstream task;
493         task << "#version 450\n"
494              << "#extension GL_EXT_mesh_shader : enable\n"
495              << "\n"
496              << "layout (local_size_x=1) in;\n"
497              << "\n"
498              << taskDataDecl << "\n"
499              << "void main ()\n"
500              << "{\n"
501              << "    td.yes = 1u;\n"
502              << "    td.externalData.OneMillion = 1000000.0;\n"
503              << "    td.externalData.TwoMillion = 2000000u;\n"
504              << "    for (uint i = 0; i < 10; i++) {\n"
505              << "        td.externalData.workGroupData.WorkGroupIdPlusOnex1000Iota[i] = float((gl_WorkGroupID.x + 1u) "
506                 "* 1000 + i);\n"
507              << "    }\n"
508              << "    {\n"
509              << "        uint baseVal = (gl_WorkGroupID.x + 1u) * 2000;\n"
510              << "        td.externalData.workGroupData.WorkGroupIdPlusOnex2000Iota = uvec3(baseVal, baseVal + 1, "
511                 "baseVal + 2);\n"
512              << "    }\n"
513              << "    {\n"
514              << "        uint baseVal = (gl_WorkGroupID.x + 1u) * 3000;\n"
515              << "        td.externalData.workGroupData.WorkGroupIdPlusOnex3000Iota = vec2(baseVal, baseVal + 1);\n"
516              << "    }\n"
517              << "    td.externalData.workGroupData.rowId.id = gl_WorkGroupID.x;\n"
518              << "    EmitMeshTasksEXT(2u, 1u, 1u);\n"
519              << "}\n";
520         programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
521     }
522 
523     {
524         std::ostringstream mesh;
525         mesh
526             << "#version 450\n"
527             << "#extension GL_EXT_mesh_shader : enable\n"
528             << "\n"
529             << "layout(local_size_x=2) in;\n"
530             << "layout(triangles) out;\n"
531             << "layout(max_vertices=4, max_primitives=2) out;\n"
532             << "\n"
533             << "layout (location=0) out perprimitiveEXT vec4 triangleColor[];\n"
534             << "\n"
535             << taskDataDecl << "\n"
536             << "void main ()\n"
537             << "{\n"
538             << "    bool dataOK = true;\n"
539             << "    dataOK = (dataOK && (td.yes == 1u));\n"
540             << "    dataOK = (dataOK && (td.externalData.OneMillion == 1000000.0 && td.externalData.TwoMillion == "
541                "2000000u));\n"
542             << "    uint rowId = td.externalData.workGroupData.rowId.id;\n"
543             << "    dataOK = (dataOK && (rowId == 0u || rowId == 1u));\n"
544             << "\n"
545             << "    {\n"
546             << "        uint baseVal = (rowId + 1u) * 1000u;\n"
547             << "        for (uint i = 0; i < 10; i++) {\n"
548             << "            if (td.externalData.workGroupData.WorkGroupIdPlusOnex1000Iota[i] != float(baseVal + i)) {\n"
549             << "                dataOK = false;\n"
550             << "                break;\n"
551             << "            }\n"
552             << "        }\n"
553             << "    }\n"
554             << "\n"
555             << "    {\n"
556             << "        uint baseVal = (rowId + 1u) * 2000;\n"
557             << "        uvec3 expected = uvec3(baseVal, baseVal + 1, baseVal + 2);\n"
558             << "        if (td.externalData.workGroupData.WorkGroupIdPlusOnex2000Iota != expected) {\n"
559             << "            dataOK = false;\n"
560             << "        }\n"
561             << "    }\n"
562             << "\n"
563             << "    {\n"
564             << "        uint baseVal = (rowId + 1u) * 3000;\n"
565             << "        vec2 expected = vec2(baseVal, baseVal + 1);\n"
566             << "        if (td.externalData.workGroupData.WorkGroupIdPlusOnex3000Iota != expected) {\n"
567             << "            dataOK = false;\n"
568             << "        }\n"
569             << "    }\n"
570             << "\n"
571             << "    uint columnId = gl_WorkGroupID.x;\n"
572             << "\n"
573             << "    uvec2 vertPrim = uvec2(0u, 0u);\n"
574             << "    if (dataOK) {\n"
575             << "        vertPrim = uvec2(4u, 2u);\n"
576             << "    }\n"
577             << "    SetMeshOutputsEXT(vertPrim.x, vertPrim.y);\n"
578             << "    if (vertPrim.y == 0u) {\n"
579             << "        return;\n"
580             << "    }\n"
581             << "\n"
582             << "    const vec4 outColor = vec4(rowId, columnId, 1.0f, 1.0f);\n"
583             << "    triangleColor[0] = outColor;\n"
584             << "    triangleColor[1] = outColor;\n"
585             << "\n"
586             << "    // Each local invocation will generate two points and one triangle from the quad.\n"
587             << "    // The first local invocation will generate the top quad vertices.\n"
588             << "    // The second invocation will generate the two bottom vertices.\n"
589             << "    vec4 left  = vec4(0.0, 0.0, 0.0, 1.0);\n"
590             << "    vec4 right = vec4(1.0, 0.0, 0.0, 1.0);\n"
591             << "\n"
592             << "    float localInvocationOffsetY = float(gl_LocalInvocationIndex);\n"
593             << "    left.y  += localInvocationOffsetY;\n"
594             << "    right.y += localInvocationOffsetY;\n"
595             << "\n"
596             << "    // The code above creates a quad from (0, 0) to (1, 1) but we need to offset it\n"
597             << "    // in X and/or Y depending on the row and column, to place it in other quadrants.\n"
598             << "    float quadrantOffsetX = float(int(columnId) - 1);\n"
599             << "    float quadrantOffsetY = float(int(rowId) - 1);\n"
600             << "\n"
601             << "    left.x  += quadrantOffsetX;\n"
602             << "    right.x += quadrantOffsetX;\n"
603             << "\n"
604             << "    left.y  += quadrantOffsetY;\n"
605             << "    right.y += quadrantOffsetY;\n"
606             << "\n"
607             << "    uint baseVertexId = 2*gl_LocalInvocationIndex;\n"
608             << "    gl_MeshVerticesEXT[baseVertexId + 0].gl_Position = left;\n"
609             << "    gl_MeshVerticesEXT[baseVertexId + 1].gl_Position = right;\n"
610             << "\n"
611             << "    // 0,1,2 or 1,2,3 (note: triangles alternate front face this way)\n"
612             << "    const uvec3 indices = uvec3(0 + gl_LocalInvocationIndex, 1 + gl_LocalInvocationIndex, 2 + "
613                "gl_LocalInvocationIndex);\n"
614             << "    gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = indices;\n"
615             << "}\n";
616         programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
617     }
618 }
619 
createInstance(Context & context) const620 TestInstance *ComplexTaskDataCase::createInstance(Context &context) const
621 {
622     return new ComplexTaskDataInstance(context, m_params.get());
623 }
624 
625 // Verify drawing a single point.
626 class SinglePointCase : public MeshShaderMiscCase
627 {
628 public:
SinglePointCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params,bool writePointSize=true)629     SinglePointCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params, bool writePointSize = true)
630         : MeshShaderMiscCase(testCtx, name, std::move(params))
631         , m_writePointSize(writePointSize)
632     {
633     }
634 
635     void checkSupport(Context &context) const override;
636     void initPrograms(vk::SourceCollections &programCollection) const override;
637     TestInstance *createInstance(Context &context) const override;
638 
639 protected:
640     const bool m_writePointSize = true;
641 };
642 
643 class SinglePointInstance : public MeshShaderMiscInstance
644 {
645 public:
SinglePointInstance(Context & context,const MiscTestParams * params)646     SinglePointInstance(Context &context, const MiscTestParams *params) : MeshShaderMiscInstance(context, params)
647     {
648     }
649 
650     void generateReferenceLevel() override;
651 };
652 
checkSupport(Context & context) const653 void SinglePointCase::checkSupport(Context &context) const
654 {
655     MeshShaderMiscCase::checkSupport(context);
656 
657     if (!m_writePointSize)
658         context.requireDeviceFunctionality("VK_KHR_maintenance5");
659 }
660 
createInstance(Context & context) const661 TestInstance *SinglePointCase::createInstance(Context &context) const
662 {
663     return new SinglePointInstance(context, m_params.get());
664 }
665 
initPrograms(vk::SourceCollections & programCollection) const666 void SinglePointCase::initPrograms(vk::SourceCollections &programCollection) const
667 {
668     DE_ASSERT(!m_params->needsTaskShader());
669 
670     const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
671 
672     MeshShaderMiscCase::initPrograms(programCollection);
673 
674     std::ostringstream mesh;
675     mesh << "#version 450\n"
676          << "#extension GL_EXT_mesh_shader : enable\n"
677          << "\n"
678          << "layout(local_size_x=1) in;\n"
679          << "layout(points) out;\n"
680          << "layout(max_vertices=256, max_primitives=256) out;\n"
681          << "\n"
682          << "layout (location=0) out perprimitiveEXT vec4 pointColor[];\n"
683          << "\n"
684          << "void main ()\n"
685          << "{\n"
686          << "    SetMeshOutputsEXT(1u, 1u);\n"
687          << "    pointColor[0] = vec4(0.0f, 1.0f, 1.0f, 1.0f);\n"
688          << "    gl_MeshVerticesEXT[0].gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);\n";
689     if (m_writePointSize)
690     {
691         mesh << "    gl_MeshVerticesEXT[0].gl_PointSize = 1.0f;\n";
692     }
693     mesh << "    gl_PrimitivePointIndicesEXT[0] = 0;\n"
694          << "}\n";
695     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
696 }
697 
generateReferenceLevel()698 void SinglePointInstance::generateReferenceLevel()
699 {
700     generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 0.0f, 0.0f), m_referenceLevel);
701 
702     const auto halfWidth  = static_cast<int>(m_params->width / 2u);
703     const auto halfHeight = static_cast<int>(m_params->height / 2u);
704     const auto access     = m_referenceLevel->getAccess();
705 
706     access.setPixel(tcu::Vec4(0.0f, 1.0f, 1.0f, 1.0f), halfWidth, halfHeight);
707 }
708 
709 // Verify drawing a single line.
710 class SingleLineCase : public MeshShaderMiscCase
711 {
712 public:
SingleLineCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)713     SingleLineCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
714         : MeshShaderMiscCase(testCtx, name, std::move(params))
715     {
716     }
717 
718     void initPrograms(vk::SourceCollections &programCollection) const override;
719     TestInstance *createInstance(Context &context) const override;
720 };
721 
722 class SingleLineInstance : public MeshShaderMiscInstance
723 {
724 public:
SingleLineInstance(Context & context,const MiscTestParams * params)725     SingleLineInstance(Context &context, const MiscTestParams *params) : MeshShaderMiscInstance(context, params)
726     {
727     }
728 
729     void generateReferenceLevel() override;
730 };
731 
createInstance(Context & context) const732 TestInstance *SingleLineCase::createInstance(Context &context) const
733 {
734     return new SingleLineInstance(context, m_params.get());
735 }
736 
initPrograms(vk::SourceCollections & programCollection) const737 void SingleLineCase::initPrograms(vk::SourceCollections &programCollection) const
738 {
739     DE_ASSERT(!m_params->needsTaskShader());
740 
741     const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
742 
743     MeshShaderMiscCase::initPrograms(programCollection);
744 
745     std::ostringstream mesh;
746     mesh << "#version 450\n"
747          << "#extension GL_EXT_mesh_shader : enable\n"
748          << "\n"
749          << "layout(local_size_x=1) in;\n"
750          << "layout(lines) out;\n"
751          << "layout(max_vertices=256, max_primitives=256) out;\n"
752          << "\n"
753          << "layout (location=0) out perprimitiveEXT vec4 lineColor[];\n"
754          << "\n"
755          << "void main ()\n"
756          << "{\n"
757          << "    SetMeshOutputsEXT(2u, 1u);\n"
758          << "    lineColor[0] = vec4(0.0f, 1.0f, 1.0f, 1.0f);\n"
759          << "    gl_MeshVerticesEXT[0].gl_Position = vec4(-1.0f, 0.0f, 0.0f, 1.0f);\n"
760          << "    gl_MeshVerticesEXT[1].gl_Position = vec4( 1.0f, 0.0f, 0.0f, 1.0f);\n"
761          << "    gl_PrimitiveLineIndicesEXT[gl_LocalInvocationIndex] = uvec2(0u, 1u);\n"
762          << "}\n";
763     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
764 }
765 
generateReferenceLevel()766 void SingleLineInstance::generateReferenceLevel()
767 {
768     generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 0.0f, 0.0f), m_referenceLevel);
769 
770     const auto iWidth     = static_cast<int>(m_params->width);
771     const auto halfHeight = static_cast<int>(m_params->height / 2u);
772     const auto access     = m_referenceLevel->getAccess();
773 
774     // Center row.
775     for (int x = 0; x < iWidth; ++x)
776         access.setPixel(tcu::Vec4(0.0f, 1.0f, 1.0f, 1.0f), x, halfHeight);
777 }
778 
779 // Verify drawing a single triangle.
780 class SingleTriangleCase : public MeshShaderMiscCase
781 {
782 public:
SingleTriangleCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)783     SingleTriangleCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
784         : MeshShaderMiscCase(testCtx, name, std::move(params))
785     {
786     }
787 
788     void initPrograms(vk::SourceCollections &programCollection) const override;
789     TestInstance *createInstance(Context &context) const override;
790 };
791 
792 class SingleTriangleInstance : public MeshShaderMiscInstance
793 {
794 public:
SingleTriangleInstance(Context & context,const MiscTestParams * params)795     SingleTriangleInstance(Context &context, const MiscTestParams *params) : MeshShaderMiscInstance(context, params)
796     {
797     }
798 
799     void generateReferenceLevel() override;
800 };
801 
createInstance(Context & context) const802 TestInstance *SingleTriangleCase::createInstance(Context &context) const
803 {
804     return new SingleTriangleInstance(context, m_params.get());
805 }
806 
initPrograms(vk::SourceCollections & programCollection) const807 void SingleTriangleCase::initPrograms(vk::SourceCollections &programCollection) const
808 {
809     DE_ASSERT(!m_params->needsTaskShader());
810 
811     const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
812 
813     MeshShaderMiscCase::initPrograms(programCollection);
814 
815     const float halfPixelX = 2.0f / static_cast<float>(m_params->width);
816     const float halfPixelY = 2.0f / static_cast<float>(m_params->height);
817 
818     std::ostringstream mesh;
819     mesh << "#version 450\n"
820          << "#extension GL_EXT_mesh_shader : enable\n"
821          << "\n"
822          << "layout(local_size_x=1) in;\n"
823          << "layout(triangles) out;\n"
824          << "layout(max_vertices=256, max_primitives=256) out;\n"
825          << "\n"
826          << "layout (location=0) out perprimitiveEXT vec4 triangleColor[];\n"
827          << "\n"
828          << "void main ()\n"
829          << "{\n"
830          << "    SetMeshOutputsEXT(3u, 1u);\n"
831          << "    triangleColor[0] = vec4(0.0f, 1.0f, 1.0f, 1.0f);\n"
832          << "    gl_MeshVerticesEXT[0].gl_Position = vec4(" << halfPixelY << ", " << -halfPixelX << ", 0.0f, 1.0f);\n"
833          << "    gl_MeshVerticesEXT[1].gl_Position = vec4(" << halfPixelY << ", " << halfPixelX << ", 0.0f, 1.0f);\n"
834          << "    gl_MeshVerticesEXT[2].gl_Position = vec4(" << -halfPixelY << ", 0.0f, 0.0f, 1.0f);\n"
835          << "    gl_PrimitiveTriangleIndicesEXT[0] = uvec3(0u, 1u, 2u);\n"
836          << "}\n";
837     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
838 }
839 
generateReferenceLevel()840 void SingleTriangleInstance::generateReferenceLevel()
841 {
842     generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 0.0f, 0.0f), m_referenceLevel);
843 
844     const auto halfWidth  = static_cast<int>(m_params->width / 2u);
845     const auto halfHeight = static_cast<int>(m_params->height / 2u);
846     const auto access     = m_referenceLevel->getAccess();
847 
848     // Single pixel in the center.
849     access.setPixel(tcu::Vec4(0.0f, 1.0f, 1.0f, 1.0f), halfWidth, halfHeight);
850 }
851 
852 // Verify drawing the maximum number of points.
853 class MaxPointsCase : public MeshShaderMiscCase
854 {
855 public:
MaxPointsCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)856     MaxPointsCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
857         : MeshShaderMiscCase(testCtx, name, std::move(params))
858     {
859     }
860 
861     void initPrograms(vk::SourceCollections &programCollection) const override;
862     TestInstance *createInstance(Context &context) const override;
863 };
864 
865 class MaxPointsInstance : public MeshShaderMiscInstance
866 {
867 public:
MaxPointsInstance(Context & context,const MiscTestParams * params)868     MaxPointsInstance(Context &context, const MiscTestParams *params) : MeshShaderMiscInstance(context, params)
869     {
870     }
871 
872     void generateReferenceLevel() override;
873 };
874 
createInstance(Context & context) const875 TestInstance *MaxPointsCase::createInstance(Context &context) const
876 {
877     return new MaxPointsInstance(context, m_params.get());
878 }
879 
initPrograms(vk::SourceCollections & programCollection) const880 void MaxPointsCase::initPrograms(vk::SourceCollections &programCollection) const
881 {
882     DE_ASSERT(!m_params->needsTaskShader());
883 
884     const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
885 
886     MeshShaderMiscCase::initPrograms(programCollection);
887 
888     // Fill a 16x16 image with 256 points. Each of the 64 local invocations will handle a segment of 4 pixels. 4 segments per row.
889     DE_ASSERT(m_params->width == 16u && m_params->height == 16u);
890 
891     std::ostringstream mesh;
892     mesh << "#version 450\n"
893          << "#extension GL_EXT_mesh_shader : enable\n"
894          << "\n"
895          << "layout(local_size_x=8, local_size_y=2, local_size_z=4) in;\n"
896          << "layout(points) out;\n"
897          << "layout(max_vertices=256, max_primitives=256) out;\n"
898          << "\n"
899          << "layout (location=0) out perprimitiveEXT vec4 pointColor[];\n"
900          << "\n"
901          << "void main ()\n"
902          << "{\n"
903          << "    SetMeshOutputsEXT(256u, 256u);\n"
904          << "    uint firstPixel = 4u * gl_LocalInvocationIndex;\n"
905          << "    uint row = firstPixel / 16u;\n"
906          << "    uint col = firstPixel % 16u;\n"
907          << "    float pixSize = 2.0f / 16.0f;\n"
908          << "    float yCoord = pixSize * (float(row) + 0.5f) - 1.0f;\n"
909          << "    float baseXCoord = pixSize * (float(col) + 0.5f) - 1.0f;\n"
910          << "    for (uint i = 0; i < 4u; i++) {\n"
911          << "        float xCoord = baseXCoord + pixSize * float(i);\n"
912          << "        uint pixId = firstPixel + i;\n"
913          << "        gl_MeshVerticesEXT[pixId].gl_Position = vec4(xCoord, yCoord, 0.0f, 1.0f);\n"
914          << "        gl_MeshVerticesEXT[pixId].gl_PointSize = 1.0f;\n"
915          << "        gl_PrimitivePointIndicesEXT[pixId] = pixId;\n"
916          << "        pointColor[pixId] = vec4(((xCoord + 1.0f) / 2.0f), ((yCoord + 1.0f) / 2.0f), 0.0f, 1.0f);\n"
917          << "    }\n"
918          << "}\n";
919     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
920 }
921 
generateReferenceLevel()922 void MaxPointsInstance::generateReferenceLevel()
923 {
924     const auto format    = getOutputFormat();
925     const auto tcuFormat = mapVkFormat(format);
926 
927     const auto iWidth  = static_cast<int>(m_params->width);
928     const auto iHeight = static_cast<int>(m_params->height);
929     const auto fWidth  = static_cast<float>(m_params->width);
930     const auto fHeight = static_cast<float>(m_params->height);
931 
932     m_referenceLevel.reset(new tcu::TextureLevel(tcuFormat, iWidth, iHeight));
933 
934     const auto access = m_referenceLevel->getAccess();
935 
936     // Fill with gradient like the shader does.
937     for (int y = 0; y < iHeight; ++y)
938         for (int x = 0; x < iWidth; ++x)
939         {
940             const tcu::Vec4 color(((static_cast<float>(x) + 0.5f) / fWidth), ((static_cast<float>(y) + 0.5f) / fHeight),
941                                   0.0f, 1.0f);
942             access.setPixel(color, x, y);
943         }
944 }
945 
946 // Verify drawing the maximum number of lines.
947 class MaxLinesCase : public MeshShaderMiscCase
948 {
949 public:
MaxLinesCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)950     MaxLinesCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
951         : MeshShaderMiscCase(testCtx, name, std::move(params))
952     {
953     }
954 
955     void initPrograms(vk::SourceCollections &programCollection) const override;
956     TestInstance *createInstance(Context &context) const override;
957 };
958 
959 class MaxLinesInstance : public MeshShaderMiscInstance
960 {
961 public:
MaxLinesInstance(Context & context,const MiscTestParams * params)962     MaxLinesInstance(Context &context, const MiscTestParams *params) : MeshShaderMiscInstance(context, params)
963     {
964     }
965 
966     void generateReferenceLevel() override;
967 };
968 
createInstance(Context & context) const969 TestInstance *MaxLinesCase::createInstance(Context &context) const
970 {
971     return new MaxLinesInstance(context, m_params.get());
972 }
973 
initPrograms(vk::SourceCollections & programCollection) const974 void MaxLinesCase::initPrograms(vk::SourceCollections &programCollection) const
975 {
976     DE_ASSERT(!m_params->needsTaskShader());
977 
978     const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
979 
980     MeshShaderMiscCase::initPrograms(programCollection);
981 
982     // Fill a 1x1020 image with 255 lines, each line being 4 pixels tall. Each invocation will generate ~4 lines.
983     DE_ASSERT(m_params->width == 1u && m_params->height == 1020u);
984 
985     std::ostringstream mesh;
986     mesh << "#version 450\n"
987          << "#extension GL_EXT_mesh_shader : enable\n"
988          << "\n"
989          << "layout(local_size_x=4, local_size_y=2, local_size_z=8) in;\n"
990          << "layout(lines) out;\n"
991          << "layout(max_vertices=256, max_primitives=255) out;\n"
992          << "\n"
993          << "layout (location=0) out perprimitiveEXT vec4 lineColor[];\n"
994          << "\n"
995          << "void main ()\n"
996          << "{\n"
997          << "    SetMeshOutputsEXT(256u, 255u);\n"
998          << "    uint firstLine = 4u * gl_LocalInvocationIndex;\n"
999          << "    for (uint i = 0u; i < 4u; i++) {\n"
1000          << "        uint lineId = firstLine + i;\n"
1001          << "        uint topPixel = 4u * lineId;\n"
1002          << "        uint bottomPixel = 3u + topPixel;\n"
1003          << "        if (bottomPixel < 1020u) {\n"
1004          << "            float bottomCoord = ((float(bottomPixel) + 1.0f) / 1020.0) * 2.0 - 1.0;\n"
1005          << "            gl_MeshVerticesEXT[lineId + 1u].gl_Position = vec4(0.0, bottomCoord, 0.0f, 1.0f);\n"
1006          << "            gl_PrimitiveLineIndicesEXT[lineId] = uvec2(lineId, lineId + 1u);\n"
1007          << "            lineColor[lineId] = vec4(0.0f, 1.0f, float(lineId) / 255.0f, 1.0f);\n"
1008          << "        } else {\n"
1009          << "            // The last iteration of the last invocation emits the first point\n"
1010          << "            gl_MeshVerticesEXT[0].gl_Position = vec4(0.0, -1.0, 0.0f, 1.0f);\n"
1011          << "        }\n"
1012          << "    }\n"
1013          << "}\n";
1014     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
1015 }
1016 
generateReferenceLevel()1017 void MaxLinesInstance::generateReferenceLevel()
1018 {
1019     const auto format    = getOutputFormat();
1020     const auto tcuFormat = mapVkFormat(format);
1021 
1022     const auto iWidth  = static_cast<int>(m_params->width);
1023     const auto iHeight = static_cast<int>(m_params->height);
1024 
1025     m_referenceLevel.reset(new tcu::TextureLevel(tcuFormat, iWidth, iHeight));
1026 
1027     const auto access = m_referenceLevel->getAccess();
1028 
1029     // Fill lines, 4 pixels per line.
1030     const uint32_t kNumLines   = 255u;
1031     const uint32_t kLineHeight = 4u;
1032 
1033     for (uint32_t i = 0u; i < kNumLines; ++i)
1034     {
1035         const tcu::Vec4 color(0.0f, 1.0f, static_cast<float>(i) / static_cast<float>(kNumLines), 1.0f);
1036         for (uint32_t j = 0u; j < kLineHeight; ++j)
1037             access.setPixel(color, 0, i * kLineHeight + j);
1038     }
1039 }
1040 
1041 // Verify drawing the maximum number of triangles.
1042 class MaxTrianglesCase : public MeshShaderMiscCase
1043 {
1044 public:
1045     struct Params : public MiscTestParams
1046     {
1047         tcu::UVec3 localSize;
1048 
Paramsvkt::MeshShader::__anonb7c155300111::MaxTrianglesCase::Params1049         Params(const tcu::UVec3 &meshCount_, uint32_t width_, uint32_t height_, const tcu::UVec3 &localSize_)
1050             : MiscTestParams(tcu::Nothing, meshCount_, width_, height_)
1051             , localSize(localSize_)
1052         {
1053         }
1054     };
1055 
MaxTrianglesCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)1056     MaxTrianglesCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
1057         : MeshShaderMiscCase(testCtx, name, std::move(params))
1058     {
1059     }
1060 
1061     void initPrograms(vk::SourceCollections &programCollection) const override;
1062     TestInstance *createInstance(Context &context) const override;
1063 
1064     static constexpr uint32_t kNumVertices  = 256u;
1065     static constexpr uint32_t kNumTriangles = 254u;
1066 };
1067 
1068 class MaxTrianglesInstance : public MeshShaderMiscInstance
1069 {
1070 public:
MaxTrianglesInstance(Context & context,const MiscTestParams * params)1071     MaxTrianglesInstance(Context &context, const MiscTestParams *params) : MeshShaderMiscInstance(context, params)
1072     {
1073     }
1074 
1075     void generateReferenceLevel() override;
1076 };
1077 
createInstance(Context & context) const1078 TestInstance *MaxTrianglesCase::createInstance(Context &context) const
1079 {
1080     return new MaxTrianglesInstance(context, m_params.get());
1081 }
1082 
initPrograms(vk::SourceCollections & programCollection) const1083 void MaxTrianglesCase::initPrograms(vk::SourceCollections &programCollection) const
1084 {
1085     // Default frag shader.
1086     MeshShaderMiscCase::initPrograms(programCollection);
1087 
1088     const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
1089     const auto params       = dynamic_cast<const MaxTrianglesCase::Params *>(m_params.get());
1090 
1091     DE_ASSERT(params);
1092     DE_ASSERT(!params->needsTaskShader());
1093 
1094     const auto &localSize    = params->localSize;
1095     const auto workGroupSize = (localSize.x() * localSize.y() * localSize.z());
1096 
1097     DE_ASSERT(kNumVertices % workGroupSize == 0u);
1098     const auto trianglesPerInvocation = kNumVertices / workGroupSize;
1099 
1100     // Fill a sufficiently large image with solid color. Generate a quarter of a circle with the center in the top left corner,
1101     // using a triangle fan that advances from top to bottom. Each invocation will generate ~trianglesPerInvocation triangles.
1102     std::ostringstream mesh;
1103     mesh << "#version 450\n"
1104          << "#extension GL_EXT_mesh_shader : enable\n"
1105          << "\n"
1106          << "layout(local_size_x=" << localSize.x() << ", local_size_y=" << localSize.y()
1107          << ", local_size_z=" << localSize.z() << ") in;\n"
1108          << "layout(triangles) out;\n"
1109          << "layout(max_vertices=" << kNumVertices << ", max_primitives=" << kNumTriangles << ") out;\n"
1110          << "\n"
1111          << "layout (location=0) out perprimitiveEXT vec4 triangleColor[];\n"
1112          << "\n"
1113          << "const float PI_2 = 1.57079632679489661923;\n"
1114          << "const float RADIUS = 4.5;\n"
1115          << "\n"
1116          << "void main ()\n"
1117          << "{\n"
1118          << "    const uint trianglesPerInvocation = " << trianglesPerInvocation << "u;\n"
1119          << "    const uint numVertices = " << kNumVertices << "u;\n"
1120          << "    const uint numTriangles = " << kNumTriangles << "u;\n"
1121          << "    const float fNumTriangles = float(numTriangles);\n"
1122          << "    SetMeshOutputsEXT(numVertices, numTriangles);\n"
1123          << "    uint firstTriangle = trianglesPerInvocation * gl_LocalInvocationIndex;\n"
1124          << "    for (uint i = 0u; i < trianglesPerInvocation; i++) {\n"
1125          << "        uint triangleId = firstTriangle + i;\n"
1126          << "        if (triangleId < numTriangles) {\n"
1127          << "            uint vertexId = triangleId + 2u;\n"
1128          << "            float angleProportion = float(vertexId - 1u) / fNumTriangles;\n"
1129          << "            float angle = PI_2 * angleProportion;\n"
1130          << "            float xCoord = cos(angle) * RADIUS - 1.0;\n"
1131          << "            float yCoord = sin(angle) * RADIUS - 1.0;\n"
1132          << "            gl_MeshVerticesEXT[vertexId].gl_Position = vec4(xCoord, yCoord, 0.0, 1.0);\n"
1133          << "            gl_PrimitiveTriangleIndicesEXT[triangleId] = uvec3(0u, triangleId + 1u, triangleId + 2u);\n"
1134          << "            triangleColor[triangleId] = vec4(0.0f, 0.0f, 1.0f, 1.0f);\n"
1135          << "        } else {\n"
1136          << "            // The last iterations of the last invocation emit the first two vertices\n"
1137          << "            uint vertexId = triangleId - numTriangles;\n"
1138          << "            if (vertexId == 0u) {\n"
1139          << "                gl_MeshVerticesEXT[0u].gl_Position = vec4(-1.0, -1.0, 0.0, 1.0);\n"
1140          << "            } else {\n"
1141          << "                gl_MeshVerticesEXT[1u].gl_Position = vec4(RADIUS, -1.0, 0.0, 1.0);\n"
1142          << "            }\n"
1143          << "        }\n"
1144          << "    }\n"
1145          << "}\n";
1146     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
1147 }
1148 
generateReferenceLevel()1149 void MaxTrianglesInstance::generateReferenceLevel()
1150 {
1151     generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f), m_referenceLevel);
1152 }
1153 
1154 struct LargeWorkGroupParams : public MiscTestParams
1155 {
LargeWorkGroupParamsvkt::MeshShader::__anonb7c155300111::LargeWorkGroupParams1156     LargeWorkGroupParams(const tcu::Maybe<tcu::UVec3> &taskCount_, const tcu::UVec3 &meshCount_, uint32_t width_,
1157                          uint32_t height_, const tcu::UVec3 &localInvocations_)
1158         : MiscTestParams(taskCount_, meshCount_, width_, height_)
1159         , localInvocations(localInvocations_)
1160     {
1161     }
1162 
1163     tcu::UVec3 localInvocations;
1164 };
1165 
1166 // Large work groups with many threads.
1167 class LargeWorkGroupCase : public MeshShaderMiscCase
1168 {
1169 public:
LargeWorkGroupCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)1170     LargeWorkGroupCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
1171         : MeshShaderMiscCase(testCtx, name, std::move(params))
1172     {
1173     }
1174 
1175     void initPrograms(vk::SourceCollections &programCollection) const override;
1176     TestInstance *createInstance(Context &context) const override;
1177 };
1178 
1179 class LargeWorkGroupInstance : public MeshShaderMiscInstance
1180 {
1181 public:
LargeWorkGroupInstance(Context & context,const MiscTestParams * params)1182     LargeWorkGroupInstance(Context &context, const MiscTestParams *params) : MeshShaderMiscInstance(context, params)
1183     {
1184     }
1185 
1186     void generateReferenceLevel() override;
1187 };
1188 
createInstance(Context & context) const1189 TestInstance *LargeWorkGroupCase::createInstance(Context &context) const
1190 {
1191     return new LargeWorkGroupInstance(context, m_params.get());
1192 }
1193 
generateReferenceLevel()1194 void LargeWorkGroupInstance::generateReferenceLevel()
1195 {
1196     generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f), m_referenceLevel);
1197 }
1198 
1199 // 'x', 'y' or 'z' depending on if dim is 0, 1 or 2, respectively.
dimSuffix(int dim)1200 char dimSuffix(int dim)
1201 {
1202     const std::string suffixes = "xyz";
1203     DE_ASSERT(dim >= 0 && dim < static_cast<int>(suffixes.size()));
1204     return suffixes[dim];
1205 }
1206 
initPrograms(vk::SourceCollections & programCollection) const1207 void LargeWorkGroupCase::initPrograms(vk::SourceCollections &programCollection) const
1208 {
1209     const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
1210     const auto params       = dynamic_cast<LargeWorkGroupParams *>(m_params.get());
1211     DE_ASSERT(params);
1212 
1213     const auto totalInvocations =
1214         params->localInvocations.x() * params->localInvocations.y() * params->localInvocations.z();
1215     const auto useTaskShader  = params->needsTaskShader();
1216     uint32_t taskMultiplier   = 1u;
1217     const auto &meshCount     = params->meshCount;
1218     const auto meshMultiplier = meshCount.x() * meshCount.y() * meshCount.z();
1219 
1220     if (useTaskShader)
1221     {
1222         const auto dim = params->taskCount.get();
1223         taskMultiplier = dim.x() * dim.y() * dim.z();
1224     }
1225 
1226     // Add the frag shader.
1227     MeshShaderMiscCase::initPrograms(programCollection);
1228 
1229     std::ostringstream taskData;
1230     taskData << "struct TaskData {\n"
1231              << "    uint parentTask[" << totalInvocations << "];\n"
1232              << "};\n"
1233              << "taskPayloadSharedEXT TaskData td;\n";
1234     const auto taskDataStr = taskData.str();
1235 
1236     const std::string localSizeStr = "layout ("
1237                                      "local_size_x=" +
1238                                      std::to_string(params->localInvocations.x()) +
1239                                      ", "
1240                                      "local_size_y=" +
1241                                      std::to_string(params->localInvocations.y()) +
1242                                      ", "
1243                                      "local_size_z=" +
1244                                      std::to_string(params->localInvocations.z()) + ") in;\n";
1245 
1246     if (useTaskShader)
1247     {
1248         std::ostringstream task;
1249         task << "#version 450\n"
1250              << "#extension GL_EXT_mesh_shader : enable\n"
1251              << "\n"
1252              << localSizeStr << "\n"
1253              << taskDataStr << "\n"
1254              << "void main () {\n"
1255              << "    const uint workGroupIndex = gl_NumWorkGroups.x * gl_NumWorkGroups.y * gl_WorkGroupID.z + "
1256                 "gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
1257              << "    td.parentTask[gl_LocalInvocationIndex] = workGroupIndex;\n"
1258              << "    EmitMeshTasksEXT(" << meshCount.x() << ", " << meshCount.y() << ", " << meshCount.z() << ");\n"
1259              << "}\n";
1260         programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
1261     }
1262 
1263     // Needed for the code below to work.
1264     DE_ASSERT(params->width * params->height == taskMultiplier * meshMultiplier * totalInvocations);
1265     DE_UNREF(taskMultiplier); // For release builds.
1266 
1267     // Emit one point per framebuffer pixel. The number of jobs (params->localInvocations in each mesh shader work group, multiplied
1268     // by the number of mesh work groups emitted by each task work group) must be the same as the total framebuffer size. Calculate
1269     // a job ID corresponding to the current mesh shader invocation, and assign a pixel position to it. Draw a point at that
1270     // position.
1271     std::ostringstream mesh;
1272     mesh << "#version 450\n"
1273          << "#extension GL_EXT_mesh_shader : enable\n"
1274          << "\n"
1275          << localSizeStr << "layout (points) out;\n"
1276          << "layout (max_vertices=" << totalInvocations << ", max_primitives=" << totalInvocations << ") out;\n"
1277          << "\n"
1278          << (useTaskShader ? taskDataStr : "") << "\n"
1279          << "layout (location=0) out perprimitiveEXT vec4 pointColor[];\n"
1280          << "\n"
1281          << "void main () {\n"
1282          << "    uint parentTask = " << (useTaskShader ? "td.parentTask[0]" : "0") << ";\n";
1283     ;
1284 
1285     if (useTaskShader)
1286     {
1287         mesh << "    if (td.parentTask[gl_LocalInvocationIndex] != parentTask || parentTask >= " << taskMultiplier
1288              << ") {\n"
1289              << "        return;\n"
1290              << "    }\n";
1291     }
1292 
1293     mesh << "    SetMeshOutputsEXT(" << totalInvocations << ", " << totalInvocations << ");\n"
1294          << "    const uint workGroupIndex = gl_NumWorkGroups.x * gl_NumWorkGroups.y * gl_WorkGroupID.z + "
1295             "gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
1296          << "    uint jobId = ((parentTask * " << meshMultiplier << ") + workGroupIndex) * " << totalInvocations
1297          << " + gl_LocalInvocationIndex;\n"
1298          << "    uint row = jobId / " << params->width << ";\n"
1299          << "    uint col = jobId % " << params->width << ";\n"
1300          << "    float yCoord = (float(row + 0.5) / " << params->height << ".0) * 2.0 - 1.0;\n"
1301          << "    float xCoord = (float(col + 0.5) / " << params->width << ".0) * 2.0 - 1.0;\n"
1302          << "    gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(xCoord, yCoord, 0.0, 1.0);\n"
1303          << "    gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 1.0;\n"
1304          << "    gl_PrimitivePointIndicesEXT[gl_LocalInvocationIndex] = gl_LocalInvocationIndex;\n"
1305          << "    vec4 resultColor = vec4(0.0, 0.0, 1.0, 1.0);\n";
1306 
1307     mesh << "    pointColor[gl_LocalInvocationIndex] = resultColor;\n"
1308          << "}\n";
1309     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
1310 }
1311 
// Tests that generate no primitives of a given type.
// Output primitive kind declared by the generated mesh shader.
enum class PrimitiveType
{
    POINTS    = 0,
    LINES     = 1,
    TRIANGLES = 2
};
1319 
primitiveTypeName(PrimitiveType primitiveType)1320 std::string primitiveTypeName(PrimitiveType primitiveType)
1321 {
1322     std::string primitiveName;
1323 
1324     switch (primitiveType)
1325     {
1326     case PrimitiveType::POINTS:
1327         primitiveName = "points";
1328         break;
1329     case PrimitiveType::LINES:
1330         primitiveName = "lines";
1331         break;
1332     case PrimitiveType::TRIANGLES:
1333         primitiveName = "triangles";
1334         break;
1335     default:
1336         DE_ASSERT(false);
1337         break;
1338     }
1339 
1340     return primitiveName;
1341 }
1342 
1343 struct NoPrimitivesParams : public MiscTestParams
1344 {
NoPrimitivesParamsvkt::MeshShader::__anonb7c155300111::NoPrimitivesParams1345     NoPrimitivesParams(const tcu::Maybe<tcu::UVec3> &taskCount_, const tcu::UVec3 &meshCount_, uint32_t width_,
1346                        uint32_t height_, PrimitiveType primitiveType_)
1347         : MiscTestParams(taskCount_, meshCount_, width_, height_)
1348         , primitiveType(primitiveType_)
1349     {
1350     }
1351 
1352     PrimitiveType primitiveType;
1353 };
1354 
1355 class NoPrimitivesCase : public MeshShaderMiscCase
1356 {
1357 public:
NoPrimitivesCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)1358     NoPrimitivesCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
1359         : MeshShaderMiscCase(testCtx, name, std::move(params))
1360     {
1361     }
1362 
1363     void initPrograms(vk::SourceCollections &programCollection) const override;
1364     TestInstance *createInstance(Context &context) const override;
1365 };
1366 
1367 class NoPrimitivesInstance : public MeshShaderMiscInstance
1368 {
1369 public:
NoPrimitivesInstance(Context & context,const MiscTestParams * params)1370     NoPrimitivesInstance(Context &context, const MiscTestParams *params) : MeshShaderMiscInstance(context, params)
1371     {
1372     }
1373 
1374     void generateReferenceLevel() override;
1375 };
1376 
generateReferenceLevel()1377 void NoPrimitivesInstance::generateReferenceLevel()
1378 {
1379     // No primitives: clear color.
1380     generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 0.0f, 0.0f), m_referenceLevel);
1381 }
1382 
createInstance(Context & context) const1383 TestInstance *NoPrimitivesCase::createInstance(Context &context) const
1384 {
1385     return new NoPrimitivesInstance(context, m_params.get());
1386 }
1387 
initPrograms(vk::SourceCollections & programCollection) const1388 void NoPrimitivesCase::initPrograms(vk::SourceCollections &programCollection) const
1389 {
1390     const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
1391     const auto params       = dynamic_cast<NoPrimitivesParams *>(m_params.get());
1392 
1393     DE_ASSERT(params);
1394     DE_ASSERT(!params->needsTaskShader());
1395 
1396     const auto primitiveName = primitiveTypeName(params->primitiveType);
1397 
1398     std::ostringstream mesh;
1399     mesh << "#version 450\n"
1400          << "#extension GL_EXT_mesh_shader : enable\n"
1401          << "\n"
1402          << "layout (local_size_x=128) in;\n"
1403          << "layout (" << primitiveName << ") out;\n"
1404          << "layout (max_vertices=256, max_primitives=256) out;\n"
1405          << "\n"
1406          << "layout (location=0) out perprimitiveEXT vec4 primitiveColor[];\n"
1407          << "\n"
1408          << "void main () {\n"
1409          << "    SetMeshOutputsEXT(0u, 0u);\n"
1410          << "}\n";
1411 
1412     MeshShaderMiscCase::initPrograms(programCollection);
1413     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
1414 }
1415 
1416 class NoPrimitivesExtraWritesCase : public NoPrimitivesCase
1417 {
1418 public:
NoPrimitivesExtraWritesCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)1419     NoPrimitivesExtraWritesCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
1420         : NoPrimitivesCase(testCtx, name, std::move(params))
1421     {
1422     }
1423 
1424     void initPrograms(vk::SourceCollections &programCollection) const override;
1425 
1426     static constexpr uint32_t kLocalInvocations = 128u;
1427 };
1428 
initPrograms(vk::SourceCollections & programCollection) const1429 void NoPrimitivesExtraWritesCase::initPrograms(vk::SourceCollections &programCollection) const
1430 {
1431     const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
1432     const auto params       = dynamic_cast<NoPrimitivesParams *>(m_params.get());
1433 
1434     DE_ASSERT(params);
1435     DE_ASSERT(m_params->needsTaskShader());
1436 
1437     std::ostringstream taskData;
1438     taskData << "struct TaskData {\n"
1439              << "    uint localInvocations[" << kLocalInvocations << "];\n"
1440              << "};\n"
1441              << "taskPayloadSharedEXT TaskData td;\n";
1442     const auto taskDataStr = taskData.str();
1443 
1444     std::ostringstream task;
1445     task << "#version 450\n"
1446          << "#extension GL_EXT_mesh_shader : enable\n"
1447          << "\n"
1448          << "layout (local_size_x=" << kLocalInvocations << ") in;\n"
1449          << "\n"
1450          << taskDataStr << "\n"
1451          << "void main () {\n"
1452          << "    td.localInvocations[gl_LocalInvocationIndex] = gl_LocalInvocationIndex;\n"
1453          << "    EmitMeshTasksEXT(" << params->meshCount.x() << ", " << params->meshCount.y() << ", "
1454          << params->meshCount.z() << ");\n"
1455          << "}\n";
1456     programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
1457 
1458     const auto primitiveName = primitiveTypeName(params->primitiveType);
1459 
1460     // Otherwise the shader would be illegal.
1461     DE_ASSERT(kLocalInvocations > 2u);
1462 
1463     uint32_t maxPrimitives = 0u;
1464     switch (params->primitiveType)
1465     {
1466     case PrimitiveType::POINTS:
1467         maxPrimitives = kLocalInvocations - 0u;
1468         break;
1469     case PrimitiveType::LINES:
1470         maxPrimitives = kLocalInvocations - 1u;
1471         break;
1472     case PrimitiveType::TRIANGLES:
1473         maxPrimitives = kLocalInvocations - 2u;
1474         break;
1475     default:
1476         DE_ASSERT(false);
1477         break;
1478     }
1479 
1480     const std::string pointSizeDecl = ((params->primitiveType == PrimitiveType::POINTS) ?
1481                                            "        gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 1.0;\n" :
1482                                            "");
1483 
1484     std::ostringstream mesh;
1485     mesh << "#version 450\n"
1486          << "#extension GL_EXT_mesh_shader : enable\n"
1487          << "\n"
1488          << "layout (local_size_x=" << kLocalInvocations << ") in;\n"
1489          << "layout (" << primitiveName << ") out;\n"
1490          << "layout (max_vertices=" << kLocalInvocations << ", max_primitives=" << maxPrimitives << ") out;\n"
1491          << "\n"
1492          << taskDataStr << "\n"
1493          << "layout (location=0) out perprimitiveEXT vec4 primitiveColor[];\n"
1494          << "\n"
1495          << "shared uint sumOfIds;\n"
1496          << "\n"
1497          << "const float PI_2 = 1.57079632679489661923;\n"
1498          << "const float RADIUS = 1.0f;\n"
1499          << "\n"
1500          << "void main ()\n"
1501          << "{\n"
1502          << "    sumOfIds = 0u;\n"
1503          << "    memoryBarrierShared();\n"
1504          << "    barrier();\n"
1505          << "    atomicAdd(sumOfIds, td.localInvocations[gl_LocalInvocationIndex]);\n"
1506          << "    memoryBarrierShared();\n"
1507          << "    barrier();\n"
1508          << "    // This should dynamically give 0\n"
1509          << "    uint primitiveCount = sumOfIds - (" << kLocalInvocations * (kLocalInvocations - 1u) / 2u << ");\n"
1510          << "    SetMeshOutputsEXT(primitiveCount, primitiveCount);\n"
1511          << "\n"
1512          << "    // Emit points and primitives to the arrays in any case\n"
1513          << "    if (gl_LocalInvocationIndex > 0u) {\n"
1514          << "        float proportion = (float(gl_LocalInvocationIndex - 1u) + 0.5f) / float(" << kLocalInvocations
1515          << " - 1u);\n"
1516          << "        float angle = PI_2 * proportion;\n"
1517          << "        float xCoord = cos(angle) * RADIUS - 1.0;\n"
1518          << "        float yCoord = sin(angle) * RADIUS - 1.0;\n"
1519          << "        gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(xCoord, yCoord, 0.0, 1.0);\n"
1520          << pointSizeDecl << "    } else {\n"
1521          << "        gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(0.0, 0.0, 0.0, 1.0);\n"
1522          << pointSizeDecl << "    }\n"
1523          << "    uint primitiveId = max(gl_LocalInvocationIndex, " << (maxPrimitives - 1u) << ");\n"
1524          << "    primitiveColor[primitiveId] = vec4(0.0, 0.0, 1.0, 1.0);\n";
1525 
1526     if (params->primitiveType == PrimitiveType::POINTS)
1527         mesh << "    gl_PrimitivePointIndicesEXT[primitiveId] = primitiveId;\n";
1528     else if (params->primitiveType == PrimitiveType::LINES)
1529         mesh << "    gl_PrimitiveLineIndicesEXT[primitiveId] = uvec2(primitiveId + 0u, primitiveId + 1u);\n";
1530     else if (params->primitiveType == PrimitiveType::TRIANGLES)
1531         mesh << "    gl_PrimitiveTriangleIndicesEXT[primitiveId] = uvec3(0u, primitiveId + 1u, primitiveId + 3u);\n";
1532     else
1533         DE_ASSERT(false);
1534 
1535     mesh << "}\n";
1536 
1537     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
1538 
1539     MeshShaderMiscCase::initPrograms(programCollection);
1540 }
1541 
1542 // Case testing barrier().
1543 class SimpleBarrierCase : public MeshShaderMiscCase
1544 {
1545 public:
SimpleBarrierCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)1546     SimpleBarrierCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
1547         : MeshShaderMiscCase(testCtx, name, std::move(params))
1548     {
1549     }
1550 
1551     void initPrograms(vk::SourceCollections &programCollection) const override;
1552     TestInstance *createInstance(Context &context) const override;
1553 
1554     static constexpr uint32_t kLocalInvocations = 32u;
1555 };
1556 
1557 class SimpleBarrierInstance : public MeshShaderMiscInstance
1558 {
1559 public:
SimpleBarrierInstance(Context & context,const MiscTestParams * params)1560     SimpleBarrierInstance(Context &context, const MiscTestParams *params) : MeshShaderMiscInstance(context, params)
1561     {
1562     }
1563 
1564     void generateReferenceLevel() override;
1565 };
1566 
createInstance(Context & context) const1567 TestInstance *SimpleBarrierCase::createInstance(Context &context) const
1568 {
1569     return new SimpleBarrierInstance(context, m_params.get());
1570 }
1571 
generateReferenceLevel()1572 void SimpleBarrierInstance::generateReferenceLevel()
1573 {
1574     generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f), m_referenceLevel);
1575 }
1576 
initPrograms(vk::SourceCollections & programCollection) const1577 void SimpleBarrierCase::initPrograms(vk::SourceCollections &programCollection) const
1578 {
1579     const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
1580 
1581     // Generate frag shader.
1582     MeshShaderMiscCase::initPrograms(programCollection);
1583 
1584     DE_ASSERT(m_params->meshCount == tcu::UVec3(1u, 1u, 1u));
1585     DE_ASSERT(m_params->width == 1u && m_params->height == 1u);
1586 
1587     const std::string taskOK   = "workGroupSize = uvec3(1u, 1u, 1u);\n";
1588     const std::string taskFAIL = "workGroupSize = uvec3(0u, 0u, 0u);\n";
1589 
1590     const std::string meshOK   = "vertPrim = uvec2(1u, 1u);\n";
1591     const std::string meshFAIL = "vertPrim = uvec2(0u, 0u);\n";
1592 
1593     const std::string okStatement   = (m_params->needsTaskShader() ? taskOK : meshOK);
1594     const std::string failStatement = (m_params->needsTaskShader() ? taskFAIL : meshFAIL);
1595 
1596     const std::string sharedDecl = "shared uint counter;\n\n";
1597     std::ostringstream verification;
1598     verification << "counter = 0;\n"
1599                  << "memoryBarrierShared();\n"
1600                  << "barrier();\n"
1601                  << "atomicAdd(counter, 1u);\n"
1602                  << "memoryBarrierShared();\n"
1603                  << "barrier();\n"
1604                  << "if (gl_LocalInvocationIndex == 0u) {\n"
1605                  << "    if (counter == " << kLocalInvocations << ") {\n"
1606                  << "\n"
1607                  << okStatement << "\n"
1608                  << "    } else {\n"
1609                  << "\n"
1610                  << failStatement << "\n"
1611                  << "    }\n"
1612                  << "}\n";
1613 
1614     // The mesh shader is very similar in both cases, so we use a template.
1615     std::ostringstream meshTemplateStr;
1616     meshTemplateStr << "#version 450\n"
1617                     << "#extension GL_EXT_mesh_shader : enable\n"
1618                     << "\n"
1619                     << "layout (local_size_x=${LOCAL_SIZE}) in;\n"
1620                     << "layout (points) out;\n"
1621                     << "layout (max_vertices=1, max_primitives=1) out;\n"
1622                     << "\n"
1623                     << "layout (location=0) out perprimitiveEXT vec4 primitiveColor[];\n"
1624                     << "\n"
1625                     << "${GLOBALS:opt}"
1626                     << "void main ()\n"
1627                     << "{\n"
1628                     << "    uvec2 vertPrim = uvec2(0u, 0u);\n"
1629                     << "${BODY}"
1630                     << "    SetMeshOutputsEXT(vertPrim.x, vertPrim.y);\n"
1631                     << "    if (gl_LocalInvocationIndex == 0u && vertPrim.x > 0u) {\n"
1632                     << "        gl_MeshVerticesEXT[0].gl_Position = vec4(0.0, 0.0, 0.0, 1.0);\n"
1633                     << "        gl_MeshVerticesEXT[0].gl_PointSize = 1.0;\n"
1634                     << "        primitiveColor[0] = vec4(0.0, 0.0, 1.0, 1.0);\n"
1635                     << "        gl_PrimitivePointIndicesEXT[0] = 0;\n"
1636                     << "    }\n"
1637                     << "}\n";
1638     const tcu::StringTemplate meshTemplate = meshTemplateStr.str();
1639 
1640     if (m_params->needsTaskShader())
1641     {
1642         std::ostringstream task;
1643         task << "#version 450\n"
1644              << "#extension GL_EXT_mesh_shader : enable\n"
1645              << "\n"
1646              << "layout (local_size_x=" << kLocalInvocations << ") in;\n"
1647              << "\n"
1648              << sharedDecl << "void main ()\n"
1649              << "{\n"
1650              << "    uvec3 workGroupSize = uvec3(0u, 0u, 0u);\n"
1651              << verification.str() << "    EmitMeshTasksEXT(workGroupSize.x, workGroupSize.y, workGroupSize.z);\n"
1652              << "}\n";
1653 
1654         std::map<std::string, std::string> replacements;
1655         replacements["LOCAL_SIZE"] = "1";
1656         replacements["BODY"]       = meshOK;
1657 
1658         const auto meshStr = meshTemplate.specialize(replacements);
1659 
1660         programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
1661         programCollection.glslSources.add("mesh") << glu::MeshSource(meshStr) << buildOptions;
1662     }
1663     else
1664     {
1665         std::map<std::string, std::string> replacements;
1666         replacements["LOCAL_SIZE"] = std::to_string(kLocalInvocations);
1667         replacements["BODY"]       = verification.str();
1668         replacements["GLOBALS"]    = sharedDecl;
1669 
1670         const auto meshStr = meshTemplate.specialize(replacements);
1671 
1672         programCollection.glslSources.add("mesh") << glu::MeshSource(meshStr) << buildOptions;
1673     }
1674 }
1675 
1676 // Case testing memoryBarrierShared() and groupMemoryBarrier().
// Selects which GLSL memory barrier function the memory barrier tests exercise.
enum class MemoryBarrierType
{
    SHARED = 0, // memoryBarrierShared()
    GROUP  = 1, // groupMemoryBarrier()
};
1682 
1683 struct MemoryBarrierParams : public MiscTestParams
1684 {
MemoryBarrierParamsvkt::MeshShader::__anonb7c155300111::MemoryBarrierParams1685     MemoryBarrierParams(const tcu::Maybe<tcu::UVec3> &taskCount_, const tcu::UVec3 &meshCount_, uint32_t width_,
1686                         uint32_t height_, MemoryBarrierType memBarrierType_)
1687         : MiscTestParams(taskCount_, meshCount_, width_, height_)
1688         , memBarrierType(memBarrierType_)
1689     {
1690     }
1691 
1692     MemoryBarrierType memBarrierType;
1693 
glslFuncvkt::MeshShader::__anonb7c155300111::MemoryBarrierParams1694     std::string glslFunc() const
1695     {
1696         std::string funcName;
1697 
1698         switch (memBarrierType)
1699         {
1700         case MemoryBarrierType::SHARED:
1701             funcName = "memoryBarrierShared";
1702             break;
1703         case MemoryBarrierType::GROUP:
1704             funcName = "groupMemoryBarrier";
1705             break;
1706         default:
1707             DE_ASSERT(false);
1708             break;
1709         }
1710 
1711         return funcName;
1712     }
1713 };
1714 
1715 class MemoryBarrierCase : public MeshShaderMiscCase
1716 {
1717 public:
MemoryBarrierCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)1718     MemoryBarrierCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
1719         : MeshShaderMiscCase(testCtx, name, std::move(params))
1720     {
1721     }
1722 
1723     void initPrograms(vk::SourceCollections &programCollection) const override;
1724     TestInstance *createInstance(Context &context) const override;
1725 
1726     static constexpr uint32_t kLocalInvocations = 2u;
1727 };
1728 
1729 class MemoryBarrierInstance : public MeshShaderMiscInstance
1730 {
1731 public:
MemoryBarrierInstance(Context & context,const MiscTestParams * params)1732     MemoryBarrierInstance(Context &context, const MiscTestParams *params) : MeshShaderMiscInstance(context, params)
1733     {
1734     }
1735 
1736     void generateReferenceLevel() override;
1737     bool verifyResult(const tcu::ConstPixelBufferAccess &resultAccess) const override;
1738 
1739 protected:
1740     // Allow two possible outcomes.
1741     std::unique_ptr<tcu::TextureLevel> m_referenceLevel2;
1742 };
1743 
createInstance(Context & context) const1744 TestInstance *MemoryBarrierCase::createInstance(Context &context) const
1745 {
1746     return new MemoryBarrierInstance(context, m_params.get());
1747 }
1748 
generateReferenceLevel()1749 void MemoryBarrierInstance::generateReferenceLevel()
1750 {
1751     generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f), m_referenceLevel);
1752     generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 0.0f, 1.0f), m_referenceLevel2);
1753 }
1754 
verifyResult(const tcu::ConstPixelBufferAccess & resultAccess) const1755 bool MemoryBarrierInstance::verifyResult(const tcu::ConstPixelBufferAccess &resultAccess) const
1756 {
1757     // Any of the two results is considered valid.
1758     constexpr auto Message    = tcu::TestLog::Message;
1759     constexpr auto EndMessage = tcu::TestLog::EndMessage;
1760 
1761     // Clarify what we are checking in the logs; otherwise, they could be confusing.
1762     auto &log                                     = m_context.getTestContext().getLog();
1763     const std::vector<tcu::TextureLevel *> levels = {m_referenceLevel.get(), m_referenceLevel2.get()};
1764 
1765     bool good = false;
1766     for (size_t i = 0; i < levels.size(); ++i)
1767     {
1768         log << Message << "Comparing result with reference " << i << "..." << EndMessage;
1769         const auto success = MeshShaderMiscInstance::verifyResult(resultAccess, *levels[i]);
1770         if (success)
1771         {
1772             log << Message << "Match! The test has passed" << EndMessage;
1773             good = true;
1774             break;
1775         }
1776     }
1777 
1778     return good;
1779 }
1780 
initPrograms(vk::SourceCollections & programCollection) const1781 void MemoryBarrierCase::initPrograms(vk::SourceCollections &programCollection) const
1782 {
1783     const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
1784     const auto params       = dynamic_cast<MemoryBarrierParams *>(m_params.get());
1785     DE_ASSERT(params);
1786 
1787     // Generate frag shader.
1788     MeshShaderMiscCase::initPrograms(programCollection);
1789 
1790     DE_ASSERT(params->meshCount == tcu::UVec3(1u, 1u, 1u));
1791     DE_ASSERT(params->width == 1u && params->height == 1u);
1792 
1793     const bool taskShader = params->needsTaskShader();
1794 
1795     const std::string taskDataDecl = "struct TaskData { float blue; }; taskPayloadSharedEXT TaskData td;\n\n";
1796     const auto barrierFunc         = params->glslFunc();
1797 
1798     const std::string taskAction = "td.blue = float(iterations % 2u);\nworkGroupSize = uvec3(1u, 1u, 1u);\n";
1799     const std::string meshAction = "vertPrim = uvec2(1u, 1u);\n";
1800     const std::string action     = (taskShader ? taskAction : meshAction);
1801 
1802     const std::string sharedDecl = "shared uint flags[2];\n\n";
1803     std::ostringstream verification;
1804     verification << "flags[gl_LocalInvocationIndex] = 0u;\n"
1805                  << "barrier();\n"
1806                  << "flags[gl_LocalInvocationIndex] = 1u;\n"
1807                  << barrierFunc << "();\n"
1808                  << "uint otherInvocation = 1u - gl_LocalInvocationIndex;\n"
1809                  << "uint iterations = 0u;\n"
1810                  << "while (flags[otherInvocation] != 1u) {\n"
1811                  << "    iterations++;\n"
1812                  << "}\n"
1813                  << "if (gl_LocalInvocationIndex == 0u) {\n"
1814                  << "\n"
1815                  << action << "\n"
1816                  << "}\n";
1817 
1818     // The mesh shader is very similar in both cases, so we use a template.
1819     std::ostringstream meshTemplateStr;
1820     meshTemplateStr << "#version 450\n"
1821                     << "#extension GL_EXT_mesh_shader : enable\n"
1822                     << "\n"
1823                     << "layout (local_size_x=${LOCAL_SIZE}) in;\n"
1824                     << "layout (points) out;\n"
1825                     << "layout (max_vertices=1, max_primitives=1) out;\n"
1826                     << "\n"
1827                     << "layout (location=0) out perprimitiveEXT vec4 primitiveColor[];\n"
1828                     << "\n"
1829                     << "${GLOBALS}"
1830                     << "void main ()\n"
1831                     << "{\n"
1832                     << "    uvec2 vertPrim = uvec2(0u, 0u);\n"
1833                     << "${BODY}"
1834                     << "    SetMeshOutputsEXT(vertPrim.x, vertPrim.y);\n"
1835                     << "    if (gl_LocalInvocationIndex == 0u && vertPrim.x > 0u) {\n"
1836                     << "        gl_MeshVerticesEXT[0].gl_Position = vec4(0.0, 0.0, 0.0, 1.0);\n"
1837                     << "        gl_MeshVerticesEXT[0].gl_PointSize = 1.0;\n"
1838                     << "        primitiveColor[0] = vec4(0.0, 0.0, ${BLUE}, 1.0);\n"
1839                     << "        gl_PrimitivePointIndicesEXT[0] = 0;\n"
1840                     << "    }\n"
1841                     << "}\n";
1842     const tcu::StringTemplate meshTemplate = meshTemplateStr.str();
1843 
1844     if (params->needsTaskShader())
1845     {
1846         std::ostringstream task;
1847         task << "#version 450\n"
1848              << "#extension GL_EXT_mesh_shader : enable\n"
1849              << "\n"
1850              << "layout (local_size_x=" << kLocalInvocations << ") in;\n"
1851              << "\n"
1852              << sharedDecl << taskDataDecl << "void main ()\n"
1853              << "{\n"
1854              << "    uvec3 workGroupSize = uvec3(0u, 0u, 0u);\n"
1855              << verification.str() << "    EmitMeshTasksEXT(workGroupSize.x, workGroupSize.y, workGroupSize.z);\n"
1856              << "}\n";
1857 
1858         std::map<std::string, std::string> replacements;
1859         replacements["LOCAL_SIZE"] = "1";
1860         replacements["BODY"]       = meshAction;
1861         replacements["GLOBALS"]    = taskDataDecl;
1862         replacements["BLUE"]       = "td.blue";
1863 
1864         const auto meshStr = meshTemplate.specialize(replacements);
1865 
1866         programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
1867         programCollection.glslSources.add("mesh") << glu::MeshSource(meshStr) << buildOptions;
1868     }
1869     else
1870     {
1871         std::map<std::string, std::string> replacements;
1872         replacements["LOCAL_SIZE"] = std::to_string(kLocalInvocations);
1873         replacements["BODY"]       = verification.str();
1874         replacements["GLOBALS"]    = sharedDecl;
1875         replacements["BLUE"]       = "float(iterations % 2u)";
1876 
1877         const auto meshStr = meshTemplate.specialize(replacements);
1878 
1879         programCollection.glslSources.add("mesh") << glu::MeshSource(meshStr) << buildOptions;
1880     }
1881 }
1882 
1883 // Test the task payload can be read by all invocations in the work group.
1884 class PayloadReadCase : public MeshShaderMiscCase
1885 {
1886 public:
PayloadReadCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)1887     PayloadReadCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
1888         : MeshShaderMiscCase(testCtx, name, std::move(params))
1889     {
1890     }
1891 
1892     void initPrograms(vk::SourceCollections &programCollection) const override;
1893     TestInstance *createInstance(Context &context) const override;
1894 
1895     static constexpr uint32_t kLocalInvocations = 128u;
1896 };
1897 
1898 class PayloadReadInstance : public MeshShaderMiscInstance
1899 {
1900 public:
PayloadReadInstance(Context & context,const MiscTestParams * params)1901     PayloadReadInstance(Context &context, const MiscTestParams *params) : MeshShaderMiscInstance(context, params)
1902     {
1903     }
1904 
1905     void generateReferenceLevel() override;
1906 };
1907 
createInstance(Context & context) const1908 TestInstance *PayloadReadCase::createInstance(Context &context) const
1909 {
1910     return new PayloadReadInstance(context, m_params.get());
1911 }
1912 
initPrograms(vk::SourceCollections & programCollection) const1913 void PayloadReadCase::initPrograms(vk::SourceCollections &programCollection) const
1914 {
1915     const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
1916 
1917     // Add default fragment shader.
1918     MeshShaderMiscCase::initPrograms(programCollection);
1919 
1920     std::ostringstream taskPayload;
1921     taskPayload << "struct TaskData {\n"
1922                 << "    uint verificationCodes[" << kLocalInvocations << "];\n"
1923                 << "    vec4 color;\n"
1924                 << "};\n"
1925                 << "taskPayloadSharedEXT TaskData td;\n";
1926     const std::string taskPayloadDecl = taskPayload.str();
1927 
1928     DE_ASSERT(m_params->needsTaskShader());
1929 
1930     const auto &meshCount = m_params->meshCount;
1931     DE_ASSERT(meshCount.x() == 1u && meshCount.y() == 1u && meshCount.z() == 1u);
1932 
1933     const auto kLocalInvocations2 = kLocalInvocations * 2u;
1934 
1935     std::ostringstream task;
1936     task << "#version 450\n"
1937          << "#extension GL_EXT_mesh_shader : enable\n"
1938          << "\n"
1939          << "layout (local_size_x=" << kLocalInvocations << ") in;\n"
1940          << "\n"
1941          << taskPayloadDecl << "shared uint verificationOK[" << kLocalInvocations << "];\n"
1942          << "\n"
1943          << "void main ()\n"
1944          << "{\n"
1945          << "    td.verificationCodes[gl_LocalInvocationIndex] = (" << kLocalInvocations2
1946          << " - gl_LocalInvocationIndex);\n"
1947          << "    memoryBarrierShared();\n"
1948          << "    barrier();\n"
1949          // Verify all codes from all invocations.
1950          << "    uint verificationResult = 1u;\n"
1951          << "    for (uint i = 0u; i < " << kLocalInvocations << "; ++i) {\n"
1952          << "        if (td.verificationCodes[i] != (" << kLocalInvocations2 << " - i)) {\n"
1953          << "            verificationResult = 0u;\n"
1954          << "            break;\n"
1955          << "        }\n"
1956          << "    }\n"
1957          << "    verificationOK[gl_LocalInvocationIndex] = verificationResult;\n"
1958          << "    memoryBarrierShared();\n"
1959          << "    barrier();\n"
1960          // Check all verifications were OK (from the first invocation).
1961          << "    if (gl_LocalInvocationIndex == 0u) {\n"
1962          << "        vec4 color = vec4(0.0, 0.0, 1.0, 1.0);\n"
1963          << "        for (uint i = 0u; i < " << kLocalInvocations << "; ++i) {\n"
1964          << "            if (verificationOK[i] == 0u) {\n"
1965          << "                color = vec4(0.0, 0.0, 0.0, 1.0);\n"
1966          << "            }\n"
1967          << "        }\n"
1968          << "        td.color = color;\n"
1969          << "    }\n"
1970          << "    EmitMeshTasksEXT(" << meshCount.x() << ", " << meshCount.y() << ", " << meshCount.z() << ");\n"
1971          << "}\n";
1972     programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
1973 
1974     std::ostringstream mesh;
1975     mesh << "#version 450\n"
1976          << "#extension GL_EXT_mesh_shader : enable\n"
1977          << "\n"
1978          << "layout (local_size_x=1) in;\n"
1979          << "layout (triangles) out;\n"
1980          << "layout (max_vertices=3, max_primitives=1) out;\n"
1981          << "\n"
1982          << "layout (location=0) out perprimitiveEXT vec4 primitiveColor[];\n"
1983          << taskPayloadDecl << "\n"
1984          << "void main ()\n"
1985          << "{\n"
1986          // Verify data one more time from the mesh shader invocation.
1987          << "    uint verificationResult = 1u;\n"
1988          << "    for (uint i = 0u; i < " << kLocalInvocations << "; ++i) {\n"
1989          << "        if (td.verificationCodes[i] != (" << kLocalInvocations2 << " - i)) {\n"
1990          << "            verificationResult = 0u;\n"
1991          << "            break;\n"
1992          << "        }\n"
1993          << "    }\n"
1994          << "    const vec4 finalColor = ((verificationResult == 0u) ? vec4(0.0, 0.0, 0.0, 1.0) : td.color);\n"
1995          << "\n"
1996          << "    SetMeshOutputsEXT(3u, 1u);\n"
1997          << "\n"
1998          << "    gl_MeshVerticesEXT[0].gl_Position = vec4(-1.0, -1.0, 0.0, 1.0);\n"
1999          << "    gl_MeshVerticesEXT[1].gl_Position = vec4( 3.0, -1.0, 0.0, 1.0);\n"
2000          << "    gl_MeshVerticesEXT[2].gl_Position = vec4(-1.0,  3.0, 0.0, 1.0);\n"
2001          << "\n"
2002          << "    gl_PrimitiveTriangleIndicesEXT[0] = uvec3(0, 1, 2);\n"
2003          << "    primitiveColor[0] = finalColor;\n"
2004          << "}\n";
2005     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
2006 }
2007 
generateReferenceLevel()2008 void PayloadReadInstance::generateReferenceLevel()
2009 {
2010     generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f), m_referenceLevel);
2011 }
2012 
2013 // Test with custom per-vertex and per-primitive attributes of different types.
2014 class CustomAttributesCase : public MeshShaderMiscCase
2015 {
2016 public:
CustomAttributesCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)2017     CustomAttributesCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
2018         : MeshShaderMiscCase(testCtx, name, std::move(params))
2019     {
2020     }
~CustomAttributesCase(void)2021     virtual ~CustomAttributesCase(void)
2022     {
2023     }
2024 
2025     TestInstance *createInstance(Context &context) const override;
2026     void checkSupport(Context &context) const override;
2027     void initPrograms(vk::SourceCollections &programCollection) const override;
2028 };
2029 
2030 class CustomAttributesInstance : public MeshShaderMiscInstance
2031 {
2032 public:
CustomAttributesInstance(Context & context,const MiscTestParams * params)2033     CustomAttributesInstance(Context &context, const MiscTestParams *params) : MeshShaderMiscInstance(context, params)
2034     {
2035     }
~CustomAttributesInstance(void)2036     virtual ~CustomAttributesInstance(void)
2037     {
2038     }
2039 
2040     void generateReferenceLevel() override;
2041     tcu::TestStatus iterate(void) override;
2042 };
2043 
createInstance(Context & context) const2044 TestInstance *CustomAttributesCase::createInstance(Context &context) const
2045 {
2046     return new CustomAttributesInstance(context, m_params.get());
2047 }
2048 
checkSupport(Context & context) const2049 void CustomAttributesCase::checkSupport(Context &context) const
2050 {
2051     MeshShaderMiscCase::checkSupport(context);
2052 
2053     context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_MULTI_VIEWPORT);
2054     context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SHADER_CLIP_DISTANCE);
2055 }
2056 
initPrograms(vk::SourceCollections & programCollection) const2057 void CustomAttributesCase::initPrograms(vk::SourceCollections &programCollection) const
2058 {
2059     const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
2060 
2061     std::ostringstream frag;
2062     frag << "#version 450\n"
2063          << "#extension GL_EXT_mesh_shader : enable\n"
2064          << "\n"
2065          << "layout (location=0) in vec4 customAttribute1;\n"
2066          << "layout (location=1) in flat float customAttribute2;\n"
2067          << "layout (location=2) in flat int customAttribute3;\n"
2068          << "\n"
2069          << "layout (location=3) in perprimitiveEXT flat uvec4 customAttribute4;\n"
2070          << "layout (location=4) in perprimitiveEXT float customAttribute5;\n"
2071          << "\n"
2072          << "layout (location=0) out vec4 outColor;\n"
2073          << "\n"
2074          << "void main ()\n"
2075          << "{\n"
2076          << "    bool goodPrimitiveID = (gl_PrimitiveID == 1000 || gl_PrimitiveID == 1001);\n"
2077          << "    bool goodViewportIndex = (gl_ViewportIndex == 1);\n"
2078          << "    bool goodCustom1 = (customAttribute1.x >= 0.25 && customAttribute1.x <= 0.5 &&\n"
2079          << "                        customAttribute1.y >= 0.5  && customAttribute1.y <= 1.0 &&\n"
2080          << "                        customAttribute1.z >= 10.0 && customAttribute1.z <= 20.0 &&\n"
2081          << "                        customAttribute1.w == 3.0);\n"
2082          << "    bool goodCustom2 = (customAttribute2 == 1.0 || customAttribute2 == 2.0);\n"
2083          << "    bool goodCustom3 = (customAttribute3 == 3 || customAttribute3 == 4);\n"
2084          << "    bool goodCustom4 = ((gl_PrimitiveID == 1000 && customAttribute4 == uvec4(100, 101, 102, 103)) ||\n"
2085          << "                        (gl_PrimitiveID == 1001 && customAttribute4 == uvec4(200, 201, 202, 203)));\n"
2086          << "    bool goodCustom5 = ((gl_PrimitiveID == 1000 && customAttribute5 == 6.0) ||\n"
2087          << "                        (gl_PrimitiveID == 1001 && customAttribute5 == 7.0));\n"
2088          << "    \n"
2089          << "    if (goodPrimitiveID && goodViewportIndex && goodCustom1 && goodCustom2 && goodCustom3 && goodCustom4 "
2090             "&& goodCustom5) {\n"
2091          << "        outColor = vec4(0.0, 0.0, 1.0, 1.0);\n"
2092          << "    } else {\n"
2093          << "        outColor = vec4(0.0, 0.0, 0.0, 1.0);\n"
2094          << "    }\n"
2095          << "}\n";
2096     programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str()) << buildOptions;
2097 
2098     std::ostringstream pvdDataDeclStream;
2099     pvdDataDeclStream << "    vec4 positions[4];\n"
2100                       << "    float pointSizes[4];\n"
2101                       << "    float clipDistances[4];\n"
2102                       << "    vec4 custom1[4];\n"
2103                       << "    float custom2[4];\n"
2104                       << "    int custom3[4];\n";
2105     const auto pvdDataDecl = pvdDataDeclStream.str();
2106 
2107     std::ostringstream ppdDataDeclStream;
2108     ppdDataDeclStream << "    int primitiveIds[2];\n"
2109                       << "    int viewportIndices[2];\n"
2110                       << "    uvec4 custom4[2];\n"
2111                       << "    float custom5[2];\n";
2112     const auto ppdDataDecl = ppdDataDeclStream.str();
2113 
2114     std::ostringstream bindingsDeclStream;
2115     bindingsDeclStream << "layout (set=0, binding=0, std430) buffer PerVertexData {\n"
2116                        << pvdDataDecl << "} pvd;\n"
2117                        << "layout (set=0, binding=1) uniform PerPrimitiveData {\n"
2118                        << ppdDataDecl << "} ppd;\n"
2119                        << "\n";
2120     const auto bindingsDecl = bindingsDeclStream.str();
2121 
2122     std::ostringstream taskDataStream;
2123     taskDataStream << "struct TaskData {\n"
2124                    << pvdDataDecl << ppdDataDecl << "};\n"
2125                    << "taskPayloadSharedEXT TaskData td;\n"
2126                    << "\n";
2127     const auto taskDataDecl = taskDataStream.str();
2128 
2129     const auto taskShader = m_params->needsTaskShader();
2130 
2131     const auto meshPvdPrefix = (taskShader ? "td" : "pvd");
2132     const auto meshPpdPrefix = (taskShader ? "td" : "ppd");
2133 
2134     std::ostringstream mesh;
2135     mesh << "#version 450\n"
2136          << "#extension GL_EXT_mesh_shader : enable\n"
2137          << "\n"
2138          << "layout (local_size_x=1) in;\n"
2139          << "layout (max_primitives=2, max_vertices=4) out;\n"
2140          << "layout (triangles) out;\n"
2141          << "\n"
2142          << "out gl_MeshPerVertexEXT {\n"
2143          << "    vec4  gl_Position;\n"
2144          << "    float gl_PointSize;\n"
2145          << "    float gl_ClipDistance[1];\n"
2146          << "} gl_MeshVerticesEXT[];\n"
2147          << "\n"
2148          << "layout (location=0) out vec4 customAttribute1[];\n"
2149          << "layout (location=1) out flat float customAttribute2[];\n"
2150          << "layout (location=2) out int customAttribute3[];\n"
2151          << "\n"
2152          << "layout (location=3) out perprimitiveEXT uvec4 customAttribute4[];\n"
2153          << "layout (location=4) out perprimitiveEXT float customAttribute5[];\n"
2154          << "\n"
2155          << "out perprimitiveEXT gl_MeshPerPrimitiveEXT {\n"
2156          << "  int gl_PrimitiveID;\n"
2157          << "  int gl_ViewportIndex;\n"
2158          << "} gl_MeshPrimitivesEXT[];\n"
2159          << "\n"
2160          << (taskShader ? taskDataDecl : bindingsDecl) << "void main ()\n"
2161          << "{\n"
2162          << "    SetMeshOutputsEXT(4u, 2u);\n"
2163          << "\n"
2164          << "    gl_MeshVerticesEXT[0].gl_Position = " << meshPvdPrefix
2165          << ".positions[0]; //vec4(-1.0, -1.0, 0.0, 1.0)\n"
2166          << "    gl_MeshVerticesEXT[1].gl_Position = " << meshPvdPrefix
2167          << ".positions[1]; //vec4( 1.0, -1.0, 0.0, 1.0)\n"
2168          << "    gl_MeshVerticesEXT[2].gl_Position = " << meshPvdPrefix
2169          << ".positions[2]; //vec4(-1.0,  1.0, 0.0, 1.0)\n"
2170          << "    gl_MeshVerticesEXT[3].gl_Position = " << meshPvdPrefix
2171          << ".positions[3]; //vec4( 1.0,  1.0, 0.0, 1.0)\n"
2172          << "\n"
2173          << "    gl_MeshVerticesEXT[0].gl_PointSize = " << meshPvdPrefix << ".pointSizes[0]; //1.0\n"
2174          << "    gl_MeshVerticesEXT[1].gl_PointSize = " << meshPvdPrefix << ".pointSizes[1]; //1.0\n"
2175          << "    gl_MeshVerticesEXT[2].gl_PointSize = " << meshPvdPrefix << ".pointSizes[2]; //1.0\n"
2176          << "    gl_MeshVerticesEXT[3].gl_PointSize = " << meshPvdPrefix << ".pointSizes[3]; //1.0\n"
2177          << "\n"
2178          << "    // Remove geometry on the right side.\n"
2179          << "    gl_MeshVerticesEXT[0].gl_ClipDistance[0] = " << meshPvdPrefix << ".clipDistances[0]; // 1.0\n"
2180          << "    gl_MeshVerticesEXT[1].gl_ClipDistance[0] = " << meshPvdPrefix << ".clipDistances[1]; //-1.0\n"
2181          << "    gl_MeshVerticesEXT[2].gl_ClipDistance[0] = " << meshPvdPrefix << ".clipDistances[2]; // 1.0\n"
2182          << "    gl_MeshVerticesEXT[3].gl_ClipDistance[0] = " << meshPvdPrefix << ".clipDistances[3]; //-1.0\n"
2183          << "    \n"
2184          << "    gl_PrimitiveTriangleIndicesEXT[0] = uvec3(0, 1, 2);\n"
2185          << "    gl_PrimitiveTriangleIndicesEXT[1] = uvec3(2, 3, 1);\n"
2186          << "\n"
2187          << "    gl_MeshPrimitivesEXT[0].gl_PrimitiveID = " << meshPpdPrefix << ".primitiveIds[0]; //1000\n"
2188          << "    gl_MeshPrimitivesEXT[1].gl_PrimitiveID = " << meshPpdPrefix << ".primitiveIds[1]; //1001\n"
2189          << "\n"
2190          << "    gl_MeshPrimitivesEXT[0].gl_ViewportIndex = " << meshPpdPrefix << ".viewportIndices[0]; //1\n"
2191          << "    gl_MeshPrimitivesEXT[1].gl_ViewportIndex = " << meshPpdPrefix << ".viewportIndices[1]; //1\n"
2192          << "\n"
2193          << "    // Custom per-vertex attributes\n"
2194          << "    customAttribute1[0] = " << meshPvdPrefix << ".custom1[0]; //vec4(0.25, 0.5, 10.0, 3.0)\n"
2195          << "    customAttribute1[1] = " << meshPvdPrefix << ".custom1[1]; //vec4(0.25, 1.0, 20.0, 3.0)\n"
2196          << "    customAttribute1[2] = " << meshPvdPrefix << ".custom1[2]; //vec4( 0.5, 0.5, 20.0, 3.0)\n"
2197          << "    customAttribute1[3] = " << meshPvdPrefix << ".custom1[3]; //vec4( 0.5, 1.0, 10.0, 3.0)\n"
2198          << "\n"
2199          << "    customAttribute2[0] = " << meshPvdPrefix << ".custom2[0]; //1.0f\n"
2200          << "    customAttribute2[1] = " << meshPvdPrefix << ".custom2[1]; //1.0f\n"
2201          << "    customAttribute2[2] = " << meshPvdPrefix << ".custom2[2]; //2.0f\n"
2202          << "    customAttribute2[3] = " << meshPvdPrefix << ".custom2[3]; //2.0f\n"
2203          << "\n"
2204          << "    customAttribute3[0] = " << meshPvdPrefix << ".custom3[0]; //3\n"
2205          << "    customAttribute3[1] = " << meshPvdPrefix << ".custom3[1]; //3\n"
2206          << "    customAttribute3[2] = " << meshPvdPrefix << ".custom3[2]; //4\n"
2207          << "    customAttribute3[3] = " << meshPvdPrefix << ".custom3[3]; //4\n"
2208          << "\n"
2209          << "    // Custom per-primitive attributes.\n"
2210          << "    customAttribute4[0] = " << meshPpdPrefix << ".custom4[0]; //uvec4(100, 101, 102, 103)\n"
2211          << "    customAttribute4[1] = " << meshPpdPrefix << ".custom4[1]; //uvec4(200, 201, 202, 203)\n"
2212          << "\n"
2213          << "    customAttribute5[0] = " << meshPpdPrefix << ".custom5[0]; //6.0\n"
2214          << "    customAttribute5[1] = " << meshPpdPrefix << ".custom5[1]; //7.0\n"
2215          << "}\n";
2216     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
2217 
2218     if (taskShader)
2219     {
2220         const auto &meshCount = m_params->meshCount;
2221         std::ostringstream task;
2222         task << "#version 450\n"
2223              << "#extension GL_EXT_mesh_shader : enable\n"
2224              << "\n"
2225              << taskDataDecl << bindingsDecl << "void main ()\n"
2226              << "{\n"
2227              << "    td.positions[0] = pvd.positions[0];\n"
2228              << "    td.positions[1] = pvd.positions[1];\n"
2229              << "    td.positions[2] = pvd.positions[2];\n"
2230              << "    td.positions[3] = pvd.positions[3];\n"
2231              << "\n"
2232              << "    td.pointSizes[0] = pvd.pointSizes[0];\n"
2233              << "    td.pointSizes[1] = pvd.pointSizes[1];\n"
2234              << "    td.pointSizes[2] = pvd.pointSizes[2];\n"
2235              << "    td.pointSizes[3] = pvd.pointSizes[3];\n"
2236              << "\n"
2237              << "    td.clipDistances[0] = pvd.clipDistances[0];\n"
2238              << "    td.clipDistances[1] = pvd.clipDistances[1];\n"
2239              << "    td.clipDistances[2] = pvd.clipDistances[2];\n"
2240              << "    td.clipDistances[3] = pvd.clipDistances[3];\n"
2241              << "\n"
2242              << "    td.custom1[0] = pvd.custom1[0];\n"
2243              << "    td.custom1[1] = pvd.custom1[1];\n"
2244              << "    td.custom1[2] = pvd.custom1[2];\n"
2245              << "    td.custom1[3] = pvd.custom1[3];\n"
2246              << "\n"
2247              << "    td.custom2[0] = pvd.custom2[0];\n"
2248              << "    td.custom2[1] = pvd.custom2[1];\n"
2249              << "    td.custom2[2] = pvd.custom2[2];\n"
2250              << "    td.custom2[3] = pvd.custom2[3];\n"
2251              << "\n"
2252              << "    td.custom3[0] = pvd.custom3[0];\n"
2253              << "    td.custom3[1] = pvd.custom3[1];\n"
2254              << "    td.custom3[2] = pvd.custom3[2];\n"
2255              << "    td.custom3[3] = pvd.custom3[3];\n"
2256              << "\n"
2257              << "    td.primitiveIds[0] = ppd.primitiveIds[0];\n"
2258              << "    td.primitiveIds[1] = ppd.primitiveIds[1];\n"
2259              << "\n"
2260              << "    td.viewportIndices[0] = ppd.viewportIndices[0];\n"
2261              << "    td.viewportIndices[1] = ppd.viewportIndices[1];\n"
2262              << "\n"
2263              << "    td.custom4[0] = ppd.custom4[0];\n"
2264              << "    td.custom4[1] = ppd.custom4[1];\n"
2265              << "\n"
2266              << "    td.custom5[0] = ppd.custom5[0];\n"
2267              << "    td.custom5[1] = ppd.custom5[1];\n"
2268              << "\n"
2269              << "    EmitMeshTasksEXT(" << meshCount.x() << ", " << meshCount.y() << ", " << meshCount.z() << ");\n"
2270              << "}\n";
2271         programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
2272     }
2273 }
2274 
generateReferenceLevel()2275 void CustomAttributesInstance::generateReferenceLevel()
2276 {
2277     const auto format    = getOutputFormat();
2278     const auto tcuFormat = mapVkFormat(format);
2279 
2280     const auto iWidth  = static_cast<int>(m_params->width);
2281     const auto iHeight = static_cast<int>(m_params->height);
2282 
2283     const auto halfWidth  = iWidth / 2;
2284     const auto halfHeight = iHeight / 2;
2285 
2286     m_referenceLevel.reset(new tcu::TextureLevel(tcuFormat, iWidth, iHeight));
2287 
2288     const auto access     = m_referenceLevel->getAccess();
2289     const auto clearColor = tcu::Vec4(0.0f, 0.0f, 0.0f, 0.0f);
2290     const auto blueColor  = tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f);
2291 
2292     tcu::clear(access, clearColor);
2293 
2294     // Fill the top left quarter.
2295     for (int y = 0; y < halfWidth; ++y)
2296         for (int x = 0; x < halfHeight; ++x)
2297         {
2298             access.setPixel(blueColor, x, y);
2299         }
2300 }
2301 
iterate()2302 tcu::TestStatus CustomAttributesInstance::iterate()
2303 {
2304     struct PerVertexData
2305     {
2306         tcu::Vec4 positions[4];
2307         float pointSizes[4];
2308         float clipDistances[4];
2309         tcu::Vec4 custom1[4];
2310         float custom2[4];
2311         int32_t custom3[4];
2312     };
2313 
2314     struct PerPrimitiveData
2315     {
2316         // Note some of these are declared as vectors to match the std140 layout.
2317         tcu::IVec4 primitiveIds[2];
2318         tcu::IVec4 viewportIndices[2];
2319         tcu::UVec4 custom4[2];
2320         tcu::Vec4 custom5[2];
2321     };
2322 
2323     const auto &vkd       = m_context.getDeviceInterface();
2324     const auto device     = m_context.getDevice();
2325     auto &alloc           = m_context.getDefaultAllocator();
2326     const auto queueIndex = m_context.getUniversalQueueFamilyIndex();
2327     const auto queue      = m_context.getUniversalQueue();
2328 
2329     const auto imageFormat = getOutputFormat();
2330     const auto tcuFormat   = mapVkFormat(imageFormat);
2331     const auto imageExtent = makeExtent3D(m_params->width, m_params->height, 1u);
2332     const auto imageUsage  = (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
2333 
2334     const auto &binaries = m_context.getBinaryCollection();
2335     const auto hasTask   = binaries.contains("task");
2336     const auto bufStages = (hasTask ? VK_SHADER_STAGE_TASK_BIT_EXT : VK_SHADER_STAGE_MESH_BIT_EXT);
2337 
2338     const VkImageCreateInfo colorBufferInfo = {
2339         VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
2340         nullptr,                             // const void* pNext;
2341         0u,                                  // VkImageCreateFlags flags;
2342         VK_IMAGE_TYPE_2D,                    // VkImageType imageType;
2343         imageFormat,                         // VkFormat format;
2344         imageExtent,                         // VkExtent3D extent;
2345         1u,                                  // uint32_t mipLevels;
2346         1u,                                  // uint32_t arrayLayers;
2347         VK_SAMPLE_COUNT_1_BIT,               // VkSampleCountFlagBits samples;
2348         VK_IMAGE_TILING_OPTIMAL,             // VkImageTiling tiling;
2349         imageUsage,                          // VkImageUsageFlags usage;
2350         VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
2351         0u,                                  // uint32_t queueFamilyIndexCount;
2352         nullptr,                             // const uint32_t* pQueueFamilyIndices;
2353         VK_IMAGE_LAYOUT_UNDEFINED,           // VkImageLayout initialLayout;
2354     };
2355 
2356     // Create color image and view.
2357     ImageWithMemory colorImage(vkd, device, alloc, colorBufferInfo, MemoryRequirement::Any);
2358     const auto colorSRR  = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
2359     const auto colorSRL  = makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
2360     const auto colorView = makeImageView(vkd, device, colorImage.get(), VK_IMAGE_VIEW_TYPE_2D, imageFormat, colorSRR);
2361 
2362     // Create a memory buffer for verification.
2363     const auto verificationBufferSize =
2364         static_cast<VkDeviceSize>(imageExtent.width * imageExtent.height * tcu::getPixelSize(tcuFormat));
2365     const auto verificationBufferUsage = (VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2366     const auto verificationBufferInfo  = makeBufferCreateInfo(verificationBufferSize, verificationBufferUsage);
2367 
2368     BufferWithMemory verificationBuffer(vkd, device, alloc, verificationBufferInfo, MemoryRequirement::HostVisible);
2369     auto &verificationBufferAlloc = verificationBuffer.getAllocation();
2370     void *verificationBufferData  = verificationBufferAlloc.getHostPtr();
2371 
2372     // This needs to match what the fragment shader will expect.
2373     const PerVertexData perVertexData = {
2374         // tcu::Vec4 positions[4];
2375         {
2376             tcu::Vec4(-1.0f, -1.0f, 0.0f, 1.0f),
2377             tcu::Vec4(1.0f, -1.0f, 0.0f, 1.0f),
2378             tcu::Vec4(-1.0f, 1.0f, 0.0f, 1.0f),
2379             tcu::Vec4(1.0f, 1.0f, 0.0f, 1.0f),
2380         },
2381         // float pointSizes[4];
2382         {
2383             1.0f,
2384             1.0f,
2385             1.0f,
2386             1.0f,
2387         },
2388         // float clipDistances[4];
2389         {
2390             1.0f,
2391             -1.0f,
2392             1.0f,
2393             -1.0f,
2394         },
2395         // tcu::Vec4 custom1[4];
2396         {
2397             tcu::Vec4(0.25, 0.5, 10.0, 3.0),
2398             tcu::Vec4(0.25, 1.0, 20.0, 3.0),
2399             tcu::Vec4(0.5, 0.5, 20.0, 3.0),
2400             tcu::Vec4(0.5, 1.0, 10.0, 3.0),
2401         },
2402         // float custom2[4];
2403         {
2404             1.0f,
2405             1.0f,
2406             2.0f,
2407             2.0f,
2408         },
2409         // int32_t custom3[4];
2410         {3, 3, 4, 4},
2411     };
2412 
2413     // This needs to match what the fragment shader will expect. Reminder: some of these are declared as gvec4 to match the std140
2414     // layout, but only the first component is actually used.
2415     const PerPrimitiveData perPrimitiveData = {
2416         // int primitiveIds[2];
2417         {
2418             tcu::IVec4(1000, 0, 0, 0),
2419             tcu::IVec4(1001, 0, 0, 0),
2420         },
2421         // int viewportIndices[2];
2422         {
2423             tcu::IVec4(1, 0, 0, 0),
2424             tcu::IVec4(1, 0, 0, 0),
2425         },
2426         // uvec4 custom4[2];
2427         {
2428             tcu::UVec4(100u, 101u, 102u, 103u),
2429             tcu::UVec4(200u, 201u, 202u, 203u),
2430         },
2431         // float custom5[2];
2432         {
2433             tcu::Vec4(6.0f, 0.0f, 0.0f, 0.0f),
2434             tcu::Vec4(7.0f, 0.0f, 0.0f, 0.0f),
2435         },
2436     };
2437 
2438     // Create and fill buffers with this data.
2439     const auto pvdSize = static_cast<VkDeviceSize>(sizeof(perVertexData));
2440     const auto pvdInfo = makeBufferCreateInfo(pvdSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
2441     BufferWithMemory pvdData(vkd, device, alloc, pvdInfo, MemoryRequirement::HostVisible);
2442     auto &pvdAlloc = pvdData.getAllocation();
2443     void *pvdPtr   = pvdAlloc.getHostPtr();
2444 
2445     const auto ppdSize = static_cast<VkDeviceSize>(sizeof(perPrimitiveData));
2446     const auto ppdInfo = makeBufferCreateInfo(ppdSize, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT);
2447     BufferWithMemory ppdData(vkd, device, alloc, ppdInfo, MemoryRequirement::HostVisible);
2448     auto &ppdAlloc = ppdData.getAllocation();
2449     void *ppdPtr   = ppdAlloc.getHostPtr();
2450 
2451     deMemcpy(pvdPtr, &perVertexData, sizeof(perVertexData));
2452     deMemcpy(ppdPtr, &perPrimitiveData, sizeof(perPrimitiveData));
2453 
2454     flushAlloc(vkd, device, pvdAlloc);
2455     flushAlloc(vkd, device, ppdAlloc);
2456 
2457     // Descriptor set layout.
2458     DescriptorSetLayoutBuilder setLayoutBuilder;
2459     setLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, bufStages);
2460     setLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, bufStages);
2461     const auto setLayout = setLayoutBuilder.build(vkd, device);
2462 
2463     // Create and update descriptor set.
2464     DescriptorPoolBuilder descriptorPoolBuilder;
2465     descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
2466     descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
2467     const auto descriptorPool =
2468         descriptorPoolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2469     const auto descriptorSet = makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get());
2470 
2471     DescriptorSetUpdateBuilder updateBuilder;
2472     const auto storageBufferInfo = makeDescriptorBufferInfo(pvdData.get(), 0ull, pvdSize);
2473     const auto uniformBufferInfo = makeDescriptorBufferInfo(ppdData.get(), 0ull, ppdSize);
2474     updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(0u),
2475                               VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &storageBufferInfo);
2476     updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(1u),
2477                               VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &uniformBufferInfo);
2478     updateBuilder.update(vkd, device);
2479 
2480     // Pipeline layout.
2481     const auto pipelineLayout = makePipelineLayout(vkd, device, setLayout.get());
2482 
2483     // Shader modules.
2484     const auto meshShader = createShaderModule(vkd, device, binaries.get("mesh"));
2485     const auto fragShader = createShaderModule(vkd, device, binaries.get("frag"));
2486 
2487     Move<VkShaderModule> taskShader;
2488     if (hasTask)
2489         taskShader = createShaderModule(vkd, device, binaries.get("task"));
2490 
2491     // Render pass.
2492     const auto renderPass = makeRenderPass(vkd, device, imageFormat);
2493 
2494     // Framebuffer.
2495     const auto framebuffer =
2496         makeFramebuffer(vkd, device, renderPass.get(), colorView.get(), imageExtent.width, imageExtent.height);
2497 
2498     // Viewport and scissor.
2499     const auto topHalf = makeViewport(imageExtent.width, imageExtent.height / 2u);
2500     const std::vector<VkViewport> viewports{makeViewport(imageExtent), topHalf};
2501     const std::vector<VkRect2D> scissors(2u, makeRect2D(imageExtent));
2502 
2503     const auto pipeline = makeGraphicsPipeline(vkd, device, pipelineLayout.get(), taskShader.get(), meshShader.get(),
2504                                                fragShader.get(), renderPass.get(), viewports, scissors);
2505 
2506     // Command pool and buffer.
2507     const auto cmdPool      = makeCommandPool(vkd, device, queueIndex);
2508     const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2509     const auto cmdBuffer    = cmdBufferPtr.get();
2510 
2511     beginCommandBuffer(vkd, cmdBuffer);
2512 
2513     // Run pipeline.
2514     const tcu::Vec4 clearColor(0.0f, 0.0f, 0.0f, 0.0f);
2515     const auto drawCount = m_params->drawCount();
2516     beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u), clearColor);
2517     vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
2518     vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout.get(), 0u, 1u,
2519                               &descriptorSet.get(), 0u, nullptr);
2520     vkd.cmdDrawMeshTasksEXT(cmdBuffer, drawCount.x(), drawCount.y(), drawCount.z());
2521     endRenderPass(vkd, cmdBuffer);
2522 
2523     // Copy color buffer to verification buffer.
2524     const auto colorAccess   = (VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT);
2525     const auto transferRead  = VK_ACCESS_TRANSFER_READ_BIT;
2526     const auto transferWrite = VK_ACCESS_TRANSFER_WRITE_BIT;
2527     const auto hostRead      = VK_ACCESS_HOST_READ_BIT;
2528 
2529     const auto preCopyBarrier =
2530         makeImageMemoryBarrier(colorAccess, transferRead, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
2531                                VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorImage.get(), colorSRR);
2532     const auto postCopyBarrier = makeMemoryBarrier(transferWrite, hostRead);
2533     const auto copyRegion      = makeBufferImageCopy(imageExtent, colorSRL);
2534 
2535     vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u,
2536                            0u, nullptr, 0u, nullptr, 1u, &preCopyBarrier);
2537     vkd.cmdCopyImageToBuffer(cmdBuffer, colorImage.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
2538                              verificationBuffer.get(), 1u, &copyRegion);
2539     vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u,
2540                            &postCopyBarrier, 0u, nullptr, 0u, nullptr);
2541 
2542     endCommandBuffer(vkd, cmdBuffer);
2543     submitCommandsAndWait(vkd, device, queue, cmdBuffer);
2544 
2545     // Generate reference image and compare results.
2546     const tcu::IVec3 iExtent(static_cast<int>(imageExtent.width), static_cast<int>(imageExtent.height), 1);
2547     const tcu::ConstPixelBufferAccess verificationAccess(tcuFormat, iExtent, verificationBufferData);
2548 
2549     generateReferenceLevel();
2550     invalidateAlloc(vkd, device, verificationBufferAlloc);
2551     if (!verifyResult(verificationAccess))
2552         TCU_FAIL("Result does not match reference; check log for details");
2553 
2554     return tcu::TestStatus::pass("Pass");
2555 }
2556 
2557 // Tests that use push constants in the new stages.
2558 class PushConstantCase : public MeshShaderMiscCase
2559 {
2560 public:
PushConstantCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)2561     PushConstantCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
2562         : MeshShaderMiscCase(testCtx, name, std::move(params))
2563     {
2564     }
2565 
2566     void initPrograms(vk::SourceCollections &programCollection) const override;
2567     TestInstance *createInstance(Context &context) const override;
2568 };
2569 
2570 class PushConstantInstance : public MeshShaderMiscInstance
2571 {
2572 public:
PushConstantInstance(Context & context,const MiscTestParams * params)2573     PushConstantInstance(Context &context, const MiscTestParams *params) : MeshShaderMiscInstance(context, params)
2574     {
2575     }
2576 
2577     void generateReferenceLevel() override;
2578     tcu::TestStatus iterate() override;
2579 };
2580 
createInstance(Context & context) const2581 TestInstance *PushConstantCase::createInstance(Context &context) const
2582 {
2583     return new PushConstantInstance(context, m_params.get());
2584 }
2585 
generateReferenceLevel()2586 void PushConstantInstance::generateReferenceLevel()
2587 {
2588     generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f), m_referenceLevel);
2589 }
2590 
initPrograms(vk::SourceCollections & programCollection) const2591 void PushConstantCase::initPrograms(vk::SourceCollections &programCollection) const
2592 {
2593     const auto buildOptions  = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
2594     const auto useTaskShader = m_params->needsTaskShader();
2595     const auto pcNumFloats   = (useTaskShader ? 2u : 4u);
2596 
2597     std::ostringstream pushConstantStream;
2598     pushConstantStream << "layout (push_constant, std430) uniform PushConstantBlock {\n"
2599                        << "    layout (offset=${PCOFFSET}) float values[" << pcNumFloats << "];\n"
2600                        << "} pc;\n"
2601                        << "\n";
2602     const tcu::StringTemplate pushConstantsTemplate(pushConstantStream.str());
2603     using TemplateMap = std::map<std::string, std::string>;
2604 
2605     std::ostringstream taskDataStream;
2606     taskDataStream << "struct TaskData {\n"
2607                    << "    float values[2];\n"
2608                    << "};\n"
2609                    << "taskPayloadSharedEXT TaskData td;\n"
2610                    << "\n";
2611     const auto taskDataDecl = taskDataStream.str();
2612 
2613     if (useTaskShader)
2614     {
2615         TemplateMap taskMap;
2616         taskMap["PCOFFSET"] = std::to_string(2u * sizeof(float));
2617 
2618         const auto &meshCount = m_params->meshCount;
2619         std::ostringstream task;
2620         task << "#version 450\n"
2621              << "#extension GL_EXT_mesh_shader : enable\n"
2622              << "\n"
2623              << "layout(local_size_x=1) in;\n"
2624              << "\n"
2625              << taskDataDecl << pushConstantsTemplate.specialize(taskMap) << "void main ()\n"
2626              << "{\n"
2627              << "    td.values[0] = pc.values[0];\n"
2628              << "    td.values[1] = pc.values[1];\n"
2629              << "\n"
2630              << "    EmitMeshTasksEXT(" << meshCount.x() << ", " << meshCount.y() << ", " << meshCount.z() << ");\n"
2631              << "}\n";
2632         programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
2633     }
2634 
2635     {
2636         const std::string blue  = (useTaskShader ? "td.values[0] + pc.values[0]" : "pc.values[0] + pc.values[2]");
2637         const std::string alpha = (useTaskShader ? "td.values[1] + pc.values[1]" : "pc.values[1] + pc.values[3]");
2638 
2639         TemplateMap meshMap;
2640         meshMap["PCOFFSET"] = "0";
2641 
2642         std::ostringstream mesh;
2643         mesh << "#version 450\n"
2644              << "#extension GL_EXT_mesh_shader : enable\n"
2645              << "\n"
2646              << "layout(local_size_x=1) in;\n"
2647              << "layout(triangles) out;\n"
2648              << "layout(max_vertices=3, max_primitives=1) out;\n"
2649              << "\n"
2650              << "layout (location=0) out perprimitiveEXT vec4 triangleColor[];\n"
2651              << "\n"
2652              << pushConstantsTemplate.specialize(meshMap) << (useTaskShader ? taskDataDecl : "") << "void main ()\n"
2653              << "{\n"
2654              << "    SetMeshOutputsEXT(3u, 1u);\n"
2655              << "\n"
2656              << "    gl_MeshVerticesEXT[0].gl_Position = vec4(-1.0, -1.0, 0.0, 1.0);\n"
2657              << "    gl_MeshVerticesEXT[1].gl_Position = vec4( 3.0, -1.0, 0.0, 1.0);\n"
2658              << "    gl_MeshVerticesEXT[2].gl_Position = vec4(-1.0,  3.0, 0.0, 1.0);\n"
2659              << "\n"
2660              << "    gl_PrimitiveTriangleIndicesEXT[0] = uvec3(0, 1, 2);\n"
2661              << "    triangleColor[0] = vec4(0.0, 0.0, " << blue << ", " << alpha << ");\n"
2662              << "}\n";
2663         programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
2664     }
2665 
2666     // Add default fragment shader.
2667     MeshShaderMiscCase::initPrograms(programCollection);
2668 }
2669 
iterate()2670 tcu::TestStatus PushConstantInstance::iterate()
2671 {
2672     const auto &vkd       = m_context.getDeviceInterface();
2673     const auto device     = m_context.getDevice();
2674     auto &alloc           = m_context.getDefaultAllocator();
2675     const auto queueIndex = m_context.getUniversalQueueFamilyIndex();
2676     const auto queue      = m_context.getUniversalQueue();
2677 
2678     const auto imageFormat = getOutputFormat();
2679     const auto tcuFormat   = mapVkFormat(imageFormat);
2680     const auto imageExtent = makeExtent3D(m_params->width, m_params->height, 1u);
2681     const auto imageUsage  = (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
2682 
2683     const auto &binaries = m_context.getBinaryCollection();
2684     const auto hasTask   = binaries.contains("task");
2685 
2686     const VkImageCreateInfo colorBufferInfo = {
2687         VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
2688         nullptr,                             // const void* pNext;
2689         0u,                                  // VkImageCreateFlags flags;
2690         VK_IMAGE_TYPE_2D,                    // VkImageType imageType;
2691         imageFormat,                         // VkFormat format;
2692         imageExtent,                         // VkExtent3D extent;
2693         1u,                                  // uint32_t mipLevels;
2694         1u,                                  // uint32_t arrayLayers;
2695         VK_SAMPLE_COUNT_1_BIT,               // VkSampleCountFlagBits samples;
2696         VK_IMAGE_TILING_OPTIMAL,             // VkImageTiling tiling;
2697         imageUsage,                          // VkImageUsageFlags usage;
2698         VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
2699         0u,                                  // uint32_t queueFamilyIndexCount;
2700         nullptr,                             // const uint32_t* pQueueFamilyIndices;
2701         VK_IMAGE_LAYOUT_UNDEFINED,           // VkImageLayout initialLayout;
2702     };
2703 
2704     // Create color image and view.
2705     ImageWithMemory colorImage(vkd, device, alloc, colorBufferInfo, MemoryRequirement::Any);
2706     const auto colorSRR  = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
2707     const auto colorSRL  = makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
2708     const auto colorView = makeImageView(vkd, device, colorImage.get(), VK_IMAGE_VIEW_TYPE_2D, imageFormat, colorSRR);
2709 
2710     // Create a memory buffer for verification.
2711     const auto verificationBufferSize =
2712         static_cast<VkDeviceSize>(imageExtent.width * imageExtent.height * tcu::getPixelSize(tcuFormat));
2713     const auto verificationBufferUsage = (VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2714     const auto verificationBufferInfo  = makeBufferCreateInfo(verificationBufferSize, verificationBufferUsage);
2715 
2716     BufferWithMemory verificationBuffer(vkd, device, alloc, verificationBufferInfo, MemoryRequirement::HostVisible);
2717     auto &verificationBufferAlloc = verificationBuffer.getAllocation();
2718     void *verificationBufferData  = verificationBufferAlloc.getHostPtr();
2719 
2720     // Push constant ranges.
2721     std::vector<float> pcData{0.25f, 0.25f, 0.75f, 0.75f};
2722     const auto pcSize     = static_cast<uint32_t>(de::dataSize(pcData));
2723     const auto pcHalfSize = pcSize / 2u;
2724 
2725     std::vector<VkPushConstantRange> pcRanges;
2726     if (hasTask)
2727     {
2728         pcRanges.push_back(makePushConstantRange(VK_SHADER_STAGE_MESH_BIT_EXT, 0u, pcHalfSize));
2729         pcRanges.push_back(makePushConstantRange(VK_SHADER_STAGE_TASK_BIT_EXT, pcHalfSize, pcHalfSize));
2730     }
2731     else
2732     {
2733         pcRanges.push_back(makePushConstantRange(VK_SHADER_STAGE_MESH_BIT_EXT, 0u, pcSize));
2734     }
2735 
2736     // Pipeline layout.
2737     const auto pipelineLayout =
2738         makePipelineLayout(vkd, device, 0u, nullptr, static_cast<uint32_t>(pcRanges.size()), de::dataOrNull(pcRanges));
2739 
2740     // Shader modules.
2741     const auto meshShader = createShaderModule(vkd, device, binaries.get("mesh"));
2742     const auto fragShader = createShaderModule(vkd, device, binaries.get("frag"));
2743 
2744     Move<VkShaderModule> taskShader;
2745     if (hasTask)
2746         taskShader = createShaderModule(vkd, device, binaries.get("task"));
2747 
2748     // Render pass.
2749     const auto renderPass = makeRenderPass(vkd, device, imageFormat);
2750 
2751     // Framebuffer.
2752     const auto framebuffer =
2753         makeFramebuffer(vkd, device, renderPass.get(), colorView.get(), imageExtent.width, imageExtent.height);
2754 
2755     // Viewport and scissor.
2756     const std::vector<VkViewport> viewports(1u, makeViewport(imageExtent));
2757     const std::vector<VkRect2D> scissors(1u, makeRect2D(imageExtent));
2758 
2759     const auto pipeline = makeGraphicsPipeline(vkd, device, pipelineLayout.get(), taskShader.get(), meshShader.get(),
2760                                                fragShader.get(), renderPass.get(), viewports, scissors);
2761 
2762     // Command pool and buffer.
2763     const auto cmdPool      = makeCommandPool(vkd, device, queueIndex);
2764     const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2765     const auto cmdBuffer    = cmdBufferPtr.get();
2766 
2767     beginCommandBuffer(vkd, cmdBuffer);
2768 
2769     // Run pipeline.
2770     const tcu::Vec4 clearColor(0.0f, 0.0f, 0.0f, 0.0f);
2771     const auto drawCount = m_params->drawCount();
2772     beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u), clearColor);
2773     vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
2774     for (const auto &range : pcRanges)
2775         vkd.cmdPushConstants(cmdBuffer, pipelineLayout.get(), range.stageFlags, range.offset, range.size,
2776                              reinterpret_cast<const char *>(pcData.data()) + range.offset);
2777     vkd.cmdDrawMeshTasksEXT(cmdBuffer, drawCount.x(), drawCount.y(), drawCount.z());
2778     endRenderPass(vkd, cmdBuffer);
2779 
2780     // Copy color buffer to verification buffer.
2781     const auto colorAccess   = (VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT);
2782     const auto transferRead  = VK_ACCESS_TRANSFER_READ_BIT;
2783     const auto transferWrite = VK_ACCESS_TRANSFER_WRITE_BIT;
2784     const auto hostRead      = VK_ACCESS_HOST_READ_BIT;
2785 
2786     const auto preCopyBarrier =
2787         makeImageMemoryBarrier(colorAccess, transferRead, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
2788                                VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorImage.get(), colorSRR);
2789     const auto postCopyBarrier = makeMemoryBarrier(transferWrite, hostRead);
2790     const auto copyRegion      = makeBufferImageCopy(imageExtent, colorSRL);
2791 
2792     vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u,
2793                            0u, nullptr, 0u, nullptr, 1u, &preCopyBarrier);
2794     vkd.cmdCopyImageToBuffer(cmdBuffer, colorImage.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
2795                              verificationBuffer.get(), 1u, &copyRegion);
2796     vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u,
2797                            &postCopyBarrier, 0u, nullptr, 0u, nullptr);
2798 
2799     endCommandBuffer(vkd, cmdBuffer);
2800     submitCommandsAndWait(vkd, device, queue, cmdBuffer);
2801 
2802     // Generate reference image and compare results.
2803     const tcu::IVec3 iExtent(static_cast<int>(imageExtent.width), static_cast<int>(imageExtent.height), 1);
2804     const tcu::ConstPixelBufferAccess verificationAccess(tcuFormat, iExtent, verificationBufferData);
2805 
2806     generateReferenceLevel();
2807     invalidateAlloc(vkd, device, verificationBufferAlloc);
2808     if (!verifyResult(verificationAccess))
2809         TCU_FAIL("Result does not match reference; check log for details");
2810 
2811     return tcu::TestStatus::pass("Pass");
2812 }
2813 
2814 // Use large work group size, large number of vertices and large number of primitives.
2815 struct MaximizeThreadsParams : public MiscTestParams
2816 {
MaximizeThreadsParamsvkt::MeshShader::__anonb7c155300111::MaximizeThreadsParams2817     MaximizeThreadsParams(const tcu::Maybe<tcu::UVec3> &taskCount_, const tcu::UVec3 &meshCount_, uint32_t width_,
2818                           uint32_t height_, uint32_t localSize_, uint32_t numVertices_, uint32_t numPrimitives_)
2819         : MiscTestParams(taskCount_, meshCount_, width_, height_)
2820         , localSize(localSize_)
2821         , numVertices(numVertices_)
2822         , numPrimitives(numPrimitives_)
2823     {
2824     }
2825 
2826     uint32_t localSize;
2827     uint32_t numVertices;
2828     uint32_t numPrimitives;
2829 
checkSupportvkt::MeshShader::__anonb7c155300111::MaximizeThreadsParams2830     void checkSupport(Context &context) const
2831     {
2832         const auto &properties = context.getMeshShaderPropertiesEXT();
2833 
2834         if (localSize > properties.maxMeshWorkGroupSize[0])
2835             TCU_THROW(NotSupportedError, "Required local size not supported");
2836 
2837         if (numVertices > properties.maxMeshOutputVertices)
2838             TCU_THROW(NotSupportedError, "Required number of output vertices not supported");
2839 
2840         if (numPrimitives > properties.maxMeshOutputPrimitives)
2841             TCU_THROW(NotSupportedError, "Required number of output primitives not supported");
2842     }
2843 };
2844 
2845 // Focus on the number of primitives.
2846 class MaximizePrimitivesCase : public MeshShaderMiscCase
2847 {
2848 public:
MaximizePrimitivesCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)2849     MaximizePrimitivesCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
2850         : MeshShaderMiscCase(testCtx, name, std::move(params))
2851     {
2852         const auto mtParams = dynamic_cast<MaximizeThreadsParams *>(m_params.get());
2853         DE_ASSERT(mtParams);
2854         DE_UNREF(mtParams); // For release builds.
2855     }
2856 
2857     void initPrograms(vk::SourceCollections &programCollection) const override;
2858     void checkSupport(Context &context) const override;
2859     TestInstance *createInstance(Context &context) const override;
2860 };
2861 
2862 class MaximizePrimitivesInstance : public MeshShaderMiscInstance
2863 {
2864 public:
MaximizePrimitivesInstance(Context & context,const MiscTestParams * params)2865     MaximizePrimitivesInstance(Context &context, const MiscTestParams *params) : MeshShaderMiscInstance(context, params)
2866     {
2867     }
2868 
2869     void generateReferenceLevel() override;
2870 };
2871 
createInstance(Context & context) const2872 TestInstance *MaximizePrimitivesCase::createInstance(Context &context) const
2873 {
2874     return new MaximizePrimitivesInstance(context, m_params.get());
2875 }
2876 
checkSupport(Context & context) const2877 void MaximizePrimitivesCase::checkSupport(Context &context) const
2878 {
2879     MeshShaderMiscCase::checkSupport(context);
2880 
2881     const auto params = dynamic_cast<MaximizeThreadsParams *>(m_params.get());
2882     params->checkSupport(context);
2883 }
2884 
initPrograms(vk::SourceCollections & programCollection) const2885 void MaximizePrimitivesCase::initPrograms(vk::SourceCollections &programCollection) const
2886 {
2887     const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
2888     const auto params       = dynamic_cast<MaximizeThreadsParams *>(m_params.get());
2889 
2890     DE_ASSERT(!params->needsTaskShader());
2891     MeshShaderMiscCase::initPrograms(programCollection);
2892 
2893     // Idea behind the test: generate 128 vertices, 1 per each pixel in a 128x1 image. Then, use each vertex to generate two points,
2894     // adding the colors of each point using color blending to make sure every point is properly generated.
2895 
2896     DE_ASSERT(params->numPrimitives == params->numVertices * 2u);
2897     DE_ASSERT(params->numVertices == params->width);
2898 
2899     const auto verticesPerInvocation = params->numVertices / params->localSize;
2900     const auto primitivesPerVertex   = params->numPrimitives / params->numVertices;
2901 
2902     std::ostringstream mesh;
2903     mesh << "#version 450\n"
2904          << "#extension GL_EXT_mesh_shader : enable\n"
2905          << "\n"
2906          << "layout(local_size_x=" << params->localSize << ") in;\n"
2907          << "layout(points) out;\n"
2908          << "layout(max_vertices=" << params->numVertices << ", max_primitives=" << params->numPrimitives << ") out;\n"
2909          << "\n"
2910          << "layout (location=0) out perprimitiveEXT vec4 pointColor[];\n"
2911          << "\n"
2912          << "const uint verticesPerInvocation = " << verticesPerInvocation << ";\n"
2913          << "const uint primitivesPerVertex   = " << primitivesPerVertex << ";\n"
2914          << "\n"
2915          << "vec4 colors[primitivesPerVertex] = vec4[](\n"
2916          << "    vec4(0.0, 0.0, 1.0, 1.0),\n"
2917          << "    vec4(1.0, 0.0, 0.0, 1.0)\n"
2918          << ");\n"
2919          << "void main ()\n"
2920          << "{\n"
2921          << "    SetMeshOutputsEXT(" << params->numVertices << ", " << params->numPrimitives << ");\n"
2922          << "    const uint firstVertex = gl_LocalInvocationIndex * verticesPerInvocation;\n"
2923          << "    for (uint i = 0u; i < verticesPerInvocation; ++i)\n"
2924          << "    {\n"
2925          << "        const uint vertexNumber = firstVertex + i;\n"
2926          << "        const float xCoord = ((float(vertexNumber) + 0.5) / " << params->width << ".0) * 2.0 - 1.0;\n"
2927          << "        const float yCoord = 0.0;\n"
2928          << "        gl_MeshVerticesEXT[vertexNumber].gl_Position = vec4(xCoord, yCoord, 0.0f, 1.0f);\n"
2929          << "        gl_MeshVerticesEXT[vertexNumber].gl_PointSize = 1.0f;\n"
2930          << "        for (uint j = 0u; j < primitivesPerVertex; ++j)\n"
2931          << "        {\n"
2932          << "            const uint primitiveNumber = vertexNumber * primitivesPerVertex + j;\n"
2933          << "            gl_PrimitivePointIndicesEXT[primitiveNumber] = vertexNumber;\n"
2934          << "            pointColor[primitiveNumber] = colors[j];\n"
2935          << "        }\n"
2936          << "    }\n"
2937          << "}\n";
2938     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
2939 }
2940 
generateReferenceLevel()2941 void MaximizePrimitivesInstance::generateReferenceLevel()
2942 {
2943     generateSolidRefLevel(tcu::Vec4(1.0f, 0.0f, 1.0f, 1.0f), m_referenceLevel);
2944 }
2945 
2946 // Focus on the number of vertices.
2947 class MaximizeVerticesCase : public MeshShaderMiscCase
2948 {
2949 public:
MaximizeVerticesCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)2950     MaximizeVerticesCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
2951         : MeshShaderMiscCase(testCtx, name, std::move(params))
2952     {
2953         const auto mtParams = dynamic_cast<MaximizeThreadsParams *>(m_params.get());
2954         DE_ASSERT(mtParams);
2955         DE_UNREF(mtParams); // For release builds.
2956     }
2957 
2958     void initPrograms(vk::SourceCollections &programCollection) const override;
2959     void checkSupport(Context &context) const override;
2960     TestInstance *createInstance(Context &context) const override;
2961 };
2962 
2963 class MaximizeVerticesInstance : public MeshShaderMiscInstance
2964 {
2965 public:
MaximizeVerticesInstance(Context & context,const MiscTestParams * params)2966     MaximizeVerticesInstance(Context &context, const MiscTestParams *params) : MeshShaderMiscInstance(context, params)
2967     {
2968     }
2969 
2970     void generateReferenceLevel() override;
2971 };
2972 
createInstance(Context & context) const2973 TestInstance *MaximizeVerticesCase::createInstance(Context &context) const
2974 {
2975     return new MaximizeVerticesInstance(context, m_params.get());
2976 }
2977 
checkSupport(Context & context) const2978 void MaximizeVerticesCase::checkSupport(Context &context) const
2979 {
2980     MeshShaderMiscCase::checkSupport(context);
2981 
2982     const auto params = dynamic_cast<MaximizeThreadsParams *>(m_params.get());
2983     params->checkSupport(context);
2984 }
2985 
initPrograms(vk::SourceCollections & programCollection) const2986 void MaximizeVerticesCase::initPrograms(vk::SourceCollections &programCollection) const
2987 {
2988     const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
2989     const auto params       = dynamic_cast<MaximizeThreadsParams *>(m_params.get());
2990 
2991     DE_ASSERT(!params->needsTaskShader());
2992     MeshShaderMiscCase::initPrograms(programCollection);
2993 
2994     // Idea behind the test: cover a framebuffer using a triangle quad per pixel (4 vertices, 2 triangles).
2995     DE_ASSERT(params->numVertices == params->numPrimitives * 2u);
2996     DE_ASSERT(params->numPrimitives == params->width * 2u);
2997 
2998     const auto pixelsPerInvocation     = params->width / params->localSize;
2999     const auto verticesPerPixel        = 4u;
3000     const auto primitivesPerPixel      = 2u;
3001     const auto verticesPerInvocation   = pixelsPerInvocation * verticesPerPixel;
3002     const auto primitivesPerInvocation = pixelsPerInvocation * primitivesPerPixel;
3003 
3004     std::ostringstream mesh;
3005     mesh << "#version 450\n"
3006          << "#extension GL_EXT_mesh_shader : enable\n"
3007          << "\n"
3008          << "layout(local_size_x=" << params->localSize << ") in;\n"
3009          << "layout(triangles) out;\n"
3010          << "layout(max_vertices=" << params->numVertices << ", max_primitives=" << params->numPrimitives << ") out;\n"
3011          << "\n"
3012          << "layout (location=0) out perprimitiveEXT vec4 triangleColor[];\n"
3013          << "\n"
3014          << "const uint pixelsPerInvocation     = " << pixelsPerInvocation << ";\n"
3015          << "const uint verticesPerInvocation   = " << verticesPerInvocation << ";\n"
3016          << "const uint primitivesPerInvocation = " << primitivesPerInvocation << ";\n"
3017          << "const uint indicesPerInvocation    = primitivesPerInvocation * 3u;\n"
3018          << "const uint verticesPerPixel        = " << verticesPerPixel << ";\n"
3019          << "const uint primitivesPerPixel      = " << primitivesPerPixel << ";\n"
3020          << "const uint indicesPerPixel         = primitivesPerPixel * 3u;\n"
3021          << "\n"
3022          << "void main ()\n"
3023          << "{\n"
3024          << "    SetMeshOutputsEXT(" << params->numVertices << ", " << params->numPrimitives << ");\n"
3025          << "\n"
3026          << "    const uint firstPixel    = gl_LocalInvocationIndex * pixelsPerInvocation;\n"
3027          << "    const float pixelWidth   = 2.0 / float(" << params->width << ");\n"
3028          << "    const float quarterWidth = pixelWidth / 4.0;\n"
3029          << "\n"
3030          << "    for (uint pixelIdx = 0u; pixelIdx < pixelsPerInvocation; ++pixelIdx)\n"
3031          << "    {\n"
3032          << "        const uint pixelId      = firstPixel + pixelIdx;\n"
3033          << "        const float pixelCenter = (float(pixelId) + 0.5) / float(" << params->width << ") * 2.0 - 1.0;\n"
3034          << "        const float left        = pixelCenter - quarterWidth;\n"
3035          << "        const float right       = pixelCenter + quarterWidth;\n"
3036          << "\n"
3037          << "        const uint firstVertex = gl_LocalInvocationIndex * verticesPerInvocation + pixelIdx * "
3038             "verticesPerPixel;\n"
3039          << "        gl_MeshVerticesEXT[firstVertex + 0].gl_Position = vec4(left,  -1.0, 0.0f, 1.0f);\n"
3040          << "        gl_MeshVerticesEXT[firstVertex + 1].gl_Position = vec4(left,   1.0, 0.0f, 1.0f);\n"
3041          << "        gl_MeshVerticesEXT[firstVertex + 2].gl_Position = vec4(right, -1.0, 0.0f, 1.0f);\n"
3042          << "        gl_MeshVerticesEXT[firstVertex + 3].gl_Position = vec4(right,  1.0, 0.0f, 1.0f);\n"
3043          << "\n"
3044          << "        const uint firstPrimitive = gl_LocalInvocationIndex * primitivesPerInvocation + pixelIdx * "
3045             "primitivesPerPixel;\n"
3046          << "        triangleColor[firstPrimitive + 0] = vec4(0.0, 0.0, 1.0, 1.0);\n"
3047          << "        triangleColor[firstPrimitive + 1] = vec4(0.0, 0.0, 1.0, 1.0);\n"
3048          << "\n"
3049          << "        const uint firstIndex = gl_LocalInvocationIndex * indicesPerInvocation + pixelIdx * "
3050             "indicesPerPixel;\n"
3051          << "        gl_PrimitiveTriangleIndicesEXT[firstPrimitive + 0] = uvec3(firstVertex + 0, firstVertex + 1, "
3052             "firstVertex + 2);\n"
3053          << "        gl_PrimitiveTriangleIndicesEXT[firstPrimitive + 1] = uvec3(firstVertex + 1, firstVertex + 3, "
3054             "firstVertex + 2);\n"
3055          << "    }\n"
3056          << "}\n";
3057     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
3058 }
3059 
generateReferenceLevel()3060 void MaximizeVerticesInstance::generateReferenceLevel()
3061 {
3062     generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f), m_referenceLevel);
3063 }
3064 
3065 // Focus on the number of invocations.
3066 class MaximizeInvocationsCase : public MeshShaderMiscCase
3067 {
3068 public:
MaximizeInvocationsCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)3069     MaximizeInvocationsCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
3070         : MeshShaderMiscCase(testCtx, name, std::move(params))
3071     {
3072         const auto mtParams = dynamic_cast<MaximizeThreadsParams *>(m_params.get());
3073         DE_ASSERT(mtParams);
3074         DE_UNREF(mtParams); // For release builds.
3075     }
3076 
3077     void initPrograms(vk::SourceCollections &programCollection) const override;
3078     void checkSupport(Context &context) const override;
3079     TestInstance *createInstance(Context &context) const override;
3080 };
3081 
3082 class MaximizeInvocationsInstance : public MeshShaderMiscInstance
3083 {
3084 public:
MaximizeInvocationsInstance(Context & context,const MiscTestParams * params)3085     MaximizeInvocationsInstance(Context &context, const MiscTestParams *params)
3086         : MeshShaderMiscInstance(context, params)
3087     {
3088     }
3089 
3090     void generateReferenceLevel() override;
3091 };
3092 
createInstance(Context & context) const3093 TestInstance *MaximizeInvocationsCase::createInstance(Context &context) const
3094 {
3095     return new MaximizeInvocationsInstance(context, m_params.get());
3096 }
3097 
checkSupport(Context & context) const3098 void MaximizeInvocationsCase::checkSupport(Context &context) const
3099 {
3100     MeshShaderMiscCase::checkSupport(context);
3101 
3102     const auto params = dynamic_cast<MaximizeThreadsParams *>(m_params.get());
3103     params->checkSupport(context);
3104 }
3105 
initPrograms(vk::SourceCollections & programCollection) const3106 void MaximizeInvocationsCase::initPrograms(vk::SourceCollections &programCollection) const
3107 {
3108     const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
3109     const auto params       = dynamic_cast<MaximizeThreadsParams *>(m_params.get());
3110 
3111     DE_ASSERT(!params->needsTaskShader());
3112     MeshShaderMiscCase::initPrograms(programCollection);
3113 
3114     // Idea behind the test: use two invocations to generate one point per framebuffer pixel.
3115     DE_ASSERT(params->localSize == params->width * 2u);
3116     DE_ASSERT(params->localSize == params->numPrimitives * 2u);
3117     DE_ASSERT(params->localSize == params->numVertices * 2u);
3118 
3119     std::ostringstream mesh;
3120     mesh << "#version 450\n"
3121          << "#extension GL_EXT_mesh_shader : enable\n"
3122          << "\n"
3123          << "layout(local_size_x=" << params->localSize << ") in;\n"
3124          << "layout(points) out;\n"
3125          << "layout(max_vertices=" << params->numVertices << ", max_primitives=" << params->numPrimitives << ") out;\n"
3126          << "\n"
3127          << "layout (location=0) out perprimitiveEXT vec4 pointColor[];\n"
3128          << "\n"
3129          << "void main ()\n"
3130          << "{\n"
3131          << "    SetMeshOutputsEXT(" << params->numVertices << ", " << params->numPrimitives << ");\n"
3132          << "    const uint pixelId = gl_LocalInvocationIndex / 2u;\n"
3133          << "    if (gl_LocalInvocationIndex % 2u == 0u)\n"
3134          << "    {\n"
3135          << "        const float xCoord = (float(pixelId) + 0.5) / float(" << params->width << ") * 2.0 - 1.0;\n"
3136          << "        gl_MeshVerticesEXT[pixelId].gl_Position = vec4(xCoord, 0.0, 0.0f, 1.0f);\n"
3137          << "        gl_MeshVerticesEXT[pixelId].gl_PointSize = 1.0f;\n"
3138          << "    }\n"
3139          << "    else\n"
3140          << "    {\n"
3141          << "        gl_PrimitivePointIndicesEXT[pixelId] = pixelId;\n"
3142          << "        pointColor[pixelId] = vec4(0.0, 0.0, 1.0, 1.0);\n"
3143          << "    }\n"
3144          << "}\n";
3145     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
3146 }
3147 
generateReferenceLevel()3148 void MaximizeInvocationsInstance::generateReferenceLevel()
3149 {
3150     generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f), m_referenceLevel);
3151 }
3152 
3153 // Verify mixing classic and mesh shading pipelines in the same render pass.
3154 struct MixedPipelinesParams : public MiscTestParams
3155 {
3156 public:
3157     bool dynamicTopology;
3158 
MixedPipelinesParamsvkt::MeshShader::__anonb7c155300111::MixedPipelinesParams3159     MixedPipelinesParams(const tcu::Maybe<tcu::UVec3> &taskCount_, const tcu::UVec3 &meshCount_, uint32_t width_,
3160                          uint32_t height_, bool dynamicTopology_)
3161         : MiscTestParams(taskCount_, meshCount_, width_, height_)
3162         , dynamicTopology(dynamicTopology_)
3163     {
3164     }
3165 };
3166 
3167 // Global idea behind this case: draw 4 times with classic, mesh, classic and mesh pipelines. Each draw will use a full screen quad
3168 // and a dynamic scissor to restrict drawing in the framebuffer to one specific quadrant of the color attachment. The color of each
3169 // quadrant will be taken from a push constant that changes between steps, so each quadrant ends up with a different color.
3170 class MixedPipelinesCase : public MeshShaderMiscCase
3171 {
3172 public:
MixedPipelinesCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)3173     MixedPipelinesCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
3174         : MeshShaderMiscCase(testCtx, name, std::move(params))
3175     {
3176     }
3177 
3178     void checkSupport(Context &context) const override;
3179     void initPrograms(vk::SourceCollections &programCollection) const override;
3180     TestInstance *createInstance(Context &context) const override;
3181 };
3182 
3183 class MixedPipelinesInstance : public MeshShaderMiscInstance
3184 {
3185 public:
MixedPipelinesInstance(Context & context,const MiscTestParams * params)3186     MixedPipelinesInstance(Context &context, const MiscTestParams *params) : MeshShaderMiscInstance(context, params)
3187     {
3188     }
3189 
3190     typedef std::pair<VkRect2D, tcu::Vec4> RectColor;
3191     typedef std::vector<RectColor> RectColorVec;
3192     RectColorVec getQuadrantColors();
3193     tcu::Vec4 getClearColor();
3194 
3195     void generateReferenceLevel() override;
3196     tcu::TestStatus iterate() override;
3197 };
3198 
createInstance(Context & context) const3199 TestInstance *MixedPipelinesCase::createInstance(Context &context) const
3200 {
3201     return new MixedPipelinesInstance(context, m_params.get());
3202 }
3203 
checkSupport(Context & context) const3204 void MixedPipelinesCase::checkSupport(Context &context) const
3205 {
3206     const auto params = dynamic_cast<MixedPipelinesParams *>(m_params.get());
3207     DE_ASSERT(params);
3208 
3209     MeshShaderMiscCase::checkSupport(context);
3210 
3211     if (params->dynamicTopology)
3212         context.requireDeviceFunctionality("VK_EXT_extended_dynamic_state");
3213 }
3214 
initPrograms(vk::SourceCollections & programCollection) const3215 void MixedPipelinesCase::initPrograms(vk::SourceCollections &programCollection) const
3216 {
3217     const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
3218 
3219     DE_ASSERT(!m_params->needsTaskShader());
3220 
3221     // The fragment shader will draw using the color indicated by the push constant.
3222     const std::string frag = "#version 450\n"
3223                              "\n"
3224                              "layout (location=0) out vec4 outColor;\n"
3225                              "layout (push_constant, std430) uniform PushConstantBlock {\n"
3226                              "    vec4 color;\n"
3227                              "} pc;\n"
3228                              "\n"
3229                              "void main ()\n"
3230                              "{\n"
3231                              "    outColor = pc.color;\n"
3232                              "}\n";
3233     programCollection.glslSources.add("frag") << glu::FragmentSource(frag);
3234 
3235     const std::string vert = "#version 450\n"
3236                              "\n"
3237                              "void main()\n"
3238                              "{\n"
3239                              // Full-screen clockwise triangle strip with 4 vertices.
3240                              "    const float x = (-1.0+2.0*((gl_VertexIndex & 2)>>1));\n"
3241                              "    const float y = ( 1.0-2.0* (gl_VertexIndex % 2));\n"
3242                              "    gl_Position = vec4(x, y, 0.0, 1.0);\n"
3243                              "}\n";
3244     programCollection.glslSources.add("vert") << glu::VertexSource(vert);
3245 
3246     const std::string mesh = "#version 450\n"
3247                              "#extension GL_EXT_mesh_shader : enable\n"
3248                              "\n"
3249                              "layout(local_size_x=4) in;\n"
3250                              "layout(triangles) out;\n"
3251                              "layout(max_vertices=4, max_primitives=2) out;\n"
3252                              "\n"
3253                              "void main ()\n"
3254                              "{\n"
3255                              "    SetMeshOutputsEXT(4u, 2u);\n"
3256                              // Full-screen clockwise triangle strip with 4 vertices.
3257                              "    const float x = (-1.0+2.0*((gl_LocalInvocationIndex & 2)>>1));\n"
3258                              "    const float y = ( 1.0-2.0*((gl_LocalInvocationIndex & 1)   ));\n"
3259                              "    gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(x, y, 0.0, 1.0);\n"
3260                              "    if (gl_LocalInvocationIndex == 0u) {\n"
3261                              "        gl_PrimitiveTriangleIndicesEXT[0] = uvec3(0u, 1u, 2u);\n"
3262                              "        gl_PrimitiveTriangleIndicesEXT[1] = uvec3(2u, 1u, 3u);\n"
3263                              "    }\n"
3264                              "}\n";
3265     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh) << buildOptions;
3266 }
3267 
getQuadrantColors()3268 MixedPipelinesInstance::RectColorVec MixedPipelinesInstance::getQuadrantColors()
3269 {
3270     const auto width       = m_params->width;
3271     const auto height      = m_params->height;
3272     const auto halfWidth   = width / 2u;
3273     const auto halfHeight  = height / 2u;
3274     const auto iHalfWidth  = static_cast<int>(halfWidth);
3275     const auto iHalfHeight = static_cast<int>(halfHeight);
3276 
3277     DE_ASSERT(width % 2u == 0u);
3278     DE_ASSERT(height % 2u == 0u);
3279 
3280     // Associate a different color to each rectangle.
3281     const RectColorVec quadrantColors{
3282         std::make_pair(makeRect2D(0, 0, halfWidth, halfHeight), tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f)),
3283         std::make_pair(makeRect2D(0, iHalfHeight, halfWidth, halfHeight), tcu::Vec4(0.0f, 1.0f, 1.0f, 1.0f)),
3284         std::make_pair(makeRect2D(iHalfWidth, 0, halfWidth, halfHeight), tcu::Vec4(1.0f, 0.0f, 1.0f, 1.0f)),
3285         std::make_pair(makeRect2D(iHalfWidth, iHalfHeight, halfWidth, halfHeight), tcu::Vec4(1.0f, 1.0f, 1.0f, 1.0f)),
3286     };
3287     return quadrantColors;
3288 }
3289 
getClearColor()3290 tcu::Vec4 MixedPipelinesInstance::getClearColor()
3291 {
3292     return tcu::Vec4(0.0f, 0.0f, 0.0f, 1.0f);
3293 }
3294 
generateReferenceLevel()3295 void MixedPipelinesInstance::generateReferenceLevel()
3296 {
3297     const auto format    = getOutputFormat();
3298     const auto tcuFormat = mapVkFormat(format);
3299 
3300     const auto iWidth  = static_cast<int>(m_params->width);
3301     const auto iHeight = static_cast<int>(m_params->height);
3302 
3303     m_referenceLevel.reset(new tcu::TextureLevel(tcuFormat, iWidth, iHeight));
3304 
3305     const auto access     = m_referenceLevel->getAccess();
3306     const auto quadColors = getQuadrantColors();
3307     const auto clearColor = getClearColor();
3308 
3309     // Each image quadrant gets a different color.
3310     tcu::clear(access, clearColor);
3311 
3312     for (int y = 0; y < iHeight; ++y)
3313         for (int x = 0; x < iWidth; ++x)
3314         {
3315             for (const auto &quadrant : quadColors)
3316             {
3317                 const auto minX = quadrant.first.offset.x;
3318                 const auto minY = quadrant.first.offset.y;
3319                 const auto maxX = quadrant.first.offset.x + static_cast<int32_t>(quadrant.first.extent.width);
3320                 const auto maxY = quadrant.first.offset.y + static_cast<int32_t>(quadrant.first.extent.height);
3321 
3322                 if (x >= minX && x < maxX && y >= minY && y < maxY)
3323                     access.setPixel(quadrant.second, x, y);
3324             }
3325         }
3326 }
3327 
iterate()3328 tcu::TestStatus MixedPipelinesInstance::iterate()
3329 {
3330     const auto params = dynamic_cast<const MixedPipelinesParams *>(m_params);
3331     DE_ASSERT(params);
3332 
3333     const auto &vkd       = m_context.getDeviceInterface();
3334     const auto device     = m_context.getDevice();
3335     auto &alloc           = m_context.getDefaultAllocator();
3336     const auto queueIndex = m_context.getUniversalQueueFamilyIndex();
3337     const auto queue      = m_context.getUniversalQueue();
3338 
3339     const auto dynTopo     = params->dynamicTopology;
3340     const auto imageFormat = getOutputFormat();
3341     const auto tcuFormat   = mapVkFormat(imageFormat);
3342     const auto imageExtent = makeExtent3D(m_params->width, m_params->height, 1u);
3343     const auto imageUsage  = (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3344 
3345     const VkImageCreateInfo colorBufferInfo = {
3346         VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
3347         nullptr,                             // const void* pNext;
3348         0u,                                  // VkImageCreateFlags flags;
3349         VK_IMAGE_TYPE_2D,                    // VkImageType imageType;
3350         imageFormat,                         // VkFormat format;
3351         imageExtent,                         // VkExtent3D extent;
3352         1u,                                  // uint32_t mipLevels;
3353         1u,                                  // uint32_t arrayLayers;
3354         VK_SAMPLE_COUNT_1_BIT,               // VkSampleCountFlagBits samples;
3355         VK_IMAGE_TILING_OPTIMAL,             // VkImageTiling tiling;
3356         imageUsage,                          // VkImageUsageFlags usage;
3357         VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
3358         0u,                                  // uint32_t queueFamilyIndexCount;
3359         nullptr,                             // const uint32_t* pQueueFamilyIndices;
3360         VK_IMAGE_LAYOUT_UNDEFINED,           // VkImageLayout initialLayout;
3361     };
3362 
3363     // Create color image and view.
3364     ImageWithMemory colorImage(vkd, device, alloc, colorBufferInfo, MemoryRequirement::Any);
3365     const auto colorSRR  = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
3366     const auto colorSRL  = makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
3367     const auto colorView = makeImageView(vkd, device, colorImage.get(), VK_IMAGE_VIEW_TYPE_2D, imageFormat, colorSRR);
3368 
3369     // Create a memory buffer for verification.
3370     const auto verificationBufferSize =
3371         static_cast<VkDeviceSize>(imageExtent.width * imageExtent.height * tcu::getPixelSize(tcuFormat));
3372     const auto verificationBufferUsage = (VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3373     const auto verificationBufferInfo  = makeBufferCreateInfo(verificationBufferSize, verificationBufferUsage);
3374 
3375     BufferWithMemory verificationBuffer(vkd, device, alloc, verificationBufferInfo, MemoryRequirement::HostVisible);
3376     auto &verificationBufferAlloc = verificationBuffer.getAllocation();
3377     void *verificationBufferData  = verificationBufferAlloc.getHostPtr();
3378 
3379     // Pipeline layouts for the mesh and classic pipelines.
3380     const auto pcSize                = static_cast<uint32_t>(sizeof(tcu::Vec4));
3381     const auto pcRange               = makePushConstantRange(VK_SHADER_STAGE_FRAGMENT_BIT, 0u, pcSize);
3382     const auto classicPipelineLayout = makePipelineLayout(vkd, device, DE_NULL, &pcRange);
3383     const auto meshPipelineLayout    = makePipelineLayout(vkd, device, DE_NULL, &pcRange);
3384 
3385     // Shader modules.
3386     const auto &binaries  = m_context.getBinaryCollection();
3387     const auto vertShader = createShaderModule(vkd, device, binaries.get("vert"));
3388     const auto meshShader = createShaderModule(vkd, device, binaries.get("mesh"));
3389     const auto fragShader = createShaderModule(vkd, device, binaries.get("frag"));
3390 
3391     // Render pass.
3392     const auto renderPass = makeRenderPass(vkd, device, imageFormat);
3393 
3394     // Framebuffer.
3395     const auto framebuffer =
3396         makeFramebuffer(vkd, device, renderPass.get(), colorView.get(), imageExtent.width, imageExtent.height);
3397 
3398     // Viewport and scissor.
3399     const std::vector<VkViewport> viewports(1u, makeViewport(imageExtent));
3400     const std::vector<VkRect2D> scissors(1u, makeRect2D(imageExtent));
3401 
3402     // Color blending.
3403     const auto colorWriteMask =
3404         (VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT);
3405     const VkPipelineColorBlendAttachmentState blendAttState = {
3406         VK_TRUE,             // VkBool32 blendEnable;
3407         VK_BLEND_FACTOR_ONE, // VkBlendFactor srcColorBlendFactor;
3408         VK_BLEND_FACTOR_ONE, // VkBlendFactor dstColorBlendFactor;
3409         VK_BLEND_OP_ADD,     // VkBlendOp colorBlendOp;
3410         VK_BLEND_FACTOR_ONE, // VkBlendFactor srcAlphaBlendFactor;
3411         VK_BLEND_FACTOR_ONE, // VkBlendFactor dstAlphaBlendFactor;
3412         VK_BLEND_OP_ADD,     // VkBlendOp alphaBlendOp;
3413         colorWriteMask,      // VkColorComponentFlags colorWriteMask;
3414     };
3415 
3416     const VkPipelineColorBlendStateCreateInfo colorBlendInfo = {
3417         VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, // VkStructureType sType;
3418         nullptr,                                                  // const void* pNext;
3419         0u,                                                       // VkPipelineColorBlendStateCreateFlags flags;
3420         VK_FALSE,                                                 // VkBool32 logicOpEnable;
3421         VK_LOGIC_OP_OR,                                           // VkLogicOp logicOp;
3422         1u,                                                       // uint32_t attachmentCount;
3423         &blendAttState,           // const VkPipelineColorBlendAttachmentState* pAttachments;
3424         {0.0f, 0.0f, 0.0f, 0.0f}, // float blendConstants[4];
3425     };
3426 
3427     const std::vector<VkDynamicState> meshDynamicStates{VK_DYNAMIC_STATE_SCISSOR};
3428     std::vector<VkDynamicState> classicDynamicStates(meshDynamicStates);
3429     if (dynTopo)
3430         classicDynamicStates.push_back(VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT);
3431 
3432     const VkPipelineDynamicStateCreateInfo meshDynamicStateInfo = {
3433         VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, // VkStructureType sType;
3434         nullptr,                                              // const void* pNext;
3435         0u,                                                   // VkPipelineDynamicStateCreateFlags flags;
3436         static_cast<uint32_t>(meshDynamicStates.size()),      // uint32_t dynamicStateCount;
3437         de::dataOrNull(meshDynamicStates),                    // const VkDynamicState* pDynamicStates;
3438     };
3439     const VkPipelineDynamicStateCreateInfo classicDynamicStateInfo = {
3440         VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, // VkStructureType sType;
3441         nullptr,                                              // const void* pNext;
3442         0u,                                                   // VkPipelineDynamicStateCreateFlags flags;
3443         static_cast<uint32_t>(classicDynamicStates.size()),   // uint32_t dynamicStateCount;
3444         de::dataOrNull(classicDynamicStates),                 // const VkDynamicState* pDynamicStates;
3445     };
3446 
3447     const auto meshPipeline = makeGraphicsPipeline(
3448         vkd, device, meshPipelineLayout.get(), DE_NULL, meshShader.get(), fragShader.get(), renderPass.get(), viewports,
3449         scissors, 0u /*subpass*/, nullptr, nullptr, nullptr, &colorBlendInfo, &meshDynamicStateInfo);
3450 
3451     const VkPipelineVertexInputStateCreateInfo vertexInputInfo = initVulkanStructure();
3452 
3453     const auto staticTopo      = (dynTopo ? VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST : VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP);
3454     const auto classicPipeline = makeGraphicsPipeline(
3455         vkd, device, classicPipelineLayout.get(), vertShader.get(), DE_NULL, DE_NULL, DE_NULL, fragShader.get(),
3456         renderPass.get(), viewports, scissors, staticTopo, 0u /*subpass*/, 0u /*patchControlPoints*/, &vertexInputInfo,
3457         nullptr, nullptr, nullptr, nullptr, &classicDynamicStateInfo);
3458 
3459     // Command pool and buffer.
3460     const auto cmdPool      = makeCommandPool(vkd, device, queueIndex);
3461     const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
3462     const auto cmdBuffer    = cmdBufferPtr.get();
3463 
3464     // Pipeline list.
3465     beginCommandBuffer(vkd, cmdBuffer);
3466 
3467     // Run pipeline.
3468     const auto clearColor = getClearColor();
3469     const auto drawCount  = m_params->drawCount();
3470     const auto quadColors = getQuadrantColors();
3471     DE_ASSERT(drawCount.x() == 1u && drawCount.y() == 1u && drawCount.z() == 1u);
3472 
3473     beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u), clearColor);
3474     for (size_t idx = 0u; idx < quadColors.size(); ++idx)
3475     {
3476         const auto &rectColor = quadColors.at(idx);
3477         vkd.cmdSetScissor(cmdBuffer, 0u, 1u, &rectColor.first);
3478 
3479         if (idx % 2u == 0u)
3480         {
3481             vkd.cmdPushConstants(cmdBuffer, classicPipelineLayout.get(), VK_SHADER_STAGE_FRAGMENT_BIT, 0u, pcSize,
3482                                  &rectColor.second);
3483             if (dynTopo)
3484                 vkd.cmdSetPrimitiveTopology(cmdBuffer, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP);
3485             vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, classicPipeline.get());
3486             vkd.cmdDraw(cmdBuffer, 4u, 1u, 0u, 0u);
3487         }
3488         else
3489         {
3490             vkd.cmdPushConstants(cmdBuffer, meshPipelineLayout.get(), VK_SHADER_STAGE_FRAGMENT_BIT, 0u, pcSize,
3491                                  &rectColor.second);
3492             vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, meshPipeline.get());
3493             vkd.cmdDrawMeshTasksEXT(cmdBuffer, drawCount.x(), drawCount.y(), drawCount.z());
3494         }
3495     }
3496     endRenderPass(vkd, cmdBuffer);
3497 
3498     // Copy color buffer to verification buffer.
3499     const auto colorAccess   = (VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT);
3500     const auto transferRead  = VK_ACCESS_TRANSFER_READ_BIT;
3501     const auto transferWrite = VK_ACCESS_TRANSFER_WRITE_BIT;
3502     const auto hostRead      = VK_ACCESS_HOST_READ_BIT;
3503 
3504     const auto preCopyBarrier =
3505         makeImageMemoryBarrier(colorAccess, transferRead, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
3506                                VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorImage.get(), colorSRR);
3507     const auto postCopyBarrier = makeMemoryBarrier(transferWrite, hostRead);
3508     const auto copyRegion      = makeBufferImageCopy(imageExtent, colorSRL);
3509 
3510     vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u,
3511                            0u, nullptr, 0u, nullptr, 1u, &preCopyBarrier);
3512     vkd.cmdCopyImageToBuffer(cmdBuffer, colorImage.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
3513                              verificationBuffer.get(), 1u, &copyRegion);
3514     vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u,
3515                            &postCopyBarrier, 0u, nullptr, 0u, nullptr);
3516 
3517     endCommandBuffer(vkd, cmdBuffer);
3518     submitCommandsAndWait(vkd, device, queue, cmdBuffer);
3519 
3520     // Generate reference image and compare results.
3521     const tcu::IVec3 iExtent(static_cast<int>(imageExtent.width), static_cast<int>(imageExtent.height), 1);
3522     const tcu::ConstPixelBufferAccess verificationAccess(tcuFormat, iExtent, verificationBufferData);
3523 
3524     generateReferenceLevel();
3525     invalidateAlloc(vkd, device, verificationBufferAlloc);
3526     if (!verifyResult(verificationAccess))
3527         TCU_FAIL("Result does not match reference; check log for details");
3528 
3529     return tcu::TestStatus::pass("Pass");
3530 }
3531 
3532 // Tests to check SetMeshOutputsEXT() and EmitMeshTasksEXT() take values from the first invocation.
3533 class FirstInvocationCase : public MeshShaderMiscCase
3534 {
3535 public:
FirstInvocationCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)3536     FirstInvocationCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
3537         : MeshShaderMiscCase(testCtx, name, std::move(params))
3538     {
3539     }
3540 
3541     void checkSupport(Context &context) const override;
3542     void initPrograms(vk::SourceCollections &programCollection) const override;
3543     TestInstance *createInstance(Context &context) const override;
3544 
3545     static constexpr uint32_t kColoredPixels = 120u;
3546 };
3547 
3548 class FirstInvocationInstance : public MeshShaderMiscInstance
3549 {
3550 public:
FirstInvocationInstance(Context & context,const MiscTestParams * params)3551     FirstInvocationInstance(Context &context, const MiscTestParams *params) : MeshShaderMiscInstance(context, params)
3552     {
3553     }
3554 
3555     void generateReferenceLevel() override;
3556 };
3557 
generateReferenceLevel()3558 void FirstInvocationInstance::generateReferenceLevel()
3559 {
3560     DE_ASSERT(m_params->height == 1u && m_params->width == 128u);
3561     DE_ASSERT(FirstInvocationCase::kColoredPixels < m_params->width);
3562 
3563     const auto format    = getOutputFormat();
3564     const auto tcuFormat = mapVkFormat(format);
3565 
3566     const auto iWidth  = static_cast<int>(m_params->width);
3567     const auto iHeight = static_cast<int>(m_params->height);
3568 
3569     m_referenceLevel.reset(new tcu::TextureLevel(tcuFormat, iWidth, iHeight));
3570 
3571     const auto clearColor = tcu::Vec4(0.0f, 0.0f, 0.0f, 0.0f);
3572     const auto geomColor  = tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f);
3573     const auto access     = m_referenceLevel->getAccess();
3574 
3575     // Fill the expected amount of colored pixels with solid color.
3576     for (int i = 0; i < iWidth; ++i)
3577     {
3578         const auto &color = ((static_cast<uint32_t>(i) < FirstInvocationCase::kColoredPixels) ? geomColor : clearColor);
3579         access.setPixel(color, i, 0);
3580     }
3581 }
3582 
createInstance(Context & context) const3583 TestInstance *FirstInvocationCase::createInstance(Context &context) const
3584 {
3585     return new FirstInvocationInstance(context, m_params.get());
3586 }
3587 
checkSupport(Context & context) const3588 void FirstInvocationCase::checkSupport(Context &context) const
3589 {
3590     MeshShaderMiscCase::checkSupport(context);
3591 
3592     if (context.getUsedApiVersion() < VK_MAKE_VERSION(1, 1, 0))
3593         TCU_THROW(NotSupportedError, "Vulkan API version >= 1.1 required");
3594 
3595     const auto &subgroupProperties = context.getSubgroupProperties();
3596     if (!(subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_BASIC_BIT))
3597         TCU_THROW(NotSupportedError, "Subgroup basic features not supported");
3598 }
3599 
initPrograms(vk::SourceCollections & programCollection) const3600 void FirstInvocationCase::initPrograms(vk::SourceCollections &programCollection) const
3601 {
3602     DE_ASSERT(m_params->height == 1u && m_params->width == 128u);
3603     DE_ASSERT(kColoredPixels < m_params->width);
3604 
3605     // Add generic fragment shader.
3606     MeshShaderMiscCase::initPrograms(programCollection);
3607 
3608     const bool useTask         = m_params->needsTaskShader();
3609     const auto fbWidth         = m_params->width;
3610     const auto meshLocalSize   = (useTask ? 1u : fbWidth);
3611     const auto taskLocalSize   = fbWidth;
3612     const auto pointsPerMeshWG = (useTask ? 1u : kColoredPixels);
3613     const auto jobID           = (useTask ? "gl_WorkGroupID.x" : "gl_LocalInvocationIndex");
3614     const auto buildOptions    = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
3615 
3616     std::string taskDataDecl;
3617     if (useTask)
3618     {
3619         std::ostringstream aux;
3620         aux << "struct TaskData {\n"
3621             << "    uint values[" << taskLocalSize << "];\n"
3622             << "};\n"
3623             << "taskPayloadSharedEXT TaskData td;\n";
3624         taskDataDecl = aux.str();
3625     }
3626 
3627     if (useTask)
3628     {
3629         std::ostringstream task;
3630         task << "#version 450\n"
3631              << "#extension GL_EXT_mesh_shader : enable\n"
3632              << "#extension GL_KHR_shader_subgroup_basic : enable\n"
3633              << "\n"
3634              << "layout(local_size_x=" << taskLocalSize << ", local_size_y=1, local_size_z=1) in;\n"
3635              << "\n"
3636              << taskDataDecl << "\n"
3637              << "void main ()\n"
3638              << "{\n"
3639              << "    td.values[gl_LocalInvocationIndex] = gl_LocalInvocationIndex * 2u;\n"
3640              << "\n"
3641              << "    uint total_jobs = max(" << kColoredPixels << " / 2u, 1u);\n"
3642              << "    if (gl_LocalInvocationIndex == 0u) {\n"
3643              << "        total_jobs = " << kColoredPixels << ";\n"
3644              << "    } else if (gl_SubgroupID > 0u) {\n"
3645              << "        total_jobs = max(" << kColoredPixels << " / 4u, 1u);\n"
3646              << "    }\n"
3647              << "\n"
3648              << "    EmitMeshTasksEXT(total_jobs, 1u, 1u);\n"
3649              << "}\n";
3650 
3651         programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
3652     }
3653 
3654     {
3655         std::ostringstream mesh;
3656         mesh << "#version 450\n"
3657              << "#extension GL_EXT_mesh_shader : enable\n"
3658              << "#extension GL_KHR_shader_subgroup_basic : enable\n"
3659              << "\n"
3660              << "layout(local_size_x=" << meshLocalSize << ", local_size_y=1, local_size_z=1) in;\n"
3661              << "layout(points) out;\n"
3662              << "layout(max_primitives=" << meshLocalSize << ", max_vertices=" << meshLocalSize << ") out;\n"
3663              << "\n"
3664              << "layout (location=0) out perprimitiveEXT vec4 pointColor[];\n"
3665              << taskDataDecl << "\n"
3666              << "void main ()\n"
3667              << "{\n"
3668              << "    uint total_points = max(" << pointsPerMeshWG << " / 2u, 1u);\n"
3669              << "    \n";
3670 
3671         if (!useTask)
3672         {
3673             mesh << "    if (gl_LocalInvocationIndex == 0u) {\n"
3674                  << "        total_points = " << pointsPerMeshWG << ";\n"
3675                  << "    } else if (gl_SubgroupID > 0u) {\n"
3676                  << "        total_points = max(" << pointsPerMeshWG << " / 4u, 1u);\n"
3677                  << "    }\n"
3678                  << "    \n";
3679         }
3680 
3681         mesh << "    SetMeshOutputsEXT(total_points, total_points);\n"
3682              << "    if (gl_LocalInvocationIndex < " << pointsPerMeshWG << ") {\n"
3683              << "        gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 1.0;\n"
3684              << "        gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(((float(" << jobID
3685              << ") + 0.5) / " << fbWidth << ") * 2.0 - 1.0, 0.0, 0.0, 1.0);\n"
3686              << "        gl_PrimitivePointIndicesEXT[gl_LocalInvocationIndex] = gl_LocalInvocationIndex;\n"
3687              << "        pointColor[gl_LocalInvocationIndex] = vec4(0.0, 0.0, 1.0, 1.0);\n"
3688              << "    }\n"
3689              << "}\n";
3690 
3691         programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
3692     }
3693 }
3694 
3695 // Tests that check LocalSizeId works as expected.
3696 class LocalSizeIdCase : public MeshShaderMiscCase
3697 {
3698 public:
LocalSizeIdCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)3699     LocalSizeIdCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
3700         : MeshShaderMiscCase(testCtx, name, std::move(params))
3701     {
3702     }
3703 
3704     void checkSupport(Context &context) const override;
3705     void initPrograms(vk::SourceCollections &programCollection) const override;
3706     TestInstance *createInstance(Context &context) const override;
3707 };
3708 
3709 class LocalSizeIdInstance : public MeshShaderMiscInstance
3710 {
3711 public:
LocalSizeIdInstance(Context & context,const MiscTestParams * params)3712     LocalSizeIdInstance(Context &context, const MiscTestParams *params) : MeshShaderMiscInstance(context, params)
3713     {
3714     }
3715 
3716     void generateReferenceLevel() override;
3717     tcu::TestStatus iterate() override;
3718 };
3719 
createInstance(Context & context) const3720 TestInstance *LocalSizeIdCase::createInstance(Context &context) const
3721 {
3722     return new LocalSizeIdInstance(context, m_params.get());
3723 }
3724 
generateReferenceLevel()3725 void LocalSizeIdInstance::generateReferenceLevel()
3726 {
3727     generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f), m_referenceLevel);
3728 }
3729 
checkSupport(Context & context) const3730 void LocalSizeIdCase::checkSupport(Context &context) const
3731 {
3732     // Generic checks.
3733     MeshShaderMiscCase::checkSupport(context);
3734 
3735     // Needed for LocalSizeId.
3736     context.requireDeviceFunctionality("VK_KHR_maintenance4");
3737 }
3738 
initPrograms(vk::SourceCollections & programCollection) const3739 void LocalSizeIdCase::initPrograms(vk::SourceCollections &programCollection) const
3740 {
3741     const SpirVAsmBuildOptions spvOptions(programCollection.usedVulkanVersion, SPIRV_VERSION_1_5,
3742                                           false /*allowSpirv14*/, true /*allowMaintenance4*/);
3743     const auto useTask = m_params->needsTaskShader();
3744 
3745     DE_ASSERT(m_params->height == 1u && m_params->width == 32u);
3746 
3747     // Add generic fragment shader.
3748     MeshShaderMiscCase::initPrograms(programCollection);
3749 
3750     if (useTask)
3751     {
3752         // Roughly equivalent to the following shader.
3753         //    #version 450
3754         //    #extension GL_EXT_mesh_shader : enable
3755         //
3756         // layout(local_size_x_id=10, local_size_y_id=11, local_size_z_id=12) in;
3757         //    struct TaskData {
3758         //     uint pixelID[32];
3759         // };
3760         // taskPayloadSharedEXT TaskData td;
3761         //
3762         //    void main ()
3763         //    {
3764         //     td.pixelID[gl_LocalInvocationIndex] = gl_LocalInvocationIndex;
3765         //     EmitMeshTasksEXT(1u, 1u, 1u);
3766         //    }
3767 
3768         std::ostringstream taskSPV;
3769         taskSPV << "      ; SPIR-V\n"
3770                 << "      ; Version: 1.0\n"
3771                 << "      ; Generator: Khronos Glslang Reference Front End; 10\n"
3772                 << "      ; Bound: 26\n"
3773                 << "      ; Schema: 0\n"
3774                 << "      OpCapability MeshShadingEXT\n"
3775                 << "      OpExtension \"SPV_EXT_mesh_shader\"\n"
3776                 << " %1 = OpExtInstImport \"GLSL.std.450\"\n"
3777                 << "      OpMemoryModel Logical GLSL450\n"
3778                 << "      OpEntryPoint TaskEXT %4 \"main\" %11 %15\n"
3779                 << "      OpExecutionModeId %4 LocalSizeId %21 %22 %23\n"
3780                 << "      OpDecorate %15 BuiltIn LocalInvocationIndex\n"
3781                 << "      OpDecorate %21 SpecId 10\n"
3782                 << "      OpDecorate %22 SpecId 11\n"
3783                 << "      OpDecorate %23 SpecId 12\n"
3784                 << " %2 = OpTypeVoid\n"
3785                 << " %3 = OpTypeFunction %2\n"
3786                 << " %6 = OpTypeInt 32 0\n"
3787                 << " %7 = OpConstant %6 32\n"
3788                 << " %8 = OpTypeArray %6 %7\n"
3789                 << " %9 = OpTypeStruct %8\n"
3790                 << "%10 = OpTypePointer TaskPayloadWorkgroupEXT %9\n"
3791                 << "%11 = OpVariable %10 TaskPayloadWorkgroupEXT\n"
3792                 << "%12 = OpTypeInt 32 1\n"
3793                 << "%13 = OpConstant %12 0\n"
3794                 << "%14 = OpTypePointer Input %6\n"
3795                 << "%15 = OpVariable %14 Input\n"
3796                 << "%18 = OpTypePointer TaskPayloadWorkgroupEXT %6\n"
3797                 << "%20 = OpConstant %6 1\n"
3798                 << "%21 = OpSpecConstant %6 1\n"
3799                 << "%22 = OpSpecConstant %6 1\n"
3800                 << "%23 = OpSpecConstant %6 1\n"
3801                 << " %4 = OpFunction %2 None %3\n"
3802                 << " %5 = OpLabel\n"
3803                 << "%16 = OpLoad %6 %15\n"
3804                 << "%17 = OpLoad %6 %15\n"
3805                 << "%19 = OpAccessChain %18 %11 %13 %16\n"
3806                 << "      OpStore %19 %17\n"
3807                 << "      OpEmitMeshTasksEXT %20 %20 %20 %11\n"
3808                 << "      OpFunctionEnd\n";
3809 
3810         programCollection.spirvAsmSources.add("task") << taskSPV.str() << spvOptions;
3811     }
3812 
3813     {
3814         // Roughly equivalent to the following shader.
3815         //    #version 450
3816         //    #extension GL_EXT_mesh_shader : enable
3817         //
3818         // layout(local_size_x_id=20, local_size_y_id=21, local_size_z_id=22) in;
3819         // layout(points) out;
3820         // layout(max_primitives=32, max_vertices=32) out;
3821         //
3822         // layout (location=0) out perprimitiveEXT vec4 pointColor[];
3823         //#if useTask
3824         //    struct TaskData {
3825         //     uint pixelID[32];
3826         // };
3827         // taskPayloadSharedEXT TaskData td;
3828         //#endif
3829         //
3830         //    void main ()
3831         //    {
3832         //#if useTask
3833         //     const uint pixelId = td.pixelID[gl_LocalInvocationIndex];
3834         //#else
3835         //     const uint pixelId = gl_LocalInvocationIndex;
3836         //#endif
3837         //     SetMeshOutputsEXT(32u, 32u);
3838         //     gl_MeshVerticesEXT[pixelId].gl_PointSize = 1.0;
3839         //     gl_MeshVerticesEXT[pixelId].gl_Position = vec4(((float(pixelId) + 0.5) / 32.0) * 2.0 - 1.0, 0.0, 0.0, 1.0);
3840         //     gl_PrimitivePointIndicesEXT[pixelId] = pixelId;
3841         //     pointColor[pixelId] = vec4(0.0, 0.0, 1.0, 1.0);
3842         //    }
3843         std::ostringstream meshSPV;
3844         meshSPV
3845             << "                              OpCapability MeshShadingEXT\n"
3846             << "                              OpExtension \"SPV_EXT_mesh_shader\"\n"
3847             << "                         %1 = OpExtInstImport \"GLSL.std.450\"\n"
3848             << "                              OpMemoryModel Logical GLSL450\n"
3849             << "                              OpEntryPoint MeshEXT %main \"main\" %local_invocation_index "
3850                "%mesh_vertices %primitive_point_indices %primitive_colors"
3851             << (useTask ? " %task_data" : "") << "\n"
3852             << "                              OpExecutionModeId %main LocalSizeId %constand_id_20 %constant_id_21 "
3853                "%constant_id_22\n"
3854             << "                              OpExecutionMode %main OutputVertices 32\n"
3855             << "                              OpExecutionMode %main OutputPrimitivesNV 32\n"
3856             << "                              OpExecutionMode %main OutputPoints\n"
3857             << "                              OpDecorate %local_invocation_index BuiltIn LocalInvocationIndex\n"
3858             << "                              OpMemberDecorate %mesh_vertices_struct 0 BuiltIn Position\n"
3859             << "                              OpMemberDecorate %mesh_vertices_struct 1 BuiltIn PointSize\n"
3860             << "                              OpMemberDecorate %mesh_vertices_struct 2 BuiltIn ClipDistance\n"
3861             << "                              OpMemberDecorate %mesh_vertices_struct 3 BuiltIn CullDistance\n"
3862             << "                              OpDecorate %mesh_vertices_struct Block\n"
3863             << "                              OpDecorate %primitive_point_indices BuiltIn PrimitivePointIndicesEXT\n"
3864             << "                              OpDecorate %primitive_colors PerPrimitiveEXT\n"
3865             << "                              OpDecorate %primitive_colors Location 0\n"
3866             << "                              OpDecorate %constand_id_20 SpecId 20\n"
3867             << "                              OpDecorate %constant_id_21 SpecId 21\n"
3868             << "                              OpDecorate %constant_id_22 SpecId 22\n"
3869             << "                 %type_void = OpTypeVoid\n"
3870             << "                 %void_func = OpTypeFunction %type_void\n"
3871             << "                       %int = OpTypeInt 32 1\n"
3872             << "                      %uint = OpTypeInt 32 0\n"
3873             << "                     %float = OpTypeFloat 32\n"
3874             << "                      %vec4 = OpTypeVector %float 4\n"
3875             << "                     %uvec3 = OpTypeVector %uint 3\n"
3876             << "                     %int_0 = OpConstant %int 0\n"
3877             << "                     %int_1 = OpConstant %int 1\n"
3878             << "                    %uint_1 = OpConstant %uint 1\n"
3879             << "                   %uint_32 = OpConstant %uint 32\n"
3880             << "                   %float_0 = OpConstant %float 0\n"
3881             << "                   %float_1 = OpConstant %float 1\n"
3882             << "                 %float_0_5 = OpConstant %float 0.5\n"
3883             << "                  %float_32 = OpConstant %float 32\n"
3884             << "                   %float_2 = OpConstant %float 2\n"
3885             << "             %float_array_1 = OpTypeArray %float %uint_1\n"
3886             << "             %func_uint_ptr = OpTypePointer Function %uint\n"
3887             << "            %input_uint_ptr = OpTypePointer Input %uint\n"
3888             << "    %local_invocation_index = OpVariable %input_uint_ptr Input\n"
3889             << "      %mesh_vertices_struct = OpTypeStruct %vec4 %float %float_array_1 %float_array_1\n"
3890             << "       %mesh_vertices_array = OpTypeArray %mesh_vertices_struct %uint_32\n"
3891             << "     %mesh_vertices_out_ptr = OpTypePointer Output %mesh_vertices_array\n"
3892             << "             %mesh_vertices = OpVariable %mesh_vertices_out_ptr Output\n"
3893             << "          %output_float_ptr = OpTypePointer Output %float\n"
3894             << "           %output_vec4_ptr = OpTypePointer Output %vec4\n"
3895             << "             %uint_array_32 = OpTypeArray %uint %uint_32\n"
3896             << "\n";
3897 
3898         if (useTask)
3899         {
3900             meshSPV << "\n"
3901                     << "%uint_array_32_struct                  = OpTypeStruct %uint_array_32\n"
3902                     << "%task_payload_uint_array_32_struct_ptr = OpTypePointer TaskPayloadWorkgroupEXT "
3903                        "%uint_array_32_struct\n"
3904                     << "%task_data                             = OpVariable %task_payload_uint_array_32_struct_ptr "
3905                        "TaskPayloadWorkgroupEXT\n"
3906                     << "%task_payload_uint_ptr                 = OpTypePointer TaskPayloadWorkgroupEXT %uint\n"
3907                     << "\n";
3908         }
3909 
3910         meshSPV << "  %output_uint_array_32_ptr = OpTypePointer Output %uint_array_32\n"
3911                 << "   %primitive_point_indices = OpVariable %output_uint_array_32_ptr Output\n"
3912                 << "           %output_uint_ptr = OpTypePointer Output %uint\n"
3913                 << "             %vec4_array_32 = OpTypeArray %vec4 %uint_32\n"
3914                 << "  %output_vec4_array_32_ptr = OpTypePointer Output %vec4_array_32\n"
3915                 << "          %primitive_colors = OpVariable %output_vec4_array_32_ptr Output\n"
3916                 << "                      %blue = OpConstantComposite %vec4 %float_0 %float_0 %float_1 %float_1\n"
3917                 << "            %constand_id_20 = OpSpecConstant %uint 1\n"
3918                 << "            %constant_id_21 = OpSpecConstant %uint 1\n"
3919                 << "            %constant_id_22 = OpSpecConstant %uint 1\n"
3920                 << "                      %main = OpFunction %type_void None %void_func\n"
3921                 << "                %main_label = OpLabel\n"
3922                 << "                  %pixel_id = OpVariable %func_uint_ptr Function\n"
3923                 << "%local_invocation_index_val = OpLoad %uint %local_invocation_index\n";
3924 
3925         if (useTask)
3926         {
3927             meshSPV << "           %td_pixel_id_ptr = OpAccessChain %task_payload_uint_ptr %task_data %int_0 "
3928                        "%local_invocation_index_val\n"
3929                     << "           %td_pixel_id_val = OpLoad %uint %td_pixel_id_ptr\n"
3930                     << "                              OpStore %pixel_id %td_pixel_id_val\n";
3931         }
3932         else
3933         {
3934             meshSPV << "                              OpStore %pixel_id %local_invocation_index_val\n";
3935         }
3936 
3937         meshSPV
3938             << "                              OpSetMeshOutputsEXT %uint_32 %uint_32\n"
3939             << "              %pixel_id_val = OpLoad %uint %pixel_id\n"
3940             << "                %point_size = OpAccessChain %output_float_ptr %mesh_vertices %pixel_id_val %int_1\n"
3941             << "                              OpStore %point_size %float_1\n"
3942             << "        %pixel_id_val_float = OpConvertUToF %float %pixel_id_val\n"
3943             << "       %pixel_id_val_center = OpFAdd %float %pixel_id_val_float %float_0_5\n"
3944             << "                   %x_unorm = OpFDiv %float %pixel_id_val_center %float_32\n"
3945             << "                 %x_unorm_2 = OpFMul %float %x_unorm %float_2\n"
3946             << "                    %x_norm = OpFSub %float %x_unorm_2 %float_1\n"
3947             << "                 %point_pos = OpCompositeConstruct %vec4 %x_norm %float_0 %float_0 %float_1\n"
3948             << "           %gl_position_ptr = OpAccessChain %output_vec4_ptr %mesh_vertices %pixel_id_val %int_0\n"
3949             << "                              OpStore %gl_position_ptr %point_pos\n"
3950             << "           %point_index_ptr = OpAccessChain %output_uint_ptr %primitive_point_indices %pixel_id_val\n"
3951             << "                              OpStore %point_index_ptr %pixel_id_val\n"
3952             << "           %point_color_ptr = OpAccessChain %output_vec4_ptr %primitive_colors %pixel_id_val\n"
3953             << "                              OpStore %point_color_ptr %blue\n"
3954             << "                              OpReturn\n"
3955             << "                              OpFunctionEnd\n";
3956 
3957         programCollection.spirvAsmSources.add("mesh") << meshSPV.str() << spvOptions;
3958     }
3959 }
3960 
iterate()3961 tcu::TestStatus LocalSizeIdInstance::iterate()
3962 {
3963     const auto &vkd       = m_context.getDeviceInterface();
3964     const auto device     = m_context.getDevice();
3965     auto &alloc           = m_context.getDefaultAllocator();
3966     const auto queueIndex = m_context.getUniversalQueueFamilyIndex();
3967     const auto queue      = m_context.getUniversalQueue();
3968 
3969     const auto imageFormat = getOutputFormat();
3970     const auto tcuFormat   = mapVkFormat(imageFormat);
3971     const auto imageExtent = makeExtent3D(m_params->width, m_params->height, 1u);
3972     const auto imageUsage  = (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3973 
3974     const auto &binaries = m_context.getBinaryCollection();
3975     const auto hasTask   = binaries.contains("task");
3976 
3977     const VkImageCreateInfo colorBufferInfo = {
3978         VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
3979         nullptr,                             // const void* pNext;
3980         0u,                                  // VkImageCreateFlags flags;
3981         VK_IMAGE_TYPE_2D,                    // VkImageType imageType;
3982         imageFormat,                         // VkFormat format;
3983         imageExtent,                         // VkExtent3D extent;
3984         1u,                                  // uint32_t mipLevels;
3985         1u,                                  // uint32_t arrayLayers;
3986         VK_SAMPLE_COUNT_1_BIT,               // VkSampleCountFlagBits samples;
3987         VK_IMAGE_TILING_OPTIMAL,             // VkImageTiling tiling;
3988         imageUsage,                          // VkImageUsageFlags usage;
3989         VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
3990         0u,                                  // uint32_t queueFamilyIndexCount;
3991         nullptr,                             // const uint32_t* pQueueFamilyIndices;
3992         VK_IMAGE_LAYOUT_UNDEFINED,           // VkImageLayout initialLayout;
3993     };
3994 
3995     // Create color image and view.
3996     ImageWithMemory colorImage(vkd, device, alloc, colorBufferInfo, MemoryRequirement::Any);
3997     const auto colorSRR  = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
3998     const auto colorSRL  = makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
3999     const auto colorView = makeImageView(vkd, device, colorImage.get(), VK_IMAGE_VIEW_TYPE_2D, imageFormat, colorSRR);
4000 
4001     // Create a memory buffer for verification.
4002     const auto verificationBufferSize =
4003         static_cast<VkDeviceSize>(imageExtent.width * imageExtent.height * tcu::getPixelSize(tcuFormat));
4004     const auto verificationBufferUsage = (VK_BUFFER_USAGE_TRANSFER_DST_BIT);
4005     const auto verificationBufferInfo  = makeBufferCreateInfo(verificationBufferSize, verificationBufferUsage);
4006 
4007     BufferWithMemory verificationBuffer(vkd, device, alloc, verificationBufferInfo, MemoryRequirement::HostVisible);
4008     auto &verificationBufferAlloc = verificationBuffer.getAllocation();
4009     void *verificationBufferData  = verificationBufferAlloc.getHostPtr();
4010 
4011     // Pipeline layout.
4012     const auto pipelineLayout = makePipelineLayout(vkd, device, 0u, nullptr, 0u, nullptr);
4013 
4014     // Shader modules.
4015     const auto meshShader = createShaderModule(vkd, device, binaries.get("mesh"));
4016     const auto fragShader = createShaderModule(vkd, device, binaries.get("frag"));
4017 
4018     Move<VkShaderModule> taskShader;
4019     if (hasTask)
4020         taskShader = createShaderModule(vkd, device, binaries.get("task"));
4021 
4022     // Spec constant data (must match shaders).
4023     const std::vector<uint32_t> scData{//    10        11        12        20        21        22
4024                                        32u, 1u, 1u, 32u, 1u, 1u};
4025     const auto scSize = static_cast<uint32_t>(sizeof(uint32_t));
4026     const std::vector<VkSpecializationMapEntry> scMapEntries{
4027         makeSpecializationMapEntry(10u, 0u * scSize, scSize), makeSpecializationMapEntry(11u, 1u * scSize, scSize),
4028         makeSpecializationMapEntry(12u, 2u * scSize, scSize), makeSpecializationMapEntry(20u, 3u * scSize, scSize),
4029         makeSpecializationMapEntry(21u, 4u * scSize, scSize), makeSpecializationMapEntry(22u, 5u * scSize, scSize),
4030     };
4031 
4032     const auto scMapInfo =
4033         makeSpecializationInfo(static_cast<uint32_t>(scMapEntries.size()), de::dataOrNull(scMapEntries),
4034                                static_cast<uint32_t>(de::dataSize(scData)), de::dataOrNull(scData));
4035 
4036     std::vector<VkPipelineShaderStageCreateInfo> shaderStages;
4037     shaderStages.push_back(
4038         makePipelineShaderStageCreateInfo(VK_SHADER_STAGE_MESH_BIT_EXT, meshShader.get(), &scMapInfo));
4039     shaderStages.push_back(makePipelineShaderStageCreateInfo(VK_SHADER_STAGE_FRAGMENT_BIT, fragShader.get()));
4040     if (hasTask)
4041         shaderStages.push_back(
4042             makePipelineShaderStageCreateInfo(VK_SHADER_STAGE_TASK_BIT_EXT, taskShader.get(), &scMapInfo));
4043 
4044     // Render pass.
4045     const auto renderPass = makeRenderPass(vkd, device, imageFormat);
4046 
4047     // Framebuffer.
4048     const auto framebuffer =
4049         makeFramebuffer(vkd, device, renderPass.get(), colorView.get(), imageExtent.width, imageExtent.height);
4050 
4051     // Viewport and scissor.
4052     const std::vector<VkViewport> viewports(1u, makeViewport(imageExtent));
4053     const std::vector<VkRect2D> scissors(1u, makeRect2D(imageExtent));
4054 
4055     // Pipeline with specialization constants.
4056     const auto pipeline = makeGraphicsPipeline(vkd, device, DE_NULL, pipelineLayout.get(), 0u, shaderStages,
4057                                                renderPass.get(), viewports, scissors);
4058 
4059     // Command pool and buffer.
4060     const auto cmdPool      = makeCommandPool(vkd, device, queueIndex);
4061     const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
4062     const auto cmdBuffer    = cmdBufferPtr.get();
4063 
4064     beginCommandBuffer(vkd, cmdBuffer);
4065 
4066     // Run pipeline.
4067     const tcu::Vec4 clearColor(0.0f, 0.0f, 0.0f, 0.0f);
4068     const auto drawCount = m_params->drawCount();
4069     beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u), clearColor);
4070     vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
4071     vkd.cmdDrawMeshTasksEXT(cmdBuffer, drawCount.x(), drawCount.y(), drawCount.z());
4072     endRenderPass(vkd, cmdBuffer);
4073 
4074     // Copy color buffer to verification buffer.
4075     const auto colorAccess   = (VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT);
4076     const auto transferRead  = VK_ACCESS_TRANSFER_READ_BIT;
4077     const auto transferWrite = VK_ACCESS_TRANSFER_WRITE_BIT;
4078     const auto hostRead      = VK_ACCESS_HOST_READ_BIT;
4079 
4080     const auto preCopyBarrier =
4081         makeImageMemoryBarrier(colorAccess, transferRead, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
4082                                VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorImage.get(), colorSRR);
4083     const auto postCopyBarrier = makeMemoryBarrier(transferWrite, hostRead);
4084     const auto copyRegion      = makeBufferImageCopy(imageExtent, colorSRL);
4085 
4086     vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u,
4087                            0u, nullptr, 0u, nullptr, 1u, &preCopyBarrier);
4088     vkd.cmdCopyImageToBuffer(cmdBuffer, colorImage.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
4089                              verificationBuffer.get(), 1u, &copyRegion);
4090     vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u,
4091                            &postCopyBarrier, 0u, nullptr, 0u, nullptr);
4092 
4093     endCommandBuffer(vkd, cmdBuffer);
4094     submitCommandsAndWait(vkd, device, queue, cmdBuffer);
4095 
4096     // Generate reference image and compare results.
4097     const tcu::IVec3 iExtent(static_cast<int>(imageExtent.width), static_cast<int>(imageExtent.height), 1);
4098     const tcu::ConstPixelBufferAccess verificationAccess(tcuFormat, iExtent, verificationBufferData);
4099 
4100     generateReferenceLevel();
4101     invalidateAlloc(vkd, device, verificationBufferAlloc);
4102     if (!verifyResult(verificationAccess))
4103         TCU_FAIL("Result does not match reference; check log for details");
4104 
4105     return tcu::TestStatus::pass("Pass");
4106 }
4107 
4108 // Test multiple task payloads.
4109 class MultipleTaskPayloadsCase : public MeshShaderMiscCase
4110 {
4111 public:
MultipleTaskPayloadsCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)4112     MultipleTaskPayloadsCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
4113         : MeshShaderMiscCase(testCtx, name, std::move(params))
4114     {
4115     }
4116 
4117     void initPrograms(vk::SourceCollections &programCollection) const override;
4118     TestInstance *createInstance(Context &context) const override;
4119 
4120     static constexpr uint32_t kGoodKeyIdx = 1u;
4121 };
4122 
4123 class MultipleTaskPayloadsInstance : public MeshShaderMiscInstance
4124 {
4125 public:
MultipleTaskPayloadsInstance(Context & context,const MiscTestParams * params)4126     MultipleTaskPayloadsInstance(Context &context, const MiscTestParams *params)
4127         : MeshShaderMiscInstance(context, params)
4128     {
4129     }
4130 
4131     void generateReferenceLevel() override;
4132     tcu::TestStatus iterate() override;
4133 };
4134 
createInstance(Context & context) const4135 TestInstance *MultipleTaskPayloadsCase::createInstance(Context &context) const
4136 {
4137     return new MultipleTaskPayloadsInstance(context, m_params.get());
4138 }
4139 
initPrograms(vk::SourceCollections & programCollection) const4140 void MultipleTaskPayloadsCase::initPrograms(vk::SourceCollections &programCollection) const
4141 {
4142     DE_ASSERT(m_params->needsTaskShader());
4143 
4144     const auto buildOptions    = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
4145     const auto spvBuildOptions = getMinMeshEXTSpvBuildOptions(programCollection.usedVulkanVersion);
4146     const std::vector<uint32_t> keys{3717945376u, 2325956828u, 433982700u};
4147     //const std::vector<uint32_t> keys { 85u, 170u, 255u };
4148 
4149     // Generic fragment shader.
4150     MeshShaderMiscCase::initPrograms(programCollection);
4151 
4152     const std::string taskDataDecl = "struct TaskData {\n"
4153                                      "    uint key;\n"
4154                                      "};\n"
4155                                      "taskPayloadSharedEXT TaskData td;\n";
4156 
4157     // Idea behind this test: verify that the right payload was passed to the mesh shader and set the geometry color based on that.
4158     std::ostringstream mesh;
4159     mesh << "#version 450\n"
4160          << "#extension GL_EXT_mesh_shader : enable\n"
4161          << "\n"
4162          << "layout(local_size_x=1) in;\n"
4163          << "layout(triangles) out;\n"
4164          << "layout(max_vertices=3, max_primitives=1) out;\n"
4165          << "\n"
4166          << "layout(location=0) out perprimitiveEXT vec4 triangleColor[];\n"
4167          << taskDataDecl << "\n"
4168          << "void main ()\n"
4169          << "{\n"
4170          << "    SetMeshOutputsEXT(3, 1);\n"
4171          << "    gl_MeshVerticesEXT[0].gl_Position = vec4(-1.0, -1.0, 0.0f, 1.0f);\n"
4172          << "    gl_MeshVerticesEXT[1].gl_Position = vec4( 3.0, -1.0, 0.0f, 1.0f);\n"
4173          << "    gl_MeshVerticesEXT[2].gl_Position = vec4(-1.0,  3.0, 0.0f, 1.0f);\n"
4174          << "    gl_PrimitiveTriangleIndicesEXT[0] = uvec3(0, 1, 2);\n"
4175          << "    const vec4 color = ((td.key == " << keys[kGoodKeyIdx]
4176          << "u) ? vec4(0.0, 0.0, 1.0, 1.0) : vec4(0.0, 0.0, 0.0, 1.0));\n"
4177          //<< "    const vec4 color = vec4(0.0, 0.0, (float(td.key) / 255.0), 1.0);\n"
4178          << "    triangleColor[0] = color;\n"
4179          << "}\n";
4180     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
4181 
4182     const auto &meshCount = m_params->meshCount;
4183     DE_ASSERT(meshCount.x() == 1u && meshCount.y() == 1u && meshCount.z() == 1u);
4184     DE_UNREF(meshCount); // For release builds.
4185 
4186 #if 0
4187 #if 0
4188     // Note: pseudocode, this actually does not compile with glslang.
4189     std::ostringstream task;
4190     task
4191         << "#version 450\n"
4192         << "#extension GL_EXT_mesh_shader : enable\n"
4193         << "\n"
4194         << "layout(local_size_x=1) in;\n"
4195         << "layout(push_constant, std430) uniform PCBlock {\n"
4196         << "    uint index;\n"
4197         << "} pc;\n"
4198         << "struct TaskData {\n"
4199         << "    uint key;\n"
4200         << "};\n"
4201         << "taskPayloadSharedEXT TaskData td0;\n"
4202         << "taskPayloadSharedEXT TaskData td1;\n"
4203         << "taskPayloadSharedEXT TaskData td2;\n"
4204         << "\n"
4205         << "void main ()\n"
4206         << "{\n"
4207         << "    td0.key = " << keys.at(0) << "u;\n"
4208         << "    td1.key = " << keys.at(1) << "u;\n"
4209         << "    td2.key = " << keys.at(2) << "u;\n"
4210         << "    if (pc.index == 0u)      EmitMeshTasksEXT(1u, 1u, 1u, td0);\n"
4211         << "    else if (pc.index == 1u) EmitMeshTasksEXT(1u, 1u, 1u, td1);\n"
4212         << "    else                     EmitMeshTasksEXT(1u, 1u, 1u, td2);\n"
4213         << "}\n"
4214         ;
4215     programCollection.glslSources.add("task") << glu::TaskSource(task.str());
4216 #else
4217     // Similar shader to check the setup works.
4218     std::ostringstream task;
4219     task
4220         << "#version 450\n"
4221         << "#extension GL_EXT_mesh_shader : enable\n"
4222         << "\n"
4223         << "layout(local_size_x=1) in;\n"
4224         << "layout(push_constant, std430) uniform PCBlock {\n"
4225         << "    uint index;\n"
4226         << "} pc;\n"
4227         << "struct TaskData {\n"
4228         << "    uint key;\n"
4229         << "};\n"
4230         << "taskPayloadSharedEXT TaskData td;\n"
4231         << "\n"
4232         << "void main ()\n"
4233         << "{\n"
4234         << "    if (pc.index == 0u)      td.key = " << keys.at(0) << "u;\n"
4235         << "    else if (pc.index == 1u) td.key = " << keys.at(1) << "u;\n"
4236         << "    else                     td.key = " << keys.at(2) << "u;\n"
4237         << "    EmitMeshTasksEXT(1u, 1u, 1u);\n"
4238         << "}\n"
4239         ;
4240     programCollection.glslSources.add("task") << glu::TaskSource(task.str());
4241 #endif
4242 #else
4243     std::ostringstream taskSPV;
4244     taskSPV << "                    OpCapability MeshShadingEXT\n"
4245             << "                    OpExtension \"SPV_EXT_mesh_shader\"\n"
4246             << "               %1 = OpExtInstImport \"GLSL.std.450\"\n"
4247             << "                    OpMemoryModel Logical GLSL450\n"
4248             << "                    OpEntryPoint TaskEXT %main \"main\"\n"
4249             << "                    OpExecutionMode %main LocalSize 1 1 1\n"
4250             << "                    OpMemberDecorate %PCBlock 0 Offset 0\n"
4251             << "                    OpDecorate %PCBlock Block\n"
4252             << "                    OpDecorate %work_group_size BuiltIn WorkgroupSize\n"
4253             << "               %2 = OpTypeVoid\n"
4254             << "               %3 = OpTypeFunction %2\n"
4255             << "            %uint = OpTypeInt 32 0\n"
4256             << "        %TaskData = OpTypeStruct %uint\n"
4257             << "    %TaskData_ptr = OpTypePointer TaskPayloadWorkgroupEXT %TaskData\n"
4258             << "       %payload_0 = OpVariable %TaskData_ptr TaskPayloadWorkgroupEXT\n"
4259             << "       %payload_1 = OpVariable %TaskData_ptr TaskPayloadWorkgroupEXT\n"
4260             << "       %payload_2 = OpVariable %TaskData_ptr TaskPayloadWorkgroupEXT\n"
4261             << "             %int = OpTypeInt 32 1\n"
4262             << "           %int_0 = OpConstant %int 0\n"
4263             << "           %key_0 = OpConstant %uint " << keys.at(0) << "\n"
4264             << "           %key_1 = OpConstant %uint " << keys.at(1) << "\n"
4265             << "           %key_2 = OpConstant %uint " << keys.at(2) << "\n"
4266             << "%payload_uint_ptr = OpTypePointer TaskPayloadWorkgroupEXT %uint\n"
4267             << "         %PCBlock = OpTypeStruct %uint\n"
4268             << "     %PCBlock_ptr = OpTypePointer PushConstant %PCBlock\n"
4269             << "              %pc = OpVariable %PCBlock_ptr PushConstant\n"
4270             << "     %pc_uint_ptr = OpTypePointer PushConstant %uint\n"
4271             << "          %uint_0 = OpConstant %uint 0\n"
4272             << "          %uint_1 = OpConstant %uint 1\n"
4273             << "            %bool = OpTypeBool\n"
4274             << "           %uvec3 = OpTypeVector %uint 3\n"
4275             << " %work_group_size = OpConstantComposite %uvec3 %uint_1 %uint_1 %uint_1\n"
4276             << "            %main = OpFunction %2 None %3\n"
4277             << "               %5 = OpLabel\n"
4278             << "   %payload_0_key = OpAccessChain %payload_uint_ptr %payload_0 %int_0\n"
4279             << "   %payload_1_key = OpAccessChain %payload_uint_ptr %payload_1 %int_0\n"
4280             << "   %payload_2_key = OpAccessChain %payload_uint_ptr %payload_2 %int_0\n"
4281             << "                    OpStore %payload_0_key %key_0\n"
4282             << "                    OpStore %payload_1_key %key_1\n"
4283             << "                    OpStore %payload_2_key %key_2\n"
4284             << "    %pc_index_ptr = OpAccessChain %pc_uint_ptr %pc %int_0\n"
4285             << "        %pc_index = OpLoad %uint %pc_index_ptr\n"
4286             << "              %23 = OpIEqual %bool %pc_index %uint_0\n"
4287             << "                    OpSelectionMerge %25 None\n"
4288             << "                    OpBranchConditional %23 %24 %27\n"
4289             << "              %24 = OpLabel\n"
4290             << "                    OpEmitMeshTasksEXT %uint_1 %uint_1 %uint_1 %payload_0\n"
4291             << "                    OpBranch %25\n"
4292             << "              %27 = OpLabel\n"
4293             << "              %30 = OpIEqual %bool %pc_index %uint_1\n"
4294             << "                    OpSelectionMerge %32 None\n"
4295             << "                    OpBranchConditional %30 %31 %33\n"
4296             << "              %31 = OpLabel\n"
4297             << "                    OpEmitMeshTasksEXT %uint_1 %uint_1 %uint_1 %payload_1\n"
4298             << "                    OpBranch %32\n"
4299             << "              %33 = OpLabel\n"
4300             << "                    OpEmitMeshTasksEXT %uint_1 %uint_1 %uint_1 %payload_2\n"
4301             << "                    OpBranch %32\n"
4302             << "              %32 = OpLabel\n"
4303             << "                    OpBranch %25\n"
4304             << "              %25 = OpLabel\n"
4305             << "                    OpReturn\n"
4306             << "                    OpFunctionEnd\n";
4307     programCollection.spirvAsmSources.add("task") << taskSPV.str() << spvBuildOptions;
4308 #endif
4309 }
4310 
generateReferenceLevel()4311 void MultipleTaskPayloadsInstance::generateReferenceLevel()
4312 {
4313     generateSolidRefLevel(tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f), m_referenceLevel);
4314 }
4315 
iterate()4316 tcu::TestStatus MultipleTaskPayloadsInstance::iterate()
4317 {
4318     const auto &vkd       = m_context.getDeviceInterface();
4319     const auto device     = m_context.getDevice();
4320     auto &alloc           = m_context.getDefaultAllocator();
4321     const auto queueIndex = m_context.getUniversalQueueFamilyIndex();
4322     const auto queue      = m_context.getUniversalQueue();
4323 
4324     const auto imageFormat = getOutputFormat();
4325     const auto tcuFormat   = mapVkFormat(imageFormat);
4326     const auto imageExtent = makeExtent3D(m_params->width, m_params->height, 1u);
4327     const auto imageUsage  = (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
4328 
4329     const VkImageCreateInfo colorBufferInfo = {
4330         VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
4331         nullptr,                             // const void* pNext;
4332         0u,                                  // VkImageCreateFlags flags;
4333         VK_IMAGE_TYPE_2D,                    // VkImageType imageType;
4334         imageFormat,                         // VkFormat format;
4335         imageExtent,                         // VkExtent3D extent;
4336         1u,                                  // uint32_t mipLevels;
4337         1u,                                  // uint32_t arrayLayers;
4338         VK_SAMPLE_COUNT_1_BIT,               // VkSampleCountFlagBits samples;
4339         VK_IMAGE_TILING_OPTIMAL,             // VkImageTiling tiling;
4340         imageUsage,                          // VkImageUsageFlags usage;
4341         VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
4342         0u,                                  // uint32_t queueFamilyIndexCount;
4343         nullptr,                             // const uint32_t* pQueueFamilyIndices;
4344         VK_IMAGE_LAYOUT_UNDEFINED,           // VkImageLayout initialLayout;
4345     };
4346 
4347     // Create color image and view.
4348     ImageWithMemory colorImage(vkd, device, alloc, colorBufferInfo, MemoryRequirement::Any);
4349     const auto colorSRR  = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
4350     const auto colorSRL  = makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
4351     const auto colorView = makeImageView(vkd, device, colorImage.get(), VK_IMAGE_VIEW_TYPE_2D, imageFormat, colorSRR);
4352 
4353     // Create a memory buffer for verification.
4354     const auto verificationBufferSize =
4355         static_cast<VkDeviceSize>(imageExtent.width * imageExtent.height * tcu::getPixelSize(tcuFormat));
4356     const auto verificationBufferUsage = (VK_BUFFER_USAGE_TRANSFER_DST_BIT);
4357     const auto verificationBufferInfo  = makeBufferCreateInfo(verificationBufferSize, verificationBufferUsage);
4358 
4359     BufferWithMemory verificationBuffer(vkd, device, alloc, verificationBufferInfo, MemoryRequirement::HostVisible);
4360     auto &verificationBufferAlloc = verificationBuffer.getAllocation();
4361     void *verificationBufferData  = verificationBufferAlloc.getHostPtr();
4362 
4363     // Pipeline layout.
4364     const auto pcSize         = static_cast<uint32_t>(sizeof(uint32_t));
4365     const auto pcRange        = makePushConstantRange(VK_SHADER_STAGE_TASK_BIT_EXT, 0u, pcSize);
4366     const auto pipelineLayout = makePipelineLayout(vkd, device, DE_NULL, &pcRange);
4367 
4368     // Shader modules.
4369     const auto &binaries = m_context.getBinaryCollection();
4370     const auto hasTask   = binaries.contains("task");
4371 
4372     const auto meshShader = createShaderModule(vkd, device, binaries.get("mesh"));
4373     const auto fragShader = createShaderModule(vkd, device, binaries.get("frag"));
4374 
4375     Move<VkShaderModule> taskShader;
4376     if (hasTask)
4377         taskShader = createShaderModule(vkd, device, binaries.get("task"));
4378 
4379     // Render pass.
4380     const auto renderPass = makeRenderPass(vkd, device, imageFormat);
4381 
4382     // Framebuffer.
4383     const auto framebuffer =
4384         makeFramebuffer(vkd, device, renderPass.get(), colorView.get(), imageExtent.width, imageExtent.height);
4385 
4386     // Viewport and scissor.
4387     const std::vector<VkViewport> viewports(1u, makeViewport(imageExtent));
4388     const std::vector<VkRect2D> scissors(1u, makeRect2D(imageExtent));
4389 
4390     // Color blending.
4391     const auto colorWriteMask =
4392         (VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT);
4393     const VkPipelineColorBlendAttachmentState blendAttState = {
4394         VK_TRUE,             // VkBool32 blendEnable;
4395         VK_BLEND_FACTOR_ONE, // VkBlendFactor srcColorBlendFactor;
4396         VK_BLEND_FACTOR_ONE, // VkBlendFactor dstColorBlendFactor;
4397         VK_BLEND_OP_ADD,     // VkBlendOp colorBlendOp;
4398         VK_BLEND_FACTOR_ONE, // VkBlendFactor srcAlphaBlendFactor;
4399         VK_BLEND_FACTOR_ONE, // VkBlendFactor dstAlphaBlendFactor;
4400         VK_BLEND_OP_ADD,     // VkBlendOp alphaBlendOp;
4401         colorWriteMask,      // VkColorComponentFlags colorWriteMask;
4402     };
4403 
4404     const VkPipelineColorBlendStateCreateInfo colorBlendInfo = {
4405         VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, // VkStructureType sType;
4406         nullptr,                                                  // const void* pNext;
4407         0u,                                                       // VkPipelineColorBlendStateCreateFlags flags;
4408         VK_FALSE,                                                 // VkBool32 logicOpEnable;
4409         VK_LOGIC_OP_OR,                                           // VkLogicOp logicOp;
4410         1u,                                                       // uint32_t attachmentCount;
4411         &blendAttState,           // const VkPipelineColorBlendAttachmentState* pAttachments;
4412         {0.0f, 0.0f, 0.0f, 0.0f}, // float blendConstants[4];
4413     };
4414 
4415     const auto pipeline = makeGraphicsPipeline(vkd, device, pipelineLayout.get(), taskShader.get(), meshShader.get(),
4416                                                fragShader.get(), renderPass.get(), viewports, scissors, 0u /*subpass*/,
4417                                                nullptr, nullptr, nullptr, &colorBlendInfo);
4418 
4419     // Command pool and buffer.
4420     const auto cmdPool      = makeCommandPool(vkd, device, queueIndex);
4421     const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
4422     const auto cmdBuffer    = cmdBufferPtr.get();
4423 
4424     beginCommandBuffer(vkd, cmdBuffer);
4425 
4426     // Run pipeline.
4427     const tcu::Vec4 clearColor(0.0f, 0.0f, 0.0f, 0.0f);
4428     const auto drawCount  = m_params->drawCount();
4429     const uint32_t pcData = MultipleTaskPayloadsCase::kGoodKeyIdx;
4430     beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u), clearColor);
4431     vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
4432     vkd.cmdPushConstants(cmdBuffer, pipelineLayout.get(), VK_SHADER_STAGE_TASK_BIT_EXT, 0u, pcSize, &pcData);
4433     vkd.cmdDrawMeshTasksEXT(cmdBuffer, drawCount.x(), drawCount.y(), drawCount.z());
4434     endRenderPass(vkd, cmdBuffer);
4435 
4436     // Copy color buffer to verification buffer.
4437     const auto colorAccess   = (VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT);
4438     const auto transferRead  = VK_ACCESS_TRANSFER_READ_BIT;
4439     const auto transferWrite = VK_ACCESS_TRANSFER_WRITE_BIT;
4440     const auto hostRead      = VK_ACCESS_HOST_READ_BIT;
4441 
4442     const auto preCopyBarrier =
4443         makeImageMemoryBarrier(colorAccess, transferRead, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
4444                                VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorImage.get(), colorSRR);
4445     const auto postCopyBarrier = makeMemoryBarrier(transferWrite, hostRead);
4446     const auto copyRegion      = makeBufferImageCopy(imageExtent, colorSRL);
4447 
4448     vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u,
4449                            0u, nullptr, 0u, nullptr, 1u, &preCopyBarrier);
4450     vkd.cmdCopyImageToBuffer(cmdBuffer, colorImage.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
4451                              verificationBuffer.get(), 1u, &copyRegion);
4452     vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u,
4453                            &postCopyBarrier, 0u, nullptr, 0u, nullptr);
4454 
4455     endCommandBuffer(vkd, cmdBuffer);
4456     submitCommandsAndWait(vkd, device, queue, cmdBuffer);
4457 
4458     // Generate reference image and compare results.
4459     const tcu::IVec3 iExtent(static_cast<int>(imageExtent.width), static_cast<int>(imageExtent.height), 1);
4460     const tcu::ConstPixelBufferAccess verificationAccess(tcuFormat, iExtent, verificationBufferData);
4461 
4462     generateReferenceLevel();
4463     invalidateAlloc(vkd, device, verificationBufferAlloc);
4464     if (!verifyResult(verificationAccess))
4465         TCU_FAIL("Result does not match reference; check log for details");
4466 
4467     return tcu::TestStatus::pass("Pass");
4468 }
4469 
4470 // Test multiple task/mesh draw calls and updating push constants and descriptors in between. We will divide the output image in 4
4471 // quadrants, and use each task/mesh draw call to draw on a particular quadrant. The output color in each quadrant will be composed
4472 // of data from different sources: storage buffer, sampled image or push constant value, and those will change before each draw
4473 // call. We'll prepare different descriptors for each quadrant.
4474 class RebindSetsCase : public MeshShaderMiscCase
4475 {
4476 public:
RebindSetsCase(tcu::TestContext & testCtx,const std::string & name,ParamsPtr params)4477     RebindSetsCase(tcu::TestContext &testCtx, const std::string &name, ParamsPtr params)
4478         : MeshShaderMiscCase(testCtx, name, std::move(params))
4479     {
4480         const auto drawCount = m_params->drawCount();
4481         DE_ASSERT(drawCount.x() == 1u && drawCount.y() == 1u && drawCount.z() == 1u);
4482         DE_UNREF(drawCount); // For release builds.
4483     }
~RebindSetsCase(void)4484     virtual ~RebindSetsCase(void)
4485     {
4486     }
4487 
4488     TestInstance *createInstance(Context &context) const override;
4489     void checkSupport(Context &context) const override;
4490     void initPrograms(vk::SourceCollections &programCollection) const override;
4491 };
4492 
4493 class RebindSetsInstance : public MeshShaderMiscInstance
4494 {
4495 public:
RebindSetsInstance(Context & context,const MiscTestParams * params)4496     RebindSetsInstance(Context &context, const MiscTestParams *params) : MeshShaderMiscInstance(context, params)
4497     {
4498     }
~RebindSetsInstance(void)4499     virtual ~RebindSetsInstance(void)
4500     {
4501     }
4502 
4503     void generateReferenceLevel() override;
4504     tcu::TestStatus iterate(void) override;
4505 
4506 protected:
4507     struct QuadrantInfo
4508     {
4509         // Offsets in framebuffer coordinates (0 to 2, final coordinates in range -1 to 1)
4510         float offsetX;
4511         float offsetY;
4512         tcu::Vec4 color;
4513 
QuadrantInfovkt::MeshShader::__anonb7c155300111::RebindSetsInstance::QuadrantInfo4514         QuadrantInfo(float offsetX_, float offsetY_, float red, float green, float blue)
4515             : offsetX(offsetX_)
4516             , offsetY(offsetY_)
4517             , color(red, green, blue, 1.0f)
4518         {
4519         }
4520     };
4521 
getQuadrantInfos()4522     static std::vector<QuadrantInfo> getQuadrantInfos()
4523     {
4524         std::vector<QuadrantInfo> infos;
4525         infos.reserve(4u);
4526 
4527         //                 offsets     rgb
4528         infos.emplace_back(0.0f, 0.0f, 1.0f, 0.0f, 1.0f);
4529         infos.emplace_back(1.0f, 0.0f, 1.0f, 1.0f, 0.0f);
4530         infos.emplace_back(0.0f, 1.0f, 0.0f, 0.0f, 1.0f);
4531         infos.emplace_back(1.0f, 1.0f, 0.0f, 1.0f, 1.0f);
4532 
4533         return infos;
4534     }
4535 
4536     struct PushConstants
4537     {
4538         float offsetX;
4539         float offsetY;
4540         float blueComponent;
4541     };
4542 };
4543 
createInstance(Context & context) const4544 TestInstance *RebindSetsCase::createInstance(Context &context) const
4545 {
4546     return new RebindSetsInstance(context, m_params.get());
4547 }
4548 
checkSupport(Context & context) const4549 void RebindSetsCase::checkSupport(Context &context) const
4550 {
4551     genericCheckSupport(context, true, false);
4552 }
4553 
initPrograms(vk::SourceCollections & programCollection) const4554 void RebindSetsCase::initPrograms(vk::SourceCollections &programCollection) const
4555 {
4556     const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
4557 
4558     // Generic fragment shader.
4559     MeshShaderMiscCase::initPrograms(programCollection);
4560 
4561     const std::string ssbo =
4562         "layout (set=0, binding=0, std430) readonly buffer SSBOBlock { float redComponent; } ssbo;\n";
4563     const std::string combined = "layout (set=0, binding=1) uniform sampler2D greenComponent;\n";
4564     const std::string pc =
4565         "layout (push_constant, std430) uniform PCBlock { float offsetX; float offsetY; float blueComponent; } pc;\n";
4566     const std::string payload = "struct TaskData { float redComponent; }; taskPayloadSharedEXT TaskData td;\n";
4567 
4568     std::ostringstream task;
4569     task << "#version 450\n"
4570          << "#extension GL_EXT_mesh_shader : enable\n"
4571          << "layout (local_size_x=1, local_size_y=1, local_size_z=1) in;\n"
4572          << "\n"
4573          << ssbo << payload << "\n"
4574          << "void main (void)\n"
4575          << "{\n"
4576          << "    td.redComponent = ssbo.redComponent;\n"
4577          << "    EmitMeshTasksEXT(1u, 1u, 1u);\n"
4578          << "}\n";
4579     programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
4580 
4581     std::ostringstream mesh;
4582     mesh << "#version 450\n"
4583          << "#extension GL_EXT_mesh_shader : enable\n"
4584          << "layout (local_size_x=1, local_size_y=1, local_size_z=1) in;\n"
4585          << "layout (triangles) out;\n"
4586          << "layout (max_vertices=4, max_primitives=2) out;\n"
4587          << "\n"
4588          << combined << pc << payload << "layout (location=0) out perprimitiveEXT vec4 primitiveColor[];\n"
4589          << "\n"
4590          << "void main (void)\n"
4591          << "{\n"
4592          << "    SetMeshOutputsEXT(4u, 2u);\n"
4593          << "\n"
4594          << "    gl_MeshVerticesEXT[0].gl_Position = vec4(-1.0 + pc.offsetX, -1.0 + pc.offsetY, 0.0, 1.0);\n"
4595          << "    gl_MeshVerticesEXT[1].gl_Position = vec4( 0.0 + pc.offsetX, -1.0 + pc.offsetY, 0.0, 1.0);\n"
4596          << "    gl_MeshVerticesEXT[2].gl_Position = vec4(-1.0 + pc.offsetX,  0.0 + pc.offsetY, 0.0, 1.0);\n"
4597          << "    gl_MeshVerticesEXT[3].gl_Position = vec4( 0.0 + pc.offsetX,  0.0 + pc.offsetY, 0.0, 1.0);\n"
4598          << "\n"
4599          << "    gl_PrimitiveTriangleIndicesEXT[0] = uvec3(2u, 1u, 0u);\n"
4600          << "    gl_PrimitiveTriangleIndicesEXT[1] = uvec3(2u, 3u, 1u);\n"
4601          << "\n"
4602          << "    const vec4 primColor = vec4(td.redComponent, texture(greenComponent, vec2(0.5, 0.5)).x, "
4603             "pc.blueComponent, 1.0);\n"
4604          << "    primitiveColor[0] = primColor;\n"
4605          << "    primitiveColor[1] = primColor;\n"
4606          << "}\n";
4607     programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
4608 }
4609 
generateReferenceLevel()4610 void RebindSetsInstance::generateReferenceLevel()
4611 {
4612     const auto iWidth  = static_cast<int>(m_params->width);
4613     const auto iHeight = static_cast<int>(m_params->height);
4614     const auto fWidth  = static_cast<float>(iWidth);
4615     const auto fHeight = static_cast<float>(iHeight);
4616 
4617     DE_ASSERT(iWidth % 2 == 0);
4618     DE_ASSERT(iHeight % 2 == 0);
4619 
4620     const auto halfWidth  = iWidth / 2;
4621     const auto halfHeight = iHeight / 2;
4622 
4623     const auto format    = getOutputFormat();
4624     const auto tcuFormat = mapVkFormat(format);
4625 
4626     m_referenceLevel.reset(new tcu::TextureLevel(tcuFormat, iWidth, iHeight));
4627     const auto access = m_referenceLevel->getAccess();
4628 
4629     const auto quadrantInfos = getQuadrantInfos();
4630     DE_ASSERT(quadrantInfos.size() == 4u);
4631 
4632     for (const auto &quadrantInfo : quadrantInfos)
4633     {
4634         const auto xCorner   = static_cast<int>(quadrantInfo.offsetX / 2.0f * fWidth);
4635         const auto yCorner   = static_cast<int>(quadrantInfo.offsetY / 2.0f * fHeight);
4636         const auto subregion = tcu::getSubregion(access, xCorner, yCorner, halfWidth, halfHeight);
4637 
4638         tcu::clear(subregion, quadrantInfo.color);
4639     }
4640 }
4641 
iterate(void)4642 tcu::TestStatus RebindSetsInstance::iterate(void)
4643 {
4644     const auto &vkd          = m_context.getDeviceInterface();
4645     const auto device        = m_context.getDevice();
4646     auto &alloc              = m_context.getDefaultAllocator();
4647     const auto queueIndex    = m_context.getUniversalQueueFamilyIndex();
4648     const auto queue         = m_context.getUniversalQueue();
4649     const auto quadrantInfos = getQuadrantInfos();
4650     const auto setCount      = static_cast<uint32_t>(quadrantInfos.size());
4651     const auto textureExtent = makeExtent3D(1u, 1u, 1u);
4652     const tcu::IVec3 iTexExtent(static_cast<int>(textureExtent.width), static_cast<int>(textureExtent.height),
4653                                 static_cast<int>(textureExtent.depth));
4654     const auto textureFormat  = VK_FORMAT_R8G8B8A8_UNORM;
4655     const auto tcuTexFormat   = mapVkFormat(textureFormat);
4656     const auto textureUsage   = (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT);
4657     const auto colorExtent    = makeExtent3D(m_params->width, m_params->height, 1u);
4658     const auto colorFormat    = getOutputFormat();
4659     const auto tcuColorFormat = mapVkFormat(colorFormat);
4660     const auto colorUsage     = (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
4661 
4662     DE_ASSERT(quadrantInfos.size() == 4u);
4663 
4664     // We need 4 descriptor sets: 4 buffers, 4 images and 1 sampler.
4665     const VkSamplerCreateInfo samplerCreateInfo = initVulkanStructure();
4666     const auto sampler                          = createSampler(vkd, device, &samplerCreateInfo);
4667 
4668     // Buffers.
4669     const auto ssboSize       = static_cast<VkDeviceSize>(sizeof(float));
4670     const auto ssboCreateInfo = makeBufferCreateInfo(ssboSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
4671 
4672     std::vector<std::unique_ptr<BufferWithMemory>> ssbos;
4673     ssbos.reserve(quadrantInfos.size());
4674     for (const auto &quadrantInfo : quadrantInfos)
4675     {
4676         ssbos.emplace_back(new BufferWithMemory(vkd, device, alloc, ssboCreateInfo, MemoryRequirement::HostVisible));
4677         void *data              = ssbos.back()->getAllocation().getHostPtr();
4678         const auto redComponent = quadrantInfo.color.x();
4679         deMemcpy(data, &redComponent, sizeof(redComponent));
4680     }
4681 
4682     // Textures.
4683     const VkImageCreateInfo textureCreateInfo = {
4684         VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
4685         nullptr,                             // const void* pNext;
4686         0u,                                  // VkImageCreateFlags flags;
4687         VK_IMAGE_TYPE_2D,                    // VkImageType imageType;
4688         textureFormat,                       // VkFormat format;
4689         textureExtent,                       // VkExtent3D extent;
4690         1u,                                  // uint32_t mipLevels;
4691         1u,                                  // uint32_t arrayLayers;
4692         VK_SAMPLE_COUNT_1_BIT,               // VkSampleCountFlagBits samples;
4693         VK_IMAGE_TILING_OPTIMAL,             // VkImageTiling tiling;
4694         textureUsage,                        // VkImageUsageFlags usage;
4695         VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
4696         0u,                                  // uint32_t queueFamilyIndexCount;
4697         nullptr,                             // const uint32_t* pQueueFamilyIndices;
4698         VK_IMAGE_LAYOUT_UNDEFINED,           // VkImageLayout initialLayout;
4699     };
4700     const auto textureSRR        = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
4701     const auto textureSRL        = makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
4702     const auto textureCopyRegion = makeBufferImageCopy(textureExtent, textureSRL);
4703 
4704     std::vector<std::unique_ptr<ImageWithMemory>> textures;
4705     for (size_t i = 0u; i < quadrantInfos.size(); ++i)
4706         textures.emplace_back(new ImageWithMemory(vkd, device, alloc, textureCreateInfo, MemoryRequirement::Any));
4707 
4708     std::vector<Move<VkImageView>> textureViews;
4709     textureViews.reserve(quadrantInfos.size());
4710     for (const auto &texture : textures)
4711         textureViews.push_back(
4712             makeImageView(vkd, device, texture->get(), VK_IMAGE_VIEW_TYPE_2D, textureFormat, textureSRR));
4713 
4714     // Auxiliar buffers to fill the images with the right colors.
4715     const auto pixelSize  = tcu::getPixelSize(tcuTexFormat);
4716     const auto pixelCount = textureExtent.width * textureExtent.height * textureExtent.depth;
4717     const auto auxiliarBufferSize =
4718         static_cast<VkDeviceSize>(static_cast<VkDeviceSize>(pixelSize) * static_cast<VkDeviceSize>(pixelCount));
4719     const auto auxiliarBufferCreateInfo = makeBufferCreateInfo(auxiliarBufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT);
4720 
4721     std::vector<std::unique_ptr<BufferWithMemory>> auxiliarBuffers;
4722     auxiliarBuffers.reserve(quadrantInfos.size());
4723     for (const auto &quadrantInfo : quadrantInfos)
4724     {
4725         auxiliarBuffers.emplace_back(
4726             new BufferWithMemory(vkd, device, alloc, auxiliarBufferCreateInfo, MemoryRequirement::HostVisible));
4727 
4728         void *data = auxiliarBuffers.back()->getAllocation().getHostPtr();
4729         tcu::PixelBufferAccess access(tcuTexFormat, iTexExtent, data);
4730         const tcu::Vec4 quadrantColor(quadrantInfo.color.y(), 0.0f, 0.0f, 1.0f);
4731 
4732         tcu::clear(access, quadrantColor);
4733     }
4734 
4735     // Descriptor set layout.
4736     DescriptorSetLayoutBuilder layoutBuilder;
4737     layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_TASK_BIT_EXT);
4738     layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, VK_SHADER_STAGE_MESH_BIT_EXT);
4739     const auto setLayout = layoutBuilder.build(vkd, device);
4740 
4741     // Pipeline layout.
4742     const auto pcSize         = static_cast<uint32_t>(sizeof(PushConstants));
4743     const auto pcRange        = makePushConstantRange(VK_SHADER_STAGE_MESH_BIT_EXT, 0u, pcSize);
4744     const auto pipelineLayout = makePipelineLayout(vkd, device, setLayout.get(), &pcRange);
4745 
4746     // Descriptor pool and sets.
4747     DescriptorPoolBuilder poolBuilder;
4748     poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, setCount);
4749     poolBuilder.addType(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, setCount);
4750     const auto descriptorPool =
4751         poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, setCount);
4752 
4753     std::vector<Move<VkDescriptorSet>> descriptorSets;
4754     for (size_t i = 0; i < quadrantInfos.size(); ++i)
4755         descriptorSets.push_back(makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get()));
4756 
4757     // Update descriptor sets.
4758     DescriptorSetUpdateBuilder updateBuilder;
4759     for (size_t i = 0; i < descriptorSets.size(); ++i)
4760     {
4761         const auto &descriptorSet = descriptorSets.at(i);
4762         const auto &ssbo          = ssbos.at(i);
4763         const auto &textureView   = textureViews.at(i);
4764         const auto descBufferInfo = makeDescriptorBufferInfo(ssbo->get(), 0ull, ssboSize);
4765         const auto descImageInfo =
4766             makeDescriptorImageInfo(sampler.get(), textureView.get(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
4767 
4768         updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(0u),
4769                                   VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descBufferInfo);
4770         updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(1u),
4771                                   VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, &descImageInfo);
4772     }
4773     updateBuilder.update(vkd, device);
4774 
4775     // Color attachment.
4776     const VkImageCreateInfo colorCreateInfo = {
4777         VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
4778         nullptr,                             // const void* pNext;
4779         0u,                                  // VkImageCreateFlags flags;
4780         VK_IMAGE_TYPE_2D,                    // VkImageType imageType;
4781         colorFormat,                         // VkFormat format;
4782         colorExtent,                         // VkExtent3D extent;
4783         1u,                                  // uint32_t mipLevels;
4784         1u,                                  // uint32_t arrayLayers;
4785         VK_SAMPLE_COUNT_1_BIT,               // VkSampleCountFlagBits samples;
4786         VK_IMAGE_TILING_OPTIMAL,             // VkImageTiling tiling;
4787         colorUsage,                          // VkImageUsageFlags usage;
4788         VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
4789         0u,                                  // uint32_t queueFamilyIndexCount;
4790         nullptr,                             // const uint32_t* pQueueFamilyIndices;
4791         VK_IMAGE_LAYOUT_UNDEFINED,           // VkImageLayout initialLayout;
4792     };
4793     const auto colorSRR = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
4794     const auto colorSRL = makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
4795 
4796     ImageWithMemory colorAttachment(vkd, device, alloc, colorCreateInfo, MemoryRequirement::Any);
4797     const auto colorView =
4798         makeImageView(vkd, device, colorAttachment.get(), VK_IMAGE_VIEW_TYPE_2D, colorFormat, colorSRR);
4799 
4800     // Create a memory buffer for verification.
4801     const auto verificationBufferSize =
4802         static_cast<VkDeviceSize>(colorExtent.width * colorExtent.height * tcu::getPixelSize(tcuColorFormat));
4803     const auto verificationBufferUsage = (VK_BUFFER_USAGE_TRANSFER_DST_BIT);
4804     const auto verificationBufferInfo  = makeBufferCreateInfo(verificationBufferSize, verificationBufferUsage);
4805 
4806     BufferWithMemory verificationBuffer(vkd, device, alloc, verificationBufferInfo, MemoryRequirement::HostVisible);
4807     auto &verificationBufferAlloc = verificationBuffer.getAllocation();
4808     void *verificationBufferData  = verificationBufferAlloc.getHostPtr();
4809 
4810     // Render pass and framebuffer.
4811     const auto renderPass = makeRenderPass(vkd, device, colorFormat);
4812     const auto framebuffer =
4813         makeFramebuffer(vkd, device, renderPass.get(), colorView.get(), colorExtent.width, colorExtent.height);
4814 
4815     const std::vector<VkViewport> viewports(1u, makeViewport(colorExtent));
4816     const std::vector<VkRect2D> scissors(1u, makeRect2D(colorExtent));
4817 
4818     // Shader modules and pipeline.
4819     const auto &binaries  = m_context.getBinaryCollection();
4820     const auto taskShader = createShaderModule(vkd, device, binaries.get("task"));
4821     const auto meshShader = createShaderModule(vkd, device, binaries.get("mesh"));
4822     const auto fragShader = createShaderModule(vkd, device, binaries.get("frag"));
4823     const auto pipeline   = makeGraphicsPipeline(vkd, device, pipelineLayout.get(), taskShader.get(), meshShader.get(),
4824                                                  fragShader.get(), renderPass.get(), viewports, scissors);
4825 
4826     // Command pool and buffer.
4827     const auto cmdPool      = makeCommandPool(vkd, device, queueIndex);
4828     const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
4829     const auto cmdBuffer    = cmdBufferPtr.get();
4830 
4831     beginCommandBuffer(vkd, cmdBuffer);
4832 
4833     // Copy data from auxiliar buffers to textures.
4834     for (const auto &texture : textures)
4835     {
4836         const auto prepareTextureForCopy =
4837             makeImageMemoryBarrier(0u, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
4838                                    VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, texture->get(), textureSRR);
4839         cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
4840                                       &prepareTextureForCopy);
4841     }
4842 
4843     for (size_t i = 0; i < auxiliarBuffers.size(); ++i)
4844     {
4845         const auto &auxBuffer = auxiliarBuffers.at(i);
4846         const auto &texture   = textures.at(i);
4847         vkd.cmdCopyBufferToImage(cmdBuffer, auxBuffer->get(), texture->get(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1u,
4848                                  &textureCopyRegion);
4849     }
4850 
4851     // Prepare textures for sampling.
4852     for (const auto &texture : textures)
4853     {
4854         const auto prepareTextureForSampling = makeImageMemoryBarrier(
4855             VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
4856             VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, texture->get(), textureSRR);
4857         cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT,
4858                                       VK_PIPELINE_STAGE_MESH_SHADER_BIT_EXT, &prepareTextureForSampling);
4859     }
4860 
4861     // Render stuff.
4862     beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u),
4863                     tcu::Vec4(0.0f, 0.0f, 0.0f, 1.0f));
4864     vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get());
4865 
4866     const auto drawCount = m_params->drawCount();
4867     for (size_t i = 0; i < quadrantInfos.size(); ++i)
4868     {
4869         const auto &quadrantInfo  = quadrantInfos.at(i);
4870         const auto &descriptorSet = descriptorSets.at(i);
4871 
4872         PushConstants pcData;
4873         pcData.blueComponent = quadrantInfo.color.z();
4874         pcData.offsetX       = quadrantInfo.offsetX;
4875         pcData.offsetY       = quadrantInfo.offsetY;
4876 
4877         vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout.get(), 0u, 1u,
4878                                   &descriptorSet.get(), 0u, nullptr);
4879         vkd.cmdPushConstants(cmdBuffer, pipelineLayout.get(), VK_SHADER_STAGE_MESH_BIT_EXT, 0u, pcSize, &pcData);
4880         vkd.cmdDrawMeshTasksEXT(cmdBuffer, drawCount.x(), drawCount.y(), drawCount.z());
4881     }
4882 
4883     endRenderPass(vkd, cmdBuffer);
4884 
4885     // Copy color attachment to verification buffer.
4886     const auto preCopyBarrier = makeImageMemoryBarrier(
4887         VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
4888         VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorAttachment.get(), colorSRR);
4889     const auto postCopyBarrier = makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
4890     const auto copyRegion      = makeBufferImageCopy(colorExtent, colorSRL);
4891 
4892     vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u,
4893                            0u, nullptr, 0u, nullptr, 1u, &preCopyBarrier);
4894     vkd.cmdCopyImageToBuffer(cmdBuffer, colorAttachment.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
4895                              verificationBuffer.get(), 1u, &copyRegion);
4896     vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u,
4897                            &postCopyBarrier, 0u, nullptr, 0u, nullptr);
4898 
4899     endCommandBuffer(vkd, cmdBuffer);
4900     submitCommandsAndWait(vkd, device, queue, cmdBuffer);
4901 
4902     // Generate reference image and compare results.
4903     const tcu::IVec3 iColorExtent(static_cast<int>(colorExtent.width), static_cast<int>(colorExtent.height), 1);
4904     const tcu::ConstPixelBufferAccess verificationAccess(tcuColorFormat, iColorExtent, verificationBufferData);
4905 
4906     generateReferenceLevel();
4907     invalidateAlloc(vkd, device, verificationBufferAlloc);
4908     if (!verifyResult(verificationAccess))
4909         TCU_FAIL("Result does not match reference; check log for details");
4910 
4911     return tcu::TestStatus::pass("Pass");
4912 }
4913 
4914 } // anonymous namespace
4915 
createMeshShaderMiscTestsEXT(tcu::TestContext & testCtx)4916 tcu::TestCaseGroup *createMeshShaderMiscTestsEXT(tcu::TestContext &testCtx)
4917 {
4918     GroupPtr miscTests(new tcu::TestCaseGroup(testCtx, "misc"));
4919 
4920     {
4921         ParamsPtr paramsPtr(new MiscTestParams(
4922             /*taskCount*/ tcu::just(tcu::UVec3(2u, 1u, 1u)),
4923             /*meshCount*/ tcu::UVec3(2u, 1u, 1u),
4924             /*width*/ 8u,
4925             /*height*/ 8u));
4926 
4927         // Pass a complex structure from the task to the mesh shader
4928         miscTests->addChild(new ComplexTaskDataCase(testCtx, "complex_task_data", std::move(paramsPtr)));
4929     }
4930 
4931     {
4932         ParamsPtr paramsPtr(new MiscTestParams(
4933             /*taskCount*/ tcu::Nothing,
4934             /*meshCount*/ tcu::UVec3(1u, 1u, 1u),
4935             /*width*/ 5u,    // Use an odd value so there's a pixel in the exact center.
4936             /*height*/ 7u)); // Idem.
4937 
4938         // Draw a single point
4939         miscTests->addChild(new SinglePointCase(testCtx, "single_point", std::move(paramsPtr)));
4940     }
4941 
4942     {
4943         ParamsPtr paramsPtr(new MiscTestParams(
4944             /*taskCount*/ tcu::Nothing,
4945             /*meshCount*/ tcu::UVec3(1u, 1u, 1u),
4946             /*width*/ 5u,    // Use an odd value so there's a pixel in the exact center.
4947             /*height*/ 7u)); // Idem.
4948 
4949         // VK_KHR_maintenance5: Test default point size is 1.0f
4950         // Draw a single point without writing to PointSize
4951         miscTests->addChild(new SinglePointCase(testCtx, "single_point_default_size", std::move(paramsPtr), false));
4952     }
4953 
4954     {
4955         ParamsPtr paramsPtr(new MiscTestParams(
4956             /*taskCount*/ tcu::Nothing,
4957             /*meshCount*/ tcu::UVec3(1u, 1u, 1u),
4958             /*width*/ 8u,
4959             /*height*/ 5u)); // Use an odd value so there's a center line.
4960 
4961         // Draw a single line
4962         miscTests->addChild(new SingleLineCase(testCtx, "single_line", std::move(paramsPtr)));
4963     }
4964 
4965     {
4966         ParamsPtr paramsPtr(new MiscTestParams(
4967             /*taskCount*/ tcu::Nothing,
4968             /*meshCount*/ tcu::UVec3(1u, 1u, 1u),
4969             /*width*/ 5u,    // Use an odd value so there's a pixel in the exact center.
4970             /*height*/ 7u)); // Idem.
4971 
4972         // Draw a single triangle
4973         miscTests->addChild(new SingleTriangleCase(testCtx, "single_triangle", std::move(paramsPtr)));
4974     }
4975 
4976     {
4977         ParamsPtr paramsPtr(new MiscTestParams(
4978             /*taskCount*/ tcu::Nothing,
4979             /*meshCount*/ tcu::UVec3(1u, 1u, 1u),
4980             /*width*/ 16u,
4981             /*height*/ 16u));
4982 
4983         // Draw the maximum number of points
4984         miscTests->addChild(new MaxPointsCase(testCtx, "max_points", std::move(paramsPtr)));
4985     }
4986 
4987     {
4988         ParamsPtr paramsPtr(new MiscTestParams(
4989             /*taskCount*/ tcu::Nothing,
4990             /*meshCount*/ tcu::UVec3(1u, 1u, 1u),
4991             /*width*/ 1u,
4992             /*height*/ 1020u));
4993 
4994         // Draw the maximum number of lines
4995         miscTests->addChild(new MaxLinesCase(testCtx, "max_lines", std::move(paramsPtr)));
4996     }
4997 
4998     {
4999         const tcu::UVec3 localSizes[] = {
5000             tcu::UVec3(2u, 4u, 8u),
5001             tcu::UVec3(4u, 2u, 4u),
5002             tcu::UVec3(2u, 2u, 4u),
5003         };
5004 
5005         // Draw the maximum number of triangles using a work group size of...
5006         for (const auto &localSize : localSizes)
5007         {
5008             const auto workGroupSize = (localSize.x() * localSize.y() * localSize.z());
5009             const auto wgsStr        = std::to_string(workGroupSize);
5010             const auto testName      = "max_triangles_workgroupsize_" + wgsStr;
5011 
5012             ParamsPtr paramsPtr(new MaxTrianglesCase::Params(
5013                 /*meshCount*/ tcu::UVec3(1u, 1u, 1u),
5014                 /*width*/ 512u,
5015                 /*height*/ 512u,
5016                 /*localSize*/ localSize));
5017 
5018             miscTests->addChild(new MaxTrianglesCase(testCtx, testName, std::move(paramsPtr)));
5019         }
5020     }
5021 
5022     using LargeWorkGroupParamsPtr = std::unique_ptr<LargeWorkGroupParams>;
5023     const int dimensionCases[]    = {0, 1, 2};
5024 
5025     for (const auto &dim : dimensionCases)
5026     {
5027         const auto dimChar = dimSuffix(dim);
5028 
5029         {
5030             tcu::UVec3 taskCount(8u, 8u, 8u);
5031             taskCount[dim] = 65535u;
5032 
5033             LargeWorkGroupParamsPtr lwgParamsPtr(new LargeWorkGroupParams(
5034                 /*taskCount*/ tcu::just(taskCount),
5035                 /*meshCount*/ tcu::UVec3(1u, 1u, 1u),
5036                 /*width*/ 2040u,
5037                 /*height*/ 2056u,
5038                 /*localInvocations*/ tcu::UVec3(1u, 1u, 1u)));
5039 
5040             ParamsPtr paramsPtr(lwgParamsPtr.release());
5041 
5042             const auto name = std::string("many_task_work_groups_") + dimChar;
5043 
5044             miscTests->addChild(new LargeWorkGroupCase(testCtx, name, std::move(paramsPtr)));
5045         }
5046 
5047         {
5048             tcu::UVec3 meshCount(8u, 8u, 8u);
5049             meshCount[dim] = 65535u;
5050 
5051             LargeWorkGroupParamsPtr lwgParamsPtr(new LargeWorkGroupParams(
5052                 /*taskCount*/ tcu::Nothing,
5053                 /*meshCount*/ meshCount,
5054                 /*width*/ 2040u,
5055                 /*height*/ 2056u,
5056                 /*localInvocations*/ tcu::UVec3(1u, 1u, 1u)));
5057 
5058             ParamsPtr paramsPtr(lwgParamsPtr.release());
5059 
5060             const auto name = std::string("many_mesh_work_groups_") + dimChar;
5061 
5062             miscTests->addChild(new LargeWorkGroupCase(testCtx, name, std::move(paramsPtr)));
5063         }
5064 
5065         {
5066             tcu::UVec3 meshCount(1u, 1u, 1u);
5067             tcu::UVec3 taskCount(1u, 1u, 1u);
5068             tcu::UVec3 localInvs(1u, 1u, 1u);
5069 
5070             meshCount[dim] = 256u;
5071             taskCount[dim] = 128u;
5072             localInvs[dim] = 128u;
5073 
5074             LargeWorkGroupParamsPtr lwgParamsPtr(new LargeWorkGroupParams(
5075                 /*taskCount*/ tcu::just(taskCount),
5076                 /*meshCount*/ meshCount,
5077                 /*width*/ 2048u,
5078                 /*height*/ 2048u,
5079                 /*localInvocations*/ localInvs));
5080 
5081             ParamsPtr paramsPtr(lwgParamsPtr.release());
5082 
5083             const auto name = std::string("many_task_mesh_work_groups_") + dimChar;
5084 
5085             miscTests->addChild(new LargeWorkGroupCase(testCtx, name, std::move(paramsPtr)));
5086         }
5087     }
5088 
5089     {
5090         const PrimitiveType types[] = {
5091             PrimitiveType::POINTS,
5092             PrimitiveType::LINES,
5093             PrimitiveType::TRIANGLES,
5094         };
5095 
5096         for (int i = 0; i < 2; ++i)
5097         {
5098             const bool extraWrites = (i > 0);
5099 
5100             // XXX Is this test legal? [https://gitlab.khronos.org/GLSL/GLSL/-/merge_requests/77#note_348252]
5101             if (extraWrites)
5102                 continue;
5103 
5104             for (const auto primType : types)
5105             {
5106                 std::unique_ptr<NoPrimitivesParams> params(new NoPrimitivesParams(
5107                     /*taskCount*/ (extraWrites ? tcu::just(tcu::UVec3(1u, 1u, 1u)) : tcu::Nothing),
5108                     /*meshCount*/ tcu::UVec3(1u, 1u, 1u),
5109                     /*width*/ 16u,
5110                     /*height*/ 16u,
5111                     /*primitiveType*/ primType));
5112 
5113                 ParamsPtr paramsPtr(params.release());
5114                 const auto primName    = primitiveTypeName(primType);
5115                 const std::string name = "no_" + primName + (extraWrites ? "_extra_writes" : "");
5116 
5117                 miscTests->addChild(extraWrites ?
5118                                         (new NoPrimitivesExtraWritesCase(testCtx, name, std::move(paramsPtr))) :
5119                                         (new NoPrimitivesCase(testCtx, name, std::move(paramsPtr))));
5120             }
5121         }
5122     }
5123 
5124     {
5125         for (int i = 0; i < 2; ++i)
5126         {
5127             const bool useTaskShader = (i == 0);
5128 
5129             ParamsPtr paramsPtr(new MiscTestParams(
5130                 /*taskCount*/ (useTaskShader ? tcu::just(tcu::UVec3(1u, 1u, 1u)) : tcu::Nothing),
5131                 /*meshCount*/ tcu::UVec3(1u, 1u, 1u),
5132                 /*width*/ 1u,
5133                 /*height*/ 1u));
5134 
5135             const std::string shader = (useTaskShader ? "task" : "mesh");
5136             const std::string name   = "barrier_in_" + shader;
5137 
5138             miscTests->addChild(new SimpleBarrierCase(testCtx, name, std::move(paramsPtr)));
5139         }
5140     }
5141 
5142     {
5143         const struct
5144         {
5145             MemoryBarrierType memBarrierType;
5146             std::string caseName;
5147         } barrierTypes[] = {
5148             {MemoryBarrierType::SHARED, "memory_barrier_shared"},
5149             {MemoryBarrierType::GROUP, "group_memory_barrier"},
5150         };
5151 
5152         for (const auto &barrierCase : barrierTypes)
5153         {
5154             for (int i = 0; i < 2; ++i)
5155             {
5156                 const bool useTaskShader = (i == 0);
5157 
5158                 std::unique_ptr<MemoryBarrierParams> paramsPtr(new MemoryBarrierParams(
5159                     /*taskCount*/ (useTaskShader ? tcu::just(tcu::UVec3(1u, 1u, 1u)) : tcu::Nothing),
5160                     /*meshCount*/ tcu::UVec3(1u, 1u, 1u),
5161                     /*width*/ 1u,
5162                     /*height*/ 1u,
5163                     /*memBarrierType*/ barrierCase.memBarrierType));
5164 
5165                 const std::string shader = (useTaskShader ? "task" : "mesh");
5166                 const std::string name   = barrierCase.caseName + "_in_" + shader;
5167 
5168                 miscTests->addChild(new MemoryBarrierCase(testCtx, name, std::move(paramsPtr)));
5169             }
5170         }
5171     }
5172 
5173     {
5174         for (int i = 0; i < 2; ++i)
5175         {
5176             const bool useTaskShader = (i > 0);
5177             const auto name          = std::string("custom_attributes") + (useTaskShader ? "_and_task_shader" : "");
5178 
5179             ParamsPtr paramsPtr(new MiscTestParams(
5180                 /*taskCount*/ (useTaskShader ? tcu::just(tcu::UVec3(1u, 1u, 1u)) : tcu::Nothing),
5181                 /*meshCount*/ tcu::UVec3(1u, 1u, 1u),
5182                 /*width*/ 32u,
5183                 /*height*/ 32u));
5184 
5185             miscTests->addChild(new CustomAttributesCase(testCtx, name, std::move(paramsPtr)));
5186         }
5187     }
5188 
5189     {
5190         for (int i = 0; i < 2; ++i)
5191         {
5192             const bool useTaskShader = (i > 0);
5193             const auto name          = std::string("push_constant") + (useTaskShader ? "_and_task_shader" : "");
5194 
5195             ParamsPtr paramsPtr(new MiscTestParams(
5196                 /*taskCount*/ (useTaskShader ? tcu::just(tcu::UVec3(1u, 1u, 1u)) : tcu::Nothing),
5197                 /*meshCount*/ tcu::UVec3(1u, 1u, 1u),
5198                 /*width*/ 16u,
5199                 /*height*/ 16u));
5200 
5201             miscTests->addChild(new PushConstantCase(testCtx, name, std::move(paramsPtr)));
5202         }
5203     }
5204 
5205     {
5206         ParamsPtr paramsPtr(new MaximizeThreadsParams(
5207             /*taskCount*/ tcu::Nothing,
5208             /*meshCount*/ tcu::UVec3(1u, 1u, 1u),
5209             /*width*/ 128u,
5210             /*height*/ 1u,
5211             /*localSize*/ 32u,
5212             /*numVertices*/ 128u,
5213             /*numPrimitives*/ 256u));
5214 
5215         miscTests->addChild(new MaximizePrimitivesCase(testCtx, "maximize_primitives", std::move(paramsPtr)));
5216     }
5217 
5218     {
5219         ParamsPtr paramsPtr(new MaximizeThreadsParams(
5220             /*taskCount*/ tcu::Nothing,
5221             /*meshCount*/ tcu::UVec3(1u, 1u, 1u),
5222             /*width*/ 64u,
5223             /*height*/ 1u,
5224             /*localSize*/ 32u,
5225             /*numVertices*/ 256u,
5226             /*numPrimitives*/ 128u));
5227 
5228         miscTests->addChild(new MaximizeVerticesCase(testCtx, "maximize_vertices", std::move(paramsPtr)));
5229     }
5230 
5231     {
5232         const uint32_t kInvocationCases[] = {32u, 64u, 128u, 256u};
5233 
5234         for (const auto &invocationCase : kInvocationCases)
5235         {
5236             const auto invsStr   = std::to_string(invocationCase);
5237             const auto numPixels = invocationCase / 2u;
5238 
5239             ParamsPtr paramsPtr(new MaximizeThreadsParams(
5240                 /*taskCount*/ tcu::Nothing,
5241                 /*meshCount*/ tcu::UVec3(1u, 1u, 1u),
5242                 /*width*/ numPixels,
5243                 /*height*/ 1u,
5244                 /*localSize*/ invocationCase,
5245                 /*numVertices*/ numPixels,
5246                 /*numPrimitives*/ numPixels));
5247 
5248             miscTests->addChild(
5249                 new MaximizeInvocationsCase(testCtx, "maximize_invocations_" + invsStr, std::move(paramsPtr)));
5250         }
5251     }
5252 
5253     {
5254         for (int i = 0; i < 2; ++i)
5255         {
5256             const bool useDynamicTopology = (i > 0);
5257 
5258             ParamsPtr paramsPtr(new MixedPipelinesParams(
5259                 /*taskCount*/ tcu::Nothing,
5260                 /*meshCount*/ tcu::UVec3(1u, 1u, 1u),
5261                 /*width*/ 8u,
5262                 /*height*/ 8u,
5263                 /*dynamicTopology*/ useDynamicTopology));
5264 
5265             const std::string nameSuffix = (useDynamicTopology ? "_dynamic_topology" : "");
5266             const std::string descSuffix = (useDynamicTopology ? " and use dynamic topology" : "");
5267 
5268             miscTests->addChild(new MixedPipelinesCase(testCtx, "mixed_pipelines" + nameSuffix, std::move(paramsPtr)));
5269         }
5270     }
5271 
5272     for (int i = 0; i < 2; ++i)
5273     {
5274         const bool useTask = (i > 0);
5275         const tcu::Maybe<tcu::UVec3> taskCount =
5276             (useTask ? tcu::just(tcu::UVec3(1u, 1u, 1u)) : tcu::nothing<tcu::UVec3>());
5277         const std::string testName = std::string("first_invocation_") + (useTask ? "task" : "mesh");
5278 
5279         ParamsPtr paramsPtr(new MiscTestParams(
5280             /*taskCount*/ taskCount,
5281             /*meshCount*/ tcu::UVec3(1u, 1u, 1u),
5282             /*width*/ 128u,
5283             /*height*/ 1u));
5284 
5285         miscTests->addChild(new FirstInvocationCase(testCtx, testName, std::move(paramsPtr)));
5286     }
5287 
5288     for (int i = 0; i < 2; ++i)
5289     {
5290         const bool useTask = (i > 0);
5291         const tcu::Maybe<tcu::UVec3> taskCount =
5292             (useTask ? tcu::just(tcu::UVec3(1u, 1u, 1u)) : tcu::nothing<tcu::UVec3>());
5293         const std::string testName = std::string("local_size_id_") + (useTask ? "task" : "mesh");
5294 
5295         ParamsPtr paramsPtr(new MiscTestParams(
5296             /*taskCount*/ taskCount,
5297             /*meshCount*/ tcu::UVec3(1u, 1u, 1u),
5298             /*width*/ 32u,
5299             /*height*/ 1u));
5300 
5301         miscTests->addChild(new LocalSizeIdCase(testCtx, testName, std::move(paramsPtr)));
5302     }
5303 
5304     if (false) // Disabled. This may be illegal.
5305     {
5306         ParamsPtr paramsPtr(new MiscTestParams(
5307             /*taskCount*/ tcu::UVec3(1u, 1u, 1u),
5308             /*meshCount*/ tcu::UVec3(1u, 1u, 1u),
5309             /*width*/ 8u,
5310             /*height*/ 8u));
5311 
5312         miscTests->addChild(new MultipleTaskPayloadsCase(testCtx, "multiple_task_payloads", std::move(paramsPtr)));
5313     }
5314 
5315     {
5316         ParamsPtr paramsPtr(new MiscTestParams(
5317             /*taskCount*/ tcu::UVec3(1u, 1u, 1u),
5318             /*meshCount*/ tcu::UVec3(1u, 1u, 1u),
5319             /*width*/ 8u,
5320             /*height*/ 8u));
5321 
5322         miscTests->addChild(new PayloadReadCase(testCtx, "payload_read", std::move(paramsPtr)));
5323     }
5324 
5325     {
5326         ParamsPtr paramsPtr(new MiscTestParams(
5327             /*taskCount*/ tcu::UVec3(1u, 1u, 1u),
5328             /*meshCount*/ tcu::UVec3(1u, 1u, 1u),
5329             /*width*/ 8u,
5330             /*height*/ 8u));
5331 
5332         miscTests->addChild(new RebindSetsCase(testCtx, "rebind_sets", std::move(paramsPtr)));
5333     }
5334 
5335     return miscTests.release();
5336 }
5337 
5338 } // namespace MeshShader
5339 } // namespace vkt
5340